mirror of
https://gitee.com/Lyon1998/pikapython.git
synced 2025-01-15 17:02:53 +08:00
d3026c7c99
add test for slice utf-8
880 lines
20 KiB
C
880 lines
20 KiB
C
#include "PikaStdData_String.h"
|
|
#include "PikaStdData_List.h"
|
|
#include "PikaStdData_String_Util.h"
|
|
#include "dataStrs.h"
|
|
|
|
/* Forward declaration: lower-cases `str` in place and returns it. The definition appears later in this file, inside the PIKA_STRING_UTF8_ENABLE section. */
char* _strlwr(char *str);
|
|
|
|
/*
 * __iter__: rewind the iteration cursor kept in the "__iter_i" attribute
 * to 0 and return a new reference to this object, which acts as its own
 * iterator (see __next__).
 */
Arg *PikaStdData_String___iter__(PikaObj *self)
{
    obj_setInt(self, "__iter_i", 0);
    Arg *iterator = arg_newRef(self);
    return iterator;
}
|
|
|
|
/*
 * Store `s` in the object's "str" attribute.
 *
 * When PIKA_STRING_UTF8_ENABLE is set the text is validated first; on
 * invalid UTF-8 the object's error code is set, the offending byte is
 * printed and the attribute is left untouched.
 */
void PikaStdData_String_set(PikaObj *self, char *s)
{
#if PIKA_STRING_UTF8_ENABLE
    /* _valid_utf8() yields the byte offset of the first bad byte, or -1. */
    int bad_at = _valid_utf8(s, -1);
    if (bad_at >= 0) {
        obj_setErrorCode(self, __LINE__);
        /*
         * Invalid bytes always have the high bit set; convert through
         * unsigned char so a signed `char` is not sign-extended to
         * "ffffffXX" by the %x conversion.
         */
        __platform_printf("Error invaliad character %x\r\n",
                          (unsigned char)s[bad_at]);
        return;
    }
#endif
    obj_setStr(self, "str", s);
}
|
|
|
|
/*
 * __init__: initialise the string object from `s`.
 *
 * With PIKA_STRING_UTF8_ENABLE the text is validated here; invalid UTF-8
 * sets the object's error code, prints the offending byte and leaves the
 * object without a "str" value. Valid text is stored through
 * PikaStdData_String_set().
 */
void PikaStdData_String___init__(PikaObj *self, char *s)
{
#if PIKA_STRING_UTF8_ENABLE
    /* _valid_utf8() yields the byte offset of the first bad byte, or -1. */
    int bad_at = _valid_utf8(s, -1);
    if (bad_at >= 0) {
        obj_setErrorCode(self, __LINE__);
        /*
         * Convert through unsigned char so a signed `char` with the high
         * bit set is not printed sign-extended by %x.
         */
        __platform_printf("Error invaliad character %x\r\n",
                          (unsigned char)s[bad_at]);
        return;
    }
#endif
    PikaStdData_String_set(self, s);
}
|
|
|
|
/* Return the text currently held in the object's "str" attribute. */
char *PikaStdData_String_get(PikaObj *self)
{
    char *text = obj_getStr(self, "str");
    return text;
}
|
|
|
|
/*
 * __next__: return the character at the cursor stored in "__iter_i" as a
 * new string Arg and advance the cursor by one, or return a null Arg when
 * the cursor is past the end.
 *
 * With PIKA_STRING_UTF8_ENABLE the cursor counts UTF-8 characters and the
 * lookup is delegated to _utf8_get(); otherwise it counts bytes.
 */
Arg *PikaStdData_String___next__(PikaObj *self)
{
    int cursor = args_getInt(self->list, "__iter_i");
    char *text = obj_getStr(self, "str");
    uint16_t text_size = strGetSize(text);
#if PIKA_STRING_UTF8_ENABLE
    /* Up to four UTF-8 bytes plus the terminating NUL. */
    char utf8_char[5];
    if (_utf8_get(text, text_size, cursor, utf8_char) < 0) {
        return arg_newNull();
    }
    args_setInt(self->list, "__iter_i", cursor + 1);
    return arg_newStr((char *)utf8_char);
#else
    if (!(cursor < text_size)) {
        return arg_newNull();
    }
    /* One-character scratch string; slot 0 is overwritten below. */
    char one_char[] = " ";
    one_char[0] = text[cursor];
    Arg *item = arg_newStr((char *)one_char);
    args_setInt(self->list, "__iter_i", cursor + 1);
    return item;
#endif
}
|
|
|
|
/*
 * __getitem__: return the character at index `__key` as a new string Arg.
 *
 * A negative index is counted from the end by adding __len__(). An index
 * that is out of range (including one that is still negative after that
 * adjustment) yields a null Arg.
 *
 * With PIKA_STRING_UTF8_ENABLE the index counts UTF-8 characters and the
 * lookup is delegated to _utf8_get(); otherwise it counts bytes.
 */
Arg *PikaStdData_String___getitem__(PikaObj *self, Arg *__key)
{
    int key_i = arg_getInt(__key);
    if (key_i < 0) {
        key_i = PikaStdData_String___len__(self) + key_i;
    }
    char *str = obj_getStr(self, "str");
    uint16_t len = strGetSize(str);
#if PIKA_STRING_UTF8_ENABLE
    /* Up to four UTF-8 bytes plus the terminating NUL. */
    char char_buff[5];
    if (_utf8_get(str, len, key_i, char_buff) < 0) {
        return arg_newNull();
    }
    return arg_newStr((char *)char_buff);
#else
    /*
     * Reject both ends of the range: without the lower bound an index such
     * as -100 on a short string would read before the start of the buffer.
     */
    if (key_i < 0 || key_i >= len) {
        return arg_newNull();
    }
    char char_buff[] = " ";
    char_buff[0] = str[key_i];
    return arg_newStr((char *)char_buff);
#endif
}
|
|
|
|
/*
 * __setitem__: replace the character at index `__key` with `__val`.
 *
 * UTF-8 build: `__val` must be valid UTF-8 holding exactly one character;
 * the target character is located with _utf8_get_offset() and replaced via
 * __str_repl(). Every failure sets the object's error code, prints a
 * message and leaves the function early.
 *
 * Byte build: the first byte of `__val` overwrites the byte at `__key`;
 * an out-of-range index (negative or >= length) is reported as an error.
 */
void PikaStdData_String___setitem__(PikaObj *self, Arg *__key, Arg *__val)
{
    int key_i = arg_getInt(__key);
    char *str = obj_getStr(self, "str");
    char *val = arg_getStr(__val);
    uint16_t len = strGetSize(str);
#if PIKA_STRING_UTF8_ENABLE
    int len2 = strlen(val);
    int is_invalid = _valid_utf8(val, len2);
    if (is_invalid >= 0) {
        obj_setErrorCode(self, __LINE__);
        __platform_printf("Error String invalid\r\n");
        return;
    }
    /* The replacement must be exactly one character. */
    int ulen_val = _utf8_strlen(val, len2);
    if (ulen_val != 1) {
        obj_setErrorCode(self, __LINE__);
        __platform_printf("Error String invalid char\r\n");
        return;
    }
    /* Byte offset and byte length of the character being replaced. */
    int char_len;
    int repl_at = _utf8_get_offset(str, len, key_i, &char_len);
    if (repl_at < 0) {
        obj_setErrorCode(self, __LINE__);
        __platform_printf("Error String Overflow\r\n");
        return;
    }
    int ok = __str_repl(self, str, len, repl_at, char_len, val, len2);
    if (ok < 0) {
        obj_setErrorCode(self, __LINE__);
        __platform_printf("Error. Internal error(-%d)\r\n", __LINE__);
        return;
    }
#else
    /*
     * Check the lower bound as well: a negative index would otherwise
     * write before the start of the string buffer.
     */
    if (key_i < 0 || key_i >= len) {
        obj_setErrorCode(self, 1);
        __platform_printf("Error String Overflow\r\n");
        return;
    }
    str[key_i] = val[0];
#endif
}
|
|
|
|
/* __str__: the string representation is the stored "str" attribute itself. */
char *PikaStdData_String___str__(PikaObj *self)
{
    char *text = obj_getStr(self, "str");
    return text;
}
|
|
|
|
/*
 * Return 1 when the stored string begins with `prefix`, 0 otherwise.
 * An empty prefix always matches. The comparison is byte-wise; if the
 * stored string is shorter than the prefix, its terminating NUL differs
 * from the prefix byte and ends the scan with 0.
 */
int PikaStdData_String_startswith(PikaObj *self, char *prefix)
{
    char *text = obj_getStr(self, "str");
    for (int pos = 0; prefix[pos] != '\0'; pos++) {
        if (prefix[pos] != text[pos]) {
            return 0;
        }
    }
    return 1;
}
|
|
|
|
/*
 * Return 1 when the stored string ends with `suffix`, 0 otherwise.
 * An empty suffix always matches. The comparison is byte-wise.
 */
int PikaStdData_String_endswith(PikaObj *self, char *suffix)
{
    const char *str = obj_getStr(self, "str");
    size_t str_len = strlen(str);
    size_t suffix_len = strlen(suffix);

    /*
     * A suffix longer than the string can never match. Checking this first
     * also keeps the backwards scan from stepping before str[0].
     */
    if (suffix_len > str_len) {
        return 0;
    }

    const char *tail = str + (str_len - suffix_len);
    for (size_t i = 0; i < suffix_len; i++) {
        if (tail[i] != suffix[i]) {
            return 0;
        }
    }
    return 1;
}
|
|
|
|
/*
 * Return 1 when every byte of the stored string satisfies isdigit(),
 * 0 as soon as one does not. An empty string returns 1 because the loop
 * body never runs.
 */
int PikaStdData_String_isdigit(PikaObj *self)
{
    const char *str = obj_getStr(self, "str");
    for (size_t i = 0; str[i] != '\0'; i++) {
        /*
         * <ctype.h> functions need a value representable as unsigned char;
         * a plain (possibly signed) char holding a UTF-8 byte would be
         * negative and make the call undefined.
         */
        if (!isdigit((unsigned char)str[i])) {
            return 0;
        }
    }
    return 1;
}
|
|
|
|
/*
 * Return 1 when every byte of the stored string satisfies islower(),
 * 0 as soon as one does not. An empty string returns 1 because the loop
 * body never runs.
 */
int PikaStdData_String_islower(PikaObj *self)
{
    const char *str = obj_getStr(self, "str");
    for (size_t i = 0; str[i] != '\0'; i++) {
        /*
         * <ctype.h> functions need a value representable as unsigned char;
         * a plain (possibly signed) char holding a UTF-8 byte would be
         * negative and make the call undefined.
         */
        if (!islower((unsigned char)str[i])) {
            return 0;
        }
    }
    return 1;
}
|
|
|
|
/*
 * Return 1 when every byte of the stored string satisfies isalnum(),
 * 0 as soon as one does not. An empty string returns 1 because the loop
 * body never runs.
 */
int PikaStdData_String_isalnum(PikaObj *self)
{
    const char *str = obj_getStr(self, "str");
    for (size_t i = 0; str[i] != '\0'; i++) {
        /*
         * <ctype.h> functions need a value representable as unsigned char;
         * a plain (possibly signed) char holding a UTF-8 byte would be
         * negative and make the call undefined.
         */
        if (!isalnum((unsigned char)str[i])) {
            return 0;
        }
    }
    return 1;
}
|
|
|
|
/*
 * Return 1 when every byte of the stored string satisfies isalpha(),
 * 0 as soon as one does not. An empty string returns 1 because the loop
 * body never runs.
 */
int PikaStdData_String_isalpha(PikaObj *self)
{
    const char *str = obj_getStr(self, "str");
    for (size_t i = 0; str[i] != '\0'; i++) {
        /*
         * <ctype.h> functions need a value representable as unsigned char;
         * a plain (possibly signed) char holding a UTF-8 byte would be
         * negative and make the call undefined.
         */
        if (!isalpha((unsigned char)str[i])) {
            return 0;
        }
    }
    return 1;
}
|
|
|
|
/*
 * Return 1 when every byte of the stored string satisfies isspace(),
 * 0 as soon as one does not. An empty string returns 1 because the loop
 * body never runs.
 */
int PikaStdData_String_isspace(PikaObj *self)
{
    const char *str = obj_getStr(self, "str");
    for (size_t i = 0; str[i] != '\0'; i++) {
        /*
         * <ctype.h> functions need a value representable as unsigned char;
         * a plain (possibly signed) char holding a UTF-8 byte would be
         * negative and make the call undefined.
         */
        if (!isspace((unsigned char)str[i])) {
            return 0;
        }
    }
    return 1;
}
|
|
|
|
/*
 * Split the stored string and return the pieces as a new
 * PikaStdData_List object.
 *
 * Only the first character of `s` is used as the delimiter. The number of
 * tokens is the delimiter count plus one.
 */
PikaObj *PikaStdData_String_split(PikaObj *self, char *s)
{
    /* Create the list object and initialise it. */
    PikaObj *result = newNormalObj(New_PikaStdData_List);
    PikaStdData_List___init__(result);

    /* Work on a private copy held in a scratch buffer pool. */
    Args buffs = {0};
    char *remaining = strsCopy(&buffs, obj_getStr(self, "str"));

    char delimiter = s[0];
    int tokens_left = strCountSign(remaining, delimiter) + 1;

    while (tokens_left-- > 0) {
        char *token = strsPopToken(&buffs, remaining, delimiter);
        /* Wrap the token in an Arg, append it to the list, then release the Arg. */
        Arg *token_arg = arg_newStr(token);
        PikaStdData_List_append(result, token_arg);
        arg_deinit(token_arg);
    }

    strsDeinit(&buffs);
    return result;
}
|
|
|
|
/*
 * __len__: length of the stored string.
 *
 * UTF-8 build: number of UTF-8 characters as counted by _utf8_strlen();
 * a negative count is reported as an error (error code + message) and
 * returned unchanged. Byte build: the size reported by strGetSize().
 */
int PikaStdData_String___len__(PikaObj *self)
{
    char *text = obj_getStr(self, "str");
#if PIKA_STRING_UTF8_ENABLE
    int char_count = _utf8_strlen(text, -1);
    if (char_count < 0) {
        obj_setErrorCode(self, __LINE__);
        __platform_printf("Error. Internal error(%d)\r\n", __LINE__);
    }
    return char_count;
#else
    return strGetSize(text);
#endif
}
|
|
|
|
/*
 * Return a copy of the stored string with leading and trailing space
 * characters (' ' only) removed.
 *
 * The result is stored in the object's "_buf" attribute and the pointer
 * obtained from obj_getStr(self, "_buf") is returned; the "str" attribute
 * itself is not modified. A string made only of spaces yields "".
 */
char *PikaStdData_String_strip(PikaObj *self)
{
    Args buffs = {0};
    char *str = strsCopy(&buffs, obj_getStr(self, "str"));
    /* Measure once instead of on every loop iteration. */
    size_t len = strGetSize(str);

    /* First byte that is not a space (== len when there is none). */
    size_t start = 0;
    while (start < len && str[start] == ' ') {
        start++;
    }

    /*
     * One past the last byte that is not a space. Bounding the scan by
     * `start` makes an all-space input collapse to the empty string
     * instead of being returned unchanged.
     */
    size_t end = len;
    while (end > start && str[end - 1] == ' ') {
        end--;
    }
    str[end] = '\0';

    obj_setStr(self, "_buf", str + start);
    strsDeinit(&buffs);
    return obj_getStr(self, "_buf");
}
|
|
|
|
/*
 * Return the stored string with `old` replaced by `new`, as computed by
 * strsReplace() on a scratch copy.
 *
 * The result is stored in the object's "_buf" attribute and the pointer
 * obtained from obj_getStr(self, "_buf") is returned; the "str" attribute
 * is not modified here.
 */
char *PikaStdData_String_replace(PikaObj *self, char *old, char *new)
{
    Args buffs = {0};
    char *source_copy = strsCopy(&buffs, obj_getStr(self, "str"));
    char *replaced = strsReplace(&buffs, source_copy, old, new);

    /* Store the result in the object before the scratch pool is released. */
    obj_setStr(self, "_buf", replaced);
    strsDeinit(&buffs);
    return obj_getStr(self, "_buf");
}
|
|
|
|
|
|
/*
 * encode([encoding]): return the stored string as a bytes Arg.
 *
 * Byte build, or UTF-8 build with no argument: the raw bytes of the string
 * are returned. UTF-8 build with an argument: the first tuple element must
 * be a string naming the encoding; it is lower-cased IN PLACE and passed to
 * _str_encode(). A non-string argument or a NULL result from _str_encode()
 * sets the object's error code, prints a message and returns NULL.
 */
Arg *PikaStdData_String_encode(PikaObj *self, PikaTuple *encoding)
{
    char *str = obj_getStr(self, "str");

#if PIKA_STRING_UTF8_ENABLE
    if (tuple_getSize(encoding) < 1) {
        return arg_newBytes((uint8_t *)str, strGetSize(str));
    }

    Arg *encoding_arg = tuple_getArg(encoding, 0);
    if (arg_getType(encoding_arg) != ARG_TYPE_STRING) {
        obj_setErrorCode(self, __LINE__);
        __platform_printf("Error invaliad arguments\r\n");
        return NULL;
    }

    /* _strlwr() rewrites the argument's buffer and returns the same pointer. */
    char *to_code = _strlwr(arg_getStr(encoding_arg));
    Arg *encoded = _str_encode(str, to_code);
    if (encoded == NULL) {
        obj_setErrorCode(self, __LINE__);
        __platform_printf("Error internal error\r\n");
        return NULL;
    }
    return encoded;
#else
    return arg_newBytes((uint8_t *)str, strGetSize(str));
#endif
}
|
|
|
|
#if PIKA_STRING_UTF8_ENABLE
|
|
|
|
/*
 * Number of bytes that FOLLOW a UTF-8 lead byte, indexed by the low six
 * bits of that byte (lead & 0x3f). Each row below covers 16 indices; the
 * lead-byte range shown is the one that maps there when the byte is
 * >= 0xC0.
 */
static const uint8_t _pcre_utf8_table4[] = {
    /* 0xC0..0xCF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xD0..0xDF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xE0..0xEF */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xF0..0xFF */ 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
};
|
|
|
|
/*
 * Bit masks used by the UTF-8 decoders in this file.
 *
 * mask1 tests the high bit of a byte (set = not plain ASCII). mask2..mask4
 * are not referenced by the code visible in this file.
 */
const char mask1 = 0x80; /* 1000 0000 */
const char mask2 = 0xc0; /* 1100 0000 */
const char mask3 = 0xe0; /* 1110 0000 */
const char mask4 = 0xf0; /* 1111 0000 */

/*
 * Payload masks: nmask1 keeps the 6 data bits of a continuation byte;
 * nmask2/3/4 keep the data bits of a 2-/3-/4-byte lead byte.
 */
const char nmask1 = 0x3f; /* 0011 1111 */
const char nmask2 = 0x1f; /* 0001 1111 */
const char nmask3 = 0x0f; /* 0000 1111 */
const char nmask4 = 0x07; /* 0000 0111 */
|
|
|
|
int _valid_utf8(const char *string, int length)
|
|
{
|
|
const uint8_t *p;
|
|
if (length < 0)
|
|
{
|
|
length = strlen(string);
|
|
}
|
|
|
|
for (p = (const uint8_t *)string; length-- > 0; p++)
|
|
{
|
|
int ab;
|
|
int c = *p;
|
|
if (!(c & 0x80))
|
|
continue;
|
|
if (c < 0xc0)
|
|
return (uintptr_t)p - (uintptr_t)string;
|
|
ab = _pcre_utf8_table4[c & 0x3f];
|
|
if (length < ab || ab > 3)
|
|
return (uintptr_t)p - (uintptr_t)string;
|
|
length -= ab;
|
|
if ((*(++p) & 0xc0) != 0x80)
|
|
return (uintptr_t)p - (uintptr_t)string;
|
|
switch (ab)
|
|
{
|
|
case 1:
|
|
if ((c & 0x3e) == 0)
|
|
return (uintptr_t)p - (uintptr_t)string;
|
|
continue;
|
|
case 2:
|
|
if ((c == 0xe0 && (*p & 0x20) == 0) ||
|
|
(c == 0xed && *p >= 0xa0))
|
|
return (uintptr_t)p - (uintptr_t)string;
|
|
break;
|
|
case 3:
|
|
if ((c == 0xf0 && (*p & 0x30) == 0) ||
|
|
(c > 0xf4) ||
|
|
(c == 0xf4 && *p > 0x8f))
|
|
return (uintptr_t)p - (uintptr_t)string;
|
|
break;
|
|
}
|
|
while (--ab > 0)
|
|
{
|
|
if ((*(++p) & 0xc0) != 0x80)
|
|
return (uintptr_t)p - (uintptr_t)string;
|
|
}
|
|
}
|
|
return -1;
|
|
}
|
|
int _utf8_get(const char *string, int length, int at, char *out_buf)
|
|
{
|
|
const uint8_t *p;
|
|
int ab, c;
|
|
if (length < 0)
|
|
{
|
|
length = strlen(string);
|
|
}
|
|
if (at < 0 || at >= length)
|
|
return -1;
|
|
|
|
for (p = (const uint8_t *)string; length > 0 && at; p++, at--)
|
|
{
|
|
c = *p;
|
|
if (!(c & 0x80))
|
|
{
|
|
length--;
|
|
continue;
|
|
}
|
|
ab = _pcre_utf8_table4[c & 0x3f];
|
|
p += ab++;
|
|
length -= ab;
|
|
}
|
|
if (at || length <= 0)
|
|
return -2;
|
|
c = *p;
|
|
if (!(c & 0x80))
|
|
{
|
|
*out_buf = c;
|
|
out_buf[1] = 0;
|
|
return 1;
|
|
};
|
|
ab = _pcre_utf8_table4[c & 0x3f] + 1;
|
|
__platform_memcpy(out_buf, p, ab);
|
|
out_buf[ab] = '\0';
|
|
return ab;
|
|
}
|
|
int _utf8_get_offset(const char *string, int length, int at, int *out_char_len)
|
|
{
|
|
const uint8_t *p;
|
|
int ab, c;
|
|
if (length < 0)
|
|
{
|
|
length = strlen(string);
|
|
}
|
|
if (at < 0 || at >= length)
|
|
return -1;
|
|
|
|
for (p = (const uint8_t *)string; length > 0 && at; p++, at--)
|
|
{
|
|
c = *p;
|
|
if (!(c & 0x80))
|
|
{
|
|
length--;
|
|
continue;
|
|
}
|
|
ab = _pcre_utf8_table4[c & 0x3f];
|
|
p += ab++;
|
|
length -= ab;
|
|
}
|
|
if (at)
|
|
return -2;
|
|
c = *p;
|
|
if (!(c & 0x80))
|
|
{
|
|
if (out_char_len)
|
|
*out_char_len = 1;
|
|
return (uintptr_t)p - (uintptr_t)string;
|
|
};
|
|
ab = _pcre_utf8_table4[c & 0x3f] + 1;
|
|
if (out_char_len)
|
|
*out_char_len = ab;
|
|
return (uintptr_t)p - (uintptr_t)string;
|
|
}
|
|
int _utf8_strlen(const char *string, int length)
|
|
{
|
|
const uint8_t *p;
|
|
int i, ab, c;
|
|
if (length < 0)
|
|
{
|
|
length = strlen(string);
|
|
}
|
|
|
|
for (i = 0, p = (const uint8_t *)string; length > 0; i++, p++)
|
|
{
|
|
c = *p;
|
|
if (!(c & 0x80))
|
|
{
|
|
length--;
|
|
continue;
|
|
}
|
|
ab = _pcre_utf8_table4[c & 0x3f];
|
|
p += ab++;
|
|
length -= ab;
|
|
}
|
|
if (length < 0)
|
|
return -1;
|
|
return i;
|
|
}
|
|
|
|
/*
 * Replace `repl_len` bytes of `str` starting at byte `repl_at` with the
 * `val_len` bytes of `val`.
 *
 * @param self      object whose "str" attribute `str` belongs to
 * @param str       current text (modified in place)
 * @param str_len   size of `str` in bytes
 * @return 0 on success, -1 when the new string Arg cannot be created, or
 *         the negated obj_setArg() result when storing it fails
 *
 * When the replacement is not longer than the replaced span the edit is
 * done inside the existing buffer. Otherwise a new string is assembled
 * (head + val + tail) and stored as the object's "str" attribute.
 */
int __str_repl(PikaObj *self, char *str, int str_len, int repl_at, int repl_len, char *val, int val_len)
{
    if (!(val_len > repl_len)) {
        /* Fits in place: write the new bytes, then close the gap (NUL included). */
        char *dst = str + repl_at;
        __platform_memcpy(dst, val, val_len);
        __platform_memmove(dst + val_len, dst + repl_len, str_len - repl_at - repl_len + 1);
        return 0;
    }

    /* Cut the text at the replacement point so `str` is just the head. */
    str[repl_at] = 0;
    Arg *rebuilt = arg_newStr(str);
    if (!rebuilt) {
        return -1;
    }
    rebuilt = arg_strAppend(rebuilt, val);
    rebuilt = arg_strAppend(rebuilt, str + repl_at + repl_len);

    /* Swap the attribute; `str` is only read above, before the removal. */
    obj_removeArg(self, "str");
    int rs = obj_setArg(self, "str", rebuilt);
    arg_deinit(rebuilt);
    return rs ? -rs : 0;
}
|
|
|
|
int __utf8_to_utf32_char_LE(const char *utf8, char *out_buf)
|
|
{
|
|
char c = *utf8;
|
|
if (!(c & mask1))
|
|
{
|
|
*out_buf = c;
|
|
out_buf[1] = 0;
|
|
out_buf[2] = 0;
|
|
out_buf[3] = 0;
|
|
return 1;
|
|
}
|
|
int left_length = _pcre_utf8_table4[c & 0x3f];
|
|
char a, b, d;
|
|
switch (left_length)
|
|
{
|
|
case 1:
|
|
a = c & nmask2;
|
|
b = utf8[1] & nmask1;
|
|
out_buf[0] = b | a << 6;
|
|
out_buf[1] = a >> 2;
|
|
out_buf[2] = 0;
|
|
out_buf[3] = 0;
|
|
return 2;
|
|
case 2:
|
|
a = c & nmask3;
|
|
b = utf8[1] & nmask1;
|
|
c = utf8[2] & nmask1;
|
|
out_buf[0] = c | b << 6;
|
|
out_buf[1] = b >> 2 | a << 4;
|
|
out_buf[2] = 0;
|
|
out_buf[3] = 0;
|
|
return 3;
|
|
case 3:
|
|
a = c & nmask4;
|
|
b = utf8[1] & nmask1;
|
|
c = utf8[2] & nmask1;
|
|
d = utf8[3] & nmask1;
|
|
out_buf[0] = d | c << 6;
|
|
out_buf[1] = c >> 2 | b << 4;
|
|
out_buf[2] = b >> 4 | a << 2;
|
|
out_buf[3] = 0;
|
|
return 4;
|
|
default:
|
|
return 0;
|
|
}
|
|
}
|
|
/*
 * Compute how many bytes the UTF-32 form (without BOM) of the first `len`
 * bytes of `utf8` needs: four per character.
 *
 * @return the size in bytes, or -1 when a character cannot be converted
 */
int __utf8_to_utf32_LE_noBOM_get_size(const char *utf8, int len)
{
    const char *cursor = utf8;
    char scratch[4]; /* conversion output is discarded; only the sizes matter */
    int char_count = 0;

    for (; len > 0; char_count++) {
        int consumed = __utf8_to_utf32_char_LE(cursor, scratch);
        if (consumed == 0) {
            return -1;
        }
        cursor += consumed;
        len -= consumed;
    }
    return char_count * 4;
}
|
|
/*
 * Convert the first `len` bytes of `utf8` to little-endian UTF-32 in
 * `out_buf`, without a byte-order mark.
 *
 * @return the number of bytes written, or -1 when a character cannot be
 *         converted
 *
 * The function itself does not bound writes to `out_buf`; the caller in
 * this file sizes it with __utf8_to_utf32_LE_noBOM_get_size().
 */
int __utf8_to_utf32_LE_noBOM(const char *utf8, int len, char *out_buf)
{
    const char *in = utf8;
    char *out;

    for (out = out_buf; len > 0; out += 4) {
        int consumed = __utf8_to_utf32_char_LE(in, out);
        if (consumed == 0) {
            return -1;
        }
        in += consumed;
        len -= consumed;
    }
    return out - out_buf;
}
|
|
/*
 * Convert UTF-8 to little-endian UTF-32 preceded by the four-byte BOM
 * FF FE 00 00.
 *
 * @return total bytes written including the BOM, or the negative result of
 *         the conversion (the BOM is not written in that case)
 */
int __utf8_to_utf32_LE_withBOM(const char *utf8, int len, char *out_buf)
{
    enum { BOM_SIZE = 4 };

    /* The payload goes right after the space reserved for the BOM. */
    int payload = __utf8_to_utf32_LE_noBOM(utf8, len, out_buf + BOM_SIZE);
    if (payload < 0) {
        return payload;
    }

    out_buf[0] = '\xff';
    out_buf[1] = '\xfe';
    out_buf[2] = 0;
    out_buf[3] = 0;
    return payload + BOM_SIZE;
}
|
|
|
|
int32_t __utf8_decode(const char *utf8, int left_length)
|
|
{
|
|
int ucode = -1;
|
|
char c = *utf8;
|
|
if (!(c & mask1))
|
|
{
|
|
return c;
|
|
}
|
|
char a, b, d;
|
|
switch (left_length)
|
|
{
|
|
case 1:
|
|
a = c & nmask2;
|
|
b = utf8[1] & nmask1;
|
|
ucode = b | (a & 0x03) << 6;
|
|
ucode |= (a >> 2) << 8;
|
|
break;
|
|
case 2:
|
|
a = c & nmask3;
|
|
b = utf8[1] & nmask1;
|
|
c = utf8[2] & nmask1;
|
|
ucode = c | (b & 0x03) << 6;
|
|
ucode |= (b >> 2 | a << 4) << 8;
|
|
break;
|
|
case 3:
|
|
a = c & nmask4;
|
|
b = utf8[1] & nmask1;
|
|
c = utf8[2] & nmask1;
|
|
d = utf8[3] & nmask1;
|
|
ucode = d | (c & 0x03) << 6;
|
|
ucode |= (c >> 2 | (b & 0x0f) << 4) << 8;
|
|
ucode |= (b >> 4 | a << 2) << 16;
|
|
break;
|
|
default:
|
|
return -1;
|
|
}
|
|
return ucode;
|
|
}
|
|
|
|
/*
 * Encode code point `u` as little-endian UTF-16 in `out_buf`.
 *
 * @return 2 when `u` fits in 16 bits (one code unit written), otherwise 4
 *         (a surrogate pair written, high surrogate first)
 */
int __unicode_to_utf16_char_LE(int32_t u, char *out_buf)
{
    if ((u & 0xffff0000) == 0) {
        /* Fits in a single code unit. */
        out_buf[0] = u & 0xff;
        out_buf[1] = (u & 0xff00) >> 8;
        return 2;
    }

    /* Split the offset above 0x10000 into two 10-bit halves. */
    int32_t offset = u - 0x10000;
    int32_t low_surrogate = (offset & 0x3ff) | 0xdc00;
    int32_t high_surrogate = (offset >> 10) | 0xd800;

    out_buf[0] = high_surrogate & 0xff;
    out_buf[1] = (high_surrogate & 0xff00) >> 8;
    out_buf[2] = low_surrogate & 0xff;
    out_buf[3] = (low_surrogate & 0xff00) >> 8;
    return 4;
}
|
|
int __utf8_to_utf16_LE_noBOM(const char *utf8, int len, char *out_buf)
|
|
{
|
|
char *q = out_buf;
|
|
char *p = (char*) utf8;
|
|
while (len > 0)
|
|
{
|
|
char c = *p;
|
|
int32_t ucode;
|
|
if (!(c & mask1))
|
|
{
|
|
ucode = c;
|
|
p++;
|
|
len--;
|
|
}
|
|
else
|
|
{
|
|
int left_size = _pcre_utf8_table4[c & 0x3f];
|
|
ucode = __utf8_decode(p, left_size++);
|
|
if (ucode < 0)
|
|
return ucode;
|
|
p += left_size;
|
|
len -= left_size;
|
|
}
|
|
int size = __unicode_to_utf16_char_LE(ucode, q);
|
|
q += size;
|
|
}
|
|
return q - out_buf;
|
|
}
|
|
int __utf8_to_utf16_LE_noBOM_get_size(const char *utf8, int len)
|
|
{
|
|
char out_buf[4];
|
|
char *p = (char*) utf8;
|
|
int need_space = 0;
|
|
while (len > 0)
|
|
{
|
|
char c = *p;
|
|
int32_t ucode;
|
|
if (!(c & mask1))
|
|
{
|
|
ucode = c;
|
|
p++;
|
|
len--;
|
|
}
|
|
else
|
|
{
|
|
int left_size = _pcre_utf8_table4[c & 0x3f];
|
|
ucode = __utf8_decode(p, left_size++);
|
|
if (ucode < 0)
|
|
return ucode;
|
|
p += left_size;
|
|
len -= left_size;
|
|
}
|
|
int size = __unicode_to_utf16_char_LE(ucode, out_buf);
|
|
need_space += size;
|
|
}
|
|
return need_space;
|
|
}
|
|
/*
 * Convert UTF-8 to little-endian UTF-16 preceded by the two-byte BOM
 * FF FE.
 *
 * @return total bytes written including the BOM, or the negative result of
 *         the conversion (the BOM is not written in that case)
 */
int __utf8_to_utf16_LE_withBOM(const char *utf8, int len, char *out_buf)
{
    enum { BOM_SIZE = 2 };

    /* The payload goes right after the space reserved for the BOM. */
    int payload = __utf8_to_utf16_LE_noBOM(utf8, len, out_buf + BOM_SIZE);
    if (payload < 0) {
        return payload;
    }

    out_buf[0] = '\xff';
    out_buf[1] = '\xfe';
    return payload + BOM_SIZE;
}
|
|
/*
 * Encode the UTF-8 text `str` and return the result as a new bytes Arg.
 *
 * @param str       NUL-terminated UTF-8 text
 * @param encoding  "utf-8", "ascii", "utf-16" or "utf-32"; compared
 *                  verbatim (the caller in this file lower-cases it first)
 * @return the bytes Arg, or NULL for an unknown encoding, an allocation
 *         failure or text that cannot be converted
 *
 * "ascii" drops every byte with the high bit set after printing a warning.
 * "utf-16" and "utf-32" produce little-endian output with a leading BOM;
 * an empty string yields just the BOM.
 */
Arg *_str_encode(char *str, char *encoding)
{
    if (strEqu(encoding, "utf-8")) {
        return arg_newBytes((uint8_t *)str, strGetSize(str));
    }

    int len = strlen(str);

    if (strEqu(encoding, "ascii")) {
        /* Character count == byte count means there is nothing to drop. */
        int ulen = _utf8_strlen(str, len);
        if (ulen == len) {
            return arg_newBytes((uint8_t *)str, strGetSize(str));
        }
        __platform_printf("Warning there is non-ascii characters\r\n");

        char *b = (char *)pikaMalloc(len + 1);
        if (!b) {
            return NULL;
        }
        /* Copy only the bytes whose high bit is clear. */
        char *q = b;
        for (const char *p = str; *p != '\0'; p++) {
            if (!(*p & 0x80)) {
                *q++ = *p;
            }
        }
        *q = 0;
        Arg *arg = arg_newBytes((uint8_t *)b, strGetSize(b));
        pikaFree(b, len + 1);
        return arg;
    }

    if (strEqu(encoding, "utf-16")) {
        int size_needed = __utf8_to_utf16_LE_noBOM_get_size(str, len);
        /*
         * Only a negative size signals a conversion error; zero is the
         * legitimate size of an empty string, which encodes to the BOM alone.
         */
        if (size_needed < 0) {
            return NULL;
        }
        size_needed += 2; /* room for the BOM */
        char *b = (char *)pikaMalloc(size_needed);
        if (!b) {
            return NULL;
        }
        int ok = __utf8_to_utf16_LE_withBOM(str, len, b);
        if (ok < 0) {
            pikaFree(b, size_needed);
            return NULL;
        }
        Arg *arg = arg_newBytes((uint8_t *)b, size_needed);
        pikaFree(b, size_needed);
        return arg;
    }

    if (strEqu(encoding, "utf-32")) {
        int size_needed = __utf8_to_utf32_LE_noBOM_get_size(str, len);
        /* As above: zero means "empty string", not failure. */
        if (size_needed < 0) {
            return NULL;
        }
        size_needed += 4; /* room for the BOM */
        char *b = (char *)pikaMalloc(size_needed);
        if (!b) {
            return NULL;
        }
        int ok = __utf8_to_utf32_LE_withBOM(str, len, b);
        if (ok < 0) {
            pikaFree(b, size_needed);
            return NULL;
        }
        Arg *arg = arg_newBytes((uint8_t *)b, size_needed);
        pikaFree(b, size_needed);
        return arg;
    }

    /* Unsupported encoding name. */
    return NULL;
}
|
|
|
|
/*
 * Convert `str` to lower case in place using tolower().
 *
 * @param str  NUL-terminated string to modify
 * @return `str`
 */
char* _strlwr(char *str)
{
    for (char *p = str; *p != '\0'; p++) {
        /*
         * tolower() needs a value representable as unsigned char; a plain
         * (possibly signed) char with the high bit set would be negative
         * and make the call undefined.
         */
        *p = (char)tolower((unsigned char)*p);
    }
    return str;
}
|
|
|
|
#endif
|