From b21fd029edc471719ce9f933182cbc9ea9f31409 Mon Sep 17 00:00:00 2001 From: pikastech Date: Thu, 29 Sep 2022 20:41:57 +0800 Subject: [PATCH] optimize str slice, time speed recuse 95% --- package/PikaStdLib/PikaStdData_String.c | 72 +++++++++++++------ port/linux/.vscode/launch.json | 2 +- .../PikaStdLib/PikaStdData_String.c | 72 +++++++++++++------ src/PikaVM.c | 24 +++---- src/PikaVersion.h | 2 +- test/python/issue/issue_big_str_slice.py | 2 + test/stddata-test.cpp | 14 ++++ 7 files changed, 126 insertions(+), 62 deletions(-) create mode 100644 test/python/issue/issue_big_str_slice.py diff --git a/package/PikaStdLib/PikaStdData_String.c b/package/PikaStdLib/PikaStdData_String.c index ccb13831d..1ff14d213 100644 --- a/package/PikaStdLib/PikaStdData_String.c +++ b/package/PikaStdLib/PikaStdData_String.c @@ -4,6 +4,7 @@ #include "dataStrs.h" char* _strlwr(char* str); +static int string_len(char* str); Arg* PikaStdData_String___iter__(PikaObj* self) { obj_setInt(self, "__iter_i", 0); @@ -64,29 +65,50 @@ Arg* PikaStdData_String___next__(PikaObj* self) { #endif } +static int _str_get(char* str, int key_i, char* char_buff) { + uint16_t len = strGetSize(str); + if (key_i < 0) { + key_i = string_len(str) + key_i; + } +#if PIKA_STRING_UTF8_ENABLE + return _utf8_get(str, len, key_i, char_buff); +#else + if (key_i < len) { + char_buff[0] = str[key_i]; + return 0; + } + return -1; +#endif +} + +char* string_slice(Args* outBuffs, char* str, int start, int end) { + char* res = args_getBuff(outBuffs, strGetSize(str)); + if (start < 0) { + start += string_len(str); + } + if (end < 0) { + end += string_len(str) + 1; + } + for (int i = start; i < end; i++) { + char char_buff[5] = {0}; + int r = _str_get(str, i, char_buff); + if (r < 0) { + return NULL; + } + res = strAppend(res, char_buff); + } + return res; +} + Arg* PikaStdData_String___getitem__(PikaObj* self, Arg* __key) { int key_i = arg_getInt(__key); - if (key_i < 0) { - key_i = PikaStdData_String___len__(self) + key_i; - } char* str = obj_getStr(self, "str"); - uint16_t len = strGetSize(str); -#if PIKA_STRING_UTF8_ENABLE - char char_buff[5]; - int r = _utf8_get(str, len, key_i, char_buff); + char char_buff[5] = {0}; + int r = _str_get(str, key_i, char_buff); if (r < 0) { return arg_newNull(); } return arg_newStr((char*)char_buff); -#else - char char_buff[] = " "; - if (key_i < len) { - char_buff[0] = str[key_i]; - return arg_newStr((char*)char_buff); - } else { - return arg_newNull(); - } -#endif } void PikaStdData_String___setitem__(PikaObj* self, Arg* __key, Arg* __val) { @@ -242,21 +264,25 @@ PikaObj* PikaStdData_String_split(PikaObj* self, char* s) { return list; } -int PikaStdData_String___len__(PikaObj* self) { - char* str = obj_getStr(self, "str"); +static int string_len(char* str) { #if PIKA_STRING_UTF8_ENABLE int n = _utf8_strlen(str, -1); - if (n < 0) { - obj_setErrorCode(self, __LINE__); - __platform_printf("Error. Internal error(%d)\r\n", __LINE__); - return n; - } return n; #else return strGetSize(str); #endif } +int PikaStdData_String___len__(PikaObj* self) { + char* str = obj_getStr(self, "str"); + int n = string_len(str); + if (n < 0) { + obj_setErrorCode(self, __LINE__); + __platform_printf("Error. Internal error(%d)\r\n", __LINE__); + } + return n; +} + char* PikaStdData_String_strip(PikaObj* self, PikaTuple* chrs) { Args buffs = {0}; char to_strip = ' '; diff --git a/port/linux/.vscode/launch.json b/port/linux/.vscode/launch.json index 8f313953d..604883ee2 100644 --- a/port/linux/.vscode/launch.json +++ b/port/linux/.vscode/launch.json @@ -11,7 +11,7 @@ "program": "${workspaceFolder}/build/test/pikascript_test", // "program": "${workspaceFolder}/build/boot/demo06-pikamain/pikascript_demo06-pikamain", "args": [ - // "--gtest_filter=proxy.test1" + "--gtest_filter=str.big_slice" ], "stopAtEntry": false, "cwd": "${workspaceFolder}", diff --git a/port/linux/package/pikascript/pikascript-lib/PikaStdLib/PikaStdData_String.c b/port/linux/package/pikascript/pikascript-lib/PikaStdLib/PikaStdData_String.c index ccb13831d..1ff14d213 100644 --- a/port/linux/package/pikascript/pikascript-lib/PikaStdLib/PikaStdData_String.c +++ b/port/linux/package/pikascript/pikascript-lib/PikaStdLib/PikaStdData_String.c @@ -4,6 +4,7 @@ #include "dataStrs.h" char* _strlwr(char* str); +static int string_len(char* str); Arg* PikaStdData_String___iter__(PikaObj* self) { obj_setInt(self, "__iter_i", 0); @@ -64,29 +65,50 @@ Arg* PikaStdData_String___next__(PikaObj* self) { #endif } +static int _str_get(char* str, int key_i, char* char_buff) { + uint16_t len = strGetSize(str); + if (key_i < 0) { + key_i = string_len(str) + key_i; + } +#if PIKA_STRING_UTF8_ENABLE + return _utf8_get(str, len, key_i, char_buff); +#else + if (key_i < len) { + char_buff[0] = str[key_i]; + return 0; + } + return -1; +#endif +} + +char* string_slice(Args* outBuffs, char* str, int start, int end) { + char* res = args_getBuff(outBuffs, strGetSize(str)); + if (start < 0) { + start += string_len(str); + } + if (end < 0) { + end += string_len(str) + 1; + } + for (int i = start; i < end; i++) { + char char_buff[5] = {0}; + int r = _str_get(str, i, char_buff); + if (r < 0) { + return NULL; + } + res = strAppend(res, char_buff); + } + return res; +} + Arg* PikaStdData_String___getitem__(PikaObj* self, Arg* __key) { int key_i = arg_getInt(__key); - if (key_i < 0) { - key_i = PikaStdData_String___len__(self) + key_i; - } char* str = obj_getStr(self, "str"); - uint16_t len = strGetSize(str); -#if PIKA_STRING_UTF8_ENABLE - char char_buff[5]; - int r = _utf8_get(str, len, key_i, char_buff); + char char_buff[5] = {0}; + int r = _str_get(str, key_i, char_buff); if (r < 0) { return arg_newNull(); } return arg_newStr((char*)char_buff); -#else - char char_buff[] = " "; - if (key_i < len) { - char_buff[0] = str[key_i]; - return arg_newStr((char*)char_buff); - } else { - return arg_newNull(); - } -#endif } void PikaStdData_String___setitem__(PikaObj* self, Arg* __key, Arg* __val) { @@ -242,21 +264,25 @@ PikaObj* PikaStdData_String_split(PikaObj* self, char* s) { return list; } -int PikaStdData_String___len__(PikaObj* self) { - char* str = obj_getStr(self, "str"); +static int string_len(char* str) { #if PIKA_STRING_UTF8_ENABLE int n = _utf8_strlen(str, -1); - if (n < 0) { - obj_setErrorCode(self, __LINE__); - __platform_printf("Error. Internal error(%d)\r\n", __LINE__); - return n; - } return n; #else return strGetSize(str); #endif } +int PikaStdData_String___len__(PikaObj* self) { + char* str = obj_getStr(self, "str"); + int n = string_len(str); + if (n < 0) { + obj_setErrorCode(self, __LINE__); + __platform_printf("Error. Internal error(%d)\r\n", __LINE__); + } + return n; +} + char* PikaStdData_String_strip(PikaObj* self, PikaTuple* chrs) { Args buffs = {0}; char to_strip = ' '; diff --git a/src/PikaVM.c b/src/PikaVM.c index 2c164ee03..17383ddd3 100644 --- a/src/PikaVM.c +++ b/src/PikaVM.c @@ -366,21 +366,17 @@ Arg* __vm_slice(PikaObj* self, Arg* end, Arg* obj, Arg* start, int step) { } if (ARG_TYPE_STRING == arg_getType(obj)) { - size_t len = strGetSize(arg_getStr(obj)); - if (start_i < 0) { - start_i += len; - } - if (end_i < 0) { - end_i += len + 1; - } - Arg* sliced_arg = arg_newStr(""); - for (int i = start_i; i < end_i; i++) { - Arg* i_arg = arg_newInt(i); - Arg* item_arg = __vm_get(self, i_arg, obj); - sliced_arg = arg_strAppend(sliced_arg, arg_getStr(item_arg)); - arg_deinit(item_arg); - arg_deinit(i_arg); + char* string_slice(Args * outBuffs, char* str, int start, int end); + Args buffs = {0}; + Arg* sliced_arg = NULL; + char* sliced_str = + string_slice(&buffs, arg_getStr(obj), start_i, end_i); + if (NULL != sliced_str) { + sliced_arg = arg_newStr(sliced_str); + } else { + sliced_arg = arg_newNull(); } + strsDeinit(&buffs); return sliced_arg; } diff --git a/src/PikaVersion.h b/src/PikaVersion.h index 0ed61f9f1..649efe5c8 100644 --- a/src/PikaVersion.h +++ b/src/PikaVersion.h @@ -2,4 +2,4 @@ #define PIKA_VERSION_MINOR 11 #define PIKA_VERSION_MICRO 2 -#define PIKA_EDIT_TIME "2022/09/28 10:56:54" +#define PIKA_EDIT_TIME "2022/09/29 20:41:55" diff --git a/test/python/issue/issue_big_str_slice.py b/test/python/issue/issue_big_str_slice.py new file mode 100644 index 000000000..6baa11164 --- /dev/null +++ b/test/python/issue/issue_big_str_slice.py @@ -0,0 +1,2 @@ +s = 'alsd;kfjweo;fij;lkdjflkjlkvjzhxcviuhrurqpeowirupoi1poij;lkja;lsdkfjljzohugoiurhgqpwoeirj;lkjkj;liupoUPOIUPOIUPoiuPOIUPOIupoqij;lkjf;lqkjd;flkajsd;l;kfjweo;fij;lkdjflkjlkvjzhxcviuhrurqpeowirupoi1poij;lkja;lsdkfjljzohugoiurhgqpwoeirj;lkjkj;liupoUPOIUPOIUPoiuPOIUPOIupoqij;lkjf;lqkjd;flkajsd;l;kfjweo;fij;lkdjflkjlkvjzhxcviuhrurqpeowirupoi1poij;lkja;lsdkfjljzohugoiurhgqpwoeirj;lkjkj;liupoUPOIUPOIUPoiuPOIUPOIupoqij;lkjf;lqkjd;flkajsd;l;kfjweo;fij;lkdjflkjlkvjzhxcviuhrurqpeowirupoi1poij;lkja;lsdkfjljzohugoiurhgqpwohugoiurhgqpwoeirj;lkjkj;liupoUPOIUPOIUPoiuPOIUPOIupoqij;lkjf;lqkjd;flkajsd;l;kfjweo;fij;lkdjflkjlkvjzhxcviuhrurqpeowirupoi1poij;lkja;lsdkfjljzohugoiurhgqpwoeirj;lkjkj;liupoUPOIUPOIUPoiuPOIUPOIupoqij;lkjf;lqkjd;flkajsd;l;kfjweo;fij;lkdjflkjlkvjzhxcviuhrurqpeowirupoi1poij;lkja;lsdkfjljzohugoiurhgqpwoeirj;lkjkj;liupoUPOIUPOIUPoiuPOIUPOIupoqij;lkjf;lqkjd;flkajsd;l;kfjweo;fij;lkdjflkjlkvjzhxcviuhrurqpeowirupoi1poij;lkja;lsdkfjljzohugoiurhgqpwoeirj;lkjkj;liupoUPOIUPOIUPoiuPOIUPOIupoqij;lkjf;lqkjd;flkajsd;l;kfjweo;fij;lkdjflkjlkvjzhxcviuhrurqpeowirupoi1poij;lkja;lsdkfjljzohugoiurhgqpwoeirj;lkjkj;liupoUPOIUPOIUPoiuPOIUPOIupoqij;lkjf;lqkjd;flkajsd;l;kfjweo;fij;lkdjflkjlkvjzhxcviuhrurqpeowirupoi1poij;lkja;lsdkfjljzohugoiurhgqpwoeirj;lkjkj;liupoUPOIUPOIUPoiuPOIUPOIupoqij;lkjf;lqkjd;flkajsd;l;kfjweo;fij;lkdjflkjlkvjzhxcviuhrurqpeowirupoi1poij;lkja;lsdkfjljzohugoiurhgqpwoeirj;lkjkj;liupoUPOIUPOIUPoiuPOIUPOIupoqij;lkjf;lqkjd;flkajsd;l;kfjweo;fij;lkdjflkjlkvjzhxcviuhrurqpeowirupoi1poij;lkja;lsdkfjljzohugoiurhgqpwoeirj;lkjkj;liupoUPOIUPOIUPoiuPOIUPOIupoqij;lkjf;lqkjd;flkajsd;l;kfjweo;fij;lkdjflkjlkvjzhxcviuhrurqpeowirupoi1poij;lkja;lsdkfjljzohugoiurhgqpwoeirj;lkjkj;liupoUPOIUPOIUPoiuPOIUPOIupoqij;lkjf;lqkjd;flkajsd;l;kfjweo;fij;lkdjflkjlkvjzhxcviuhrurqpeowirupoi1poij;lkja;lsdkfjljzohugoiurhgqpwoeirj;lkjkj;liupoUPOIUPOIUPoiuPOIUPOIupoqij;lkjf;lqkjd;flkajsd;l;kfjweo;fij;lkdjflkjlkvjzhxcviuhrurqpeowirupoi1poij;lkja;lsdkfjljzohugoiurhgqpwoeirj;lkjkj;liupoUPOIUPOIUPoiuPOIUPOIupoqij;lkjf;lqkjd;flkajsd;lfa;lsdhfouehfpqoi4ur[iroup0&)(*&)98uq4;ljkf;ksdmf;aksdf;awhefpouhpoi1u2p1o3uu8upaosidfae;wlfkjOO*)*FEPOFija;lwkejf;alkdsjfa;lwekjf;l1kj23;l4kj12;l34kj;lakjsdfoaiuijlvjlkjblkajhdfoihwepfoiweoqriuqweoirjq;wlekjr30upoiOIJ' +s1 = s[1:len(s) - 1] diff --git a/test/stddata-test.cpp b/test/stddata-test.cpp index e0d0c3a08..50262bc84 100644 --- a/test/stddata-test.cpp +++ b/test/stddata-test.cpp @@ -426,4 +426,18 @@ TEST(str, strip) { EXPECT_EQ(pikaMemNow(), 0); } +TEST(str, big_slice) { + /* init */ + pikaMemInfo.heapUsedMax = 0; + PikaObj* pikaMain = newRootObj("pikaMain", New_PikaMain); + /* run */ + __platform_printf("BEGIN\r\n"); + pikaVM_runSingleFile(pikaMain, "test/python/issue/issue_big_str_slice.py"); + /* collect */ + /* assert */ + /* deinit */ + obj_deinit(pikaMain); + EXPECT_EQ(pikaMemNow(), 0); +} + #endif