2963 lines
92 KiB
C
Raw Normal View History

2022-06-24 00:21:00 +08:00
/*
* This file is part of the PikaScript project.
* http://github.com/pikastech/pikascript
*
* MIT License
*
* Copyright (c) 2021 lyon liang6516@outlook.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "PikaParser.h"
#include "BaseObj.h"
#include "PikaObj.h"
#include "dataQueue.h"
#include "dataQueueObj.h"
#include "dataStack.h"
#include "dataStrs.h"
/* local head */
typedef QueueObj AST;
2022-09-16 21:01:18 +08:00
char* AST_genAsm(AST* ast, Args* outBuffs);
2022-08-26 00:51:41 +08:00
char* Lexer_parseLine(Args* outBuffs, char* stmt);
2022-06-24 00:21:00 +08:00
int32_t AST_deinit(AST* ast);
2022-08-26 00:51:41 +08:00
char* Parser_linesToAsm(Args* outBuffs, char* multiLine);
2022-06-24 00:21:00 +08:00
uint8_t Parser_isContainToken(char* tokens,
enum TokenType token_type,
char* pyload);
2022-09-16 21:01:18 +08:00
void Cursor_init(struct Cursor* cs);
void Cursor_parse(struct Cursor* cs, char* stmt);
void Cursor_deinit(struct Cursor* cs);
void Cursor_beforeIter(struct Cursor* cs);
void Cursor_iterStart(struct Cursor* cs);
void Cursor_iterEnd(struct Cursor* cs);
char* Cursor_popToken(Args* buffs, char** tokens, char* devide);
2022-06-24 00:21:00 +08:00
char* Parser_popToken(Args* buffs_p, char* tokens);
/*
 * Count the entries of a 0x1F-separated token string.
 * An empty string counts as 0; otherwise the result is the number of 0x1F
 * separators plus one.
 */
uint16_t Tokens_getSize(char* tokens) {
    return strEqu("", tokens) ? 0 : strCountSign(tokens, 0x1F) + 1;
}
/*
 * Split `stmts` around a token equal to `str` found at bracket depth 0.
 *
 * The first pass records the index of the matching token; because it does
 * not stop at the first hit, the last match wins. The second pass joins the
 * payloads before that index into the returned string (copied into
 * `outBuffs`) and the payloads after it into the remainder, which is written
 * back into `stmts` in place.
 *
 * `skipStart` and `skipEnd` are not referenced by this implementation.
 */
char* strsPopTokenWithSkip_byStr(Args* outBuffs,
                                 char* stmts,
                                 char* str,
                                 char skipStart,
                                 char skipEnd) {
    uint8_t split_at = 0;
    Arg* rest_arg = arg_newStr("");
    Arg* head_arg = arg_newStr("");

    /* pass 1: locate the divider */
    Cursor_forEachToken(cs, stmts) {
        Cursor_iterStart(&cs);
        if (cs.branket_deepth == 0 && strEqu(str, cs.token1.pyload)) {
            split_at = cs.iter_index;
        }
        Cursor_iterEnd(&cs);
    }
    Cursor_deinit(&cs);

    /* pass 2: distribute the payloads around the divider */
    Cursor_forEachTokenExistPs(cs, stmts) {
        Cursor_iterStart(&cs);
        if (cs.iter_index < split_at) {
            head_arg = arg_strAppend(head_arg, cs.token1.pyload);
        } else if (cs.iter_index > split_at) {
            rest_arg = arg_strAppend(rest_arg, cs.token1.pyload);
        }
        Cursor_iterEnd(&cs);
    }
    Cursor_deinit(&cs);

    char* rest = arg_getStr(rest_arg);
    char* head = strsCopy(outBuffs, arg_getStr(head_arg));
    /* overwrite the caller's buffer with what is left after the divider */
    __platform_memcpy(stmts, rest, strGetSize(rest) + 1);
    arg_deinit(head_arg);
    arg_deinit(rest_arg);
    return head;
}
/*
 * Re-emit `cmd` as the plain concatenation of its lexed token payloads.
 *
 * outBuffs: buffer pool that owns the returned string.
 * cmd:      statement text, must not be NULL.
 * returns:  NUL-terminated string allocated from `outBuffs`.
 */
char* strsGetCleanCmd(Args* outBuffs, char* cmd) {
    pika_assert(cmd != NULL);
    int32_t size = strGetSize(cmd);
    /* lexer may generate more chars than input */
    char* strOut = args_getBuff(outBuffs, size * 2);
    int32_t iOut = 0;
    Cursor_forEachToken(cs, cmd) {
        Cursor_iterStart(&cs);
        /* measure the payload once instead of on every loop test */
        int32_t len = strGetSize(cs.token1.pyload);
        __platform_memcpy(strOut + iOut, cs.token1.pyload, len);
        iOut += len;
        Cursor_iterEnd(&cs);
    }
    Cursor_deinit(&cs);
    /* add \0 */
    strOut[iOut] = 0;
    return strOut;
}
/*
 * Copy `strIn` into a buffer from `buffs_p`, leaving out the characters
 * enclosed by `begin` ... `end`. The delimiters themselves are kept: `begin`
 * is copied before the nesting level rises and `end` after it has dropped.
 */
char* strsDeleteBetween(Args* buffs_p, char* strIn, char begin, char end) {
    int32_t len = strGetSize(strIn);
    char* result = args_getBuff(buffs_p, len);
    char* dst = result;
    uint8_t level = 0;
    for (int pos = 0; pos < len; pos++) {
        char ch = strIn[pos];
        if (ch == end) {
            level--;
        }
        if (level == 0) {
            *dst++ = ch;
        }
        if (ch == begin) {
            level++;
        }
    }
    *dst = 0;
    return result;
}
static uint8_t Lexer_isError(char* line) {
Args buffs = {0};
uint8_t res = 0; /* not error */
2022-08-26 00:51:41 +08:00
char* tokens = Lexer_parseLine(&buffs, line);
2022-06-24 00:21:00 +08:00
if (NULL == tokens) {
res = 1; /* lex error */
goto exit;
}
goto exit;
exit:
strsDeinit(&buffs);
return res;
}
/*
 * Rebuild `input` from its token payloads while dropping every token nested
 * between `token_pyload1` and `token_pyload2`. The outermost opening and
 * closing tokens are kept. The result is copied into `outBuffs`.
 */
static char* __removeTokensBetween(Args* outBuffs,
                                   char* input,
                                   char* token_pyload1,
                                   char* token_pyload2) {
    Args tmp = {0};
    uint8_t level = 0;
    char* kept = "";
    Cursor_forEachToken(cs, input) {
        Cursor_iterStart(&cs);
        char* payload = cs.token1.pyload;
        if (strEqu(token_pyload1, payload)) {
            /* the outermost opener is kept before the level rises */
            if (0 == level) {
                kept = strsAppend(&tmp, kept, payload);
            }
            level++;
        }
        if (strEqu(token_pyload2, payload)) {
            level--;
        }
        /* anything at level 0, including the closer that got us back */
        if (0 == level) {
            kept = strsAppend(&tmp, kept, payload);
        }
        Cursor_iterEnd(&cs);
    }
    Cursor_deinit(&cs);
    kept = strsCopy(outBuffs, kept);
    strsDeinit(&tmp);
    return kept;
}
2022-09-24 21:02:44 +08:00
/*
 * Strip the contents of (), [] and {} from `stmt`, in that order, keeping
 * the brackets themselves. The result is cached in `outBuffs`.
 */
char* _remove_sub_stmt(Args* outBuffs, char* stmt) {
    char* brackets[][2] = {{"(", ")"}, {"[", "]"}, {"{", "}"}};
    Args tmp = {0};
    char* stripped = strsCopy(&tmp, stmt);
    for (int k = 0; k < (int)(sizeof(brackets) / sizeof(brackets[0])); k++) {
        stripped = __removeTokensBetween(&tmp, stripped, brackets[k][0],
                                         brackets[k][1]);
    }
    char* res = args_cacheStr(outBuffs, stripped);
    strsDeinit(&tmp);
    return res;
}
2022-06-24 00:21:00 +08:00
static enum StmtType Lexer_matchStmtType(char* right) {
Args buffs = {0};
enum StmtType stmtType = STMT_none;
2022-09-24 21:02:44 +08:00
char* rightWithoutSubStmt = _remove_sub_stmt(&buffs, right);
PIKA_BOOL is_get_operator = PIKA_FALSE;
PIKA_BOOL is_get_method = PIKA_FALSE;
PIKA_BOOL is_get_string = PIKA_FALSE;
PIKA_BOOL is_get_bytes = PIKA_FALSE;
PIKA_BOOL is_get_number = PIKA_FALSE;
PIKA_BOOL is_get_symbol = PIKA_FALSE;
PIKA_BOOL is_get_list = PIKA_FALSE;
PIKA_BOOL is_get_slice = PIKA_FALSE;
PIKA_BOOL is_get_dict = PIKA_FALSE;
PIKA_BOOL is_get_import = PIKA_FALSE;
PIKA_BOOL is_get_chain = PIKA_FALSE;
2022-09-16 21:01:18 +08:00
Cursor_forEachToken(cs, rightWithoutSubStmt) {
Cursor_iterStart(&cs);
2022-06-24 00:21:00 +08:00
/* collect type */
2022-09-16 21:01:18 +08:00
if (strEqu(cs.token1.pyload, " import ")) {
is_get_import = PIKA_TRUE;
goto iter_continue;
}
2022-09-16 21:01:18 +08:00
if (strEqu(cs.token2.pyload, "[")) {
/* (symble | iteral | <]> | <)>) + <[> */
2022-09-16 21:01:18 +08:00
if (TOKEN_symbol == cs.token1.type ||
TOKEN_literal == cs.token1.type ||
strEqu(cs.token1.pyload, "]") ||
strEqu(cs.token1.pyload, ")")) {
is_get_slice = PIKA_TRUE;
goto iter_continue;
}
/* ( <,> | <=> ) + <[> */
is_get_list = PIKA_TRUE;
2022-06-24 00:21:00 +08:00
}
2022-09-16 21:01:18 +08:00
if (strEqu(cs.token1.pyload, "[") && cs.iter_index == 1) {
/* VOID + <[> */
is_get_list = PIKA_TRUE;
goto iter_continue;
}
2022-09-16 21:01:18 +08:00
if (strEqu(cs.token1.pyload, "...")) {
goto iter_continue;
}
2022-09-16 21:01:18 +08:00
if (strEqu(cs.token1.pyload, "pass")) {
goto iter_continue;
}
if (strIsStartWith(cs.token1.pyload, ".")) {
if (cs.iter_index != 1) {
is_get_chain = PIKA_TRUE;
goto iter_continue;
}
}
2022-09-16 21:01:18 +08:00
if (strEqu(cs.token1.pyload, "{")) {
is_get_dict = PIKA_TRUE;
2022-06-24 00:21:00 +08:00
goto iter_continue;
}
2022-09-16 21:01:18 +08:00
if (cs.token1.type == TOKEN_operator) {
is_get_operator = PIKA_TRUE;
2022-06-24 00:21:00 +08:00
goto iter_continue;
}
/* <(> */
2022-09-16 21:01:18 +08:00
if (strEqu(cs.token1.pyload, "(")) {
is_get_method = PIKA_TRUE;
2022-06-24 00:21:00 +08:00
goto iter_continue;
}
2022-09-16 21:01:18 +08:00
if (cs.token1.type == TOKEN_literal) {
if (cs.token1.pyload[0] == '\'' || cs.token1.pyload[0] == '"') {
is_get_string = PIKA_TRUE;
2022-06-24 00:21:00 +08:00
goto iter_continue;
}
2022-09-16 21:01:18 +08:00
if (cs.token1.pyload[1] == '\'' || cs.token1.pyload[1] == '"') {
if (cs.token1.pyload[0] == 'b') {
is_get_bytes = PIKA_TRUE;
2022-06-24 00:21:00 +08:00
goto iter_continue;
}
}
is_get_number = PIKA_TRUE;
2022-06-24 00:21:00 +08:00
goto iter_continue;
}
2022-09-16 21:01:18 +08:00
if (cs.token1.type == TOKEN_symbol) {
is_get_symbol = PIKA_TRUE;
2022-06-24 00:21:00 +08:00
goto iter_continue;
}
iter_continue:
2022-09-16 21:01:18 +08:00
Cursor_iterEnd(&cs);
2022-06-24 00:21:00 +08:00
}
if (is_get_import) {
stmtType = STMT_import;
goto exit;
}
if (is_get_operator) {
stmtType = STMT_operator;
goto exit;
}
2022-08-26 00:51:41 +08:00
if (is_get_chain) {
stmtType = STMT_chain;
goto exit;
}
if (is_get_slice) {
stmtType = STMT_slice;
goto exit;
}
if (is_get_list) {
2022-06-24 00:21:00 +08:00
stmtType = STMT_list;
goto exit;
}
if (is_get_dict) {
stmtType = STMT_dict;
goto exit;
}
2022-06-24 00:21:00 +08:00
if (is_get_method) {
stmtType = STMT_method;
goto exit;
}
if (is_get_string) {
stmtType = STMT_string;
goto exit;
}
if (is_get_bytes) {
stmtType = STMT_bytes;
goto exit;
}
if (is_get_number) {
stmtType = STMT_number;
goto exit;
}
if (is_get_symbol) {
stmtType = STMT_reference;
goto exit;
}
exit:
2022-09-16 21:01:18 +08:00
Cursor_deinit(&cs);
2022-06-24 00:21:00 +08:00
strsDeinit(&buffs);
return stmtType;
}
/*
 * Render a token string as readable text: each token becomes a tag
 * ({opt}, {dvd}, {sym} or {lit}) followed by its payload. Tokens of any
 * other type are skipped.
 *
 * Tokens are consumed from `tokens` with Parser_popToken, using the count
 * taken before the loop. The result is copied into `outBuffs`.
 */
char* Lexer_printTokens(Args* outBuffs, char* tokens) {
    pika_assert(tokens);
    Args buffs = {0};
    char* text = strsCopy(&buffs, "");
    for (uint16_t left = Tokens_getSize(tokens); left > 0; left--) {
        char* token = Parser_popToken(&buffs, tokens);
        char* tag = NULL;
        switch (token[0]) {
            case TOKEN_operator:
                tag = "{opt}";
                break;
            case TOKEN_devider:
                tag = "{dvd}";
                break;
            case TOKEN_symbol:
                tag = "{sym}";
                break;
            case TOKEN_literal:
                tag = "{lit}";
                break;
            default:
                break;
        }
        if (NULL != tag) {
            text = strsAppend(&buffs, text, tag);
            /* the payload follows the type byte */
            text = strsAppend(&buffs, text, token + 1);
        }
    }
    text = strsCopy(outBuffs, text);
    strsDeinit(&buffs);
    return text;
}
uint8_t Parser_checkIsDirect(char* str) {
Args buffs = {0};
2022-08-26 00:51:41 +08:00
char* tokens = Lexer_parseLine(&buffs, str);
2022-06-24 00:21:00 +08:00
uint8_t res = 0;
pika_assert(NULL != tokens);
2022-06-24 00:21:00 +08:00
if (Parser_isContainToken(tokens, TOKEN_operator, "=")) {
res = 1;
goto exit;
}
exit:
strsDeinit(&buffs);
return res;
}
/*
 * Append one token to the token string held by `tokens_arg`.
 * The token is written as: 0x1F separator, type byte, payload text.
 *
 * The incoming `tokens_arg` is released; the returned Arg holds the longer
 * string and replaces it.
 */
Arg* Lexer_setToken(Arg* tokens_arg,
                    enum TokenType token_type,
                    char* operator) {
    Args buffs = {0};
    /* separator + type byte + terminator */
    char head[3] = {0x1F, 0, 0};
    head[1] = token_type;
    char* joined = strsAppend(&buffs, arg_getStr(tokens_arg), head);
    joined = strsAppend(&buffs, joined, operator);
    Arg* grown = arg_setStr(tokens_arg, "", joined);
    arg_deinit(tokens_arg);
    strsDeinit(&buffs);
    return grown;
}
/*
 * Emit the pending text stmt[*symbol_start_index .. i) as one token.
 *
 * The text becomes TOKEN_literal when it starts with a quote, a decimal
 * digit, or b' / b"; otherwise it becomes TOKEN_symbol. Nothing is emitted
 * when no text is pending (index is -1) or the range is empty.
 * `*symbol_start_index` is reset to -1 in every case.
 */
Arg* Lexer_setSymbel(Arg* tokens_arg,
                     char* stmt,
                     int32_t i,
                     int32_t* symbol_start_index) {
    Args buffs = {0};
    int32_t start = *symbol_start_index;
    if (-1 != start && i != start) {
        int32_t len = i - start;
        char* text = args_getBuff(&buffs, len);
        __platform_memcpy(text, stmt + start, len);

        int is_quoted = (text[0] == '\'') || (text[0] == '"');
        int is_digit = (text[0] >= '0') && (text[0] <= '9');
        int is_bytes =
            (text[0] == 'b') && ((text[1] == '\'') || (text[1] == '"'));

        enum TokenType type = TOKEN_symbol;
        if (is_quoted || is_digit || is_bytes) {
            type = TOKEN_literal;
        }
        tokens_arg = Lexer_setToken(tokens_arg, type, text);
    }
    *symbol_start_index = -1;
    strsDeinit(&buffs);
    return tokens_arg;
}
/* tokens is devided by space */
/* a token is [TOKENTYPE|(CONTENT)] */
2022-08-26 00:51:41 +08:00
char* Lexer_parseLine(Args* outBuffs, char* stmt) {
2022-06-24 00:21:00 +08:00
/* init */
Arg* tokens_arg = New_arg(NULL);
tokens_arg = arg_setStr(tokens_arg, "", "");
int32_t size = strGetSize(stmt);
uint8_t bracket_deepth = 0;
2022-08-30 18:47:49 +08:00
uint8_t cn2 = 0;
2022-06-24 00:21:00 +08:00
uint8_t cn1 = 0;
uint8_t c0 = 0;
uint8_t c1 = 0;
uint8_t c2 = 0;
uint8_t c3 = 0;
uint8_t c4 = 0;
uint8_t c5 = 0;
uint8_t c6 = 0;
int32_t symbol_start_index = -1;
int is_in_string = 0;
2022-08-26 00:51:41 +08:00
int is_number = 0;
2022-06-24 00:21:00 +08:00
char* tokens;
/* process */
for (int32_t i = 0; i < size; i++) {
/* update char */
2022-08-30 18:47:49 +08:00
cn2 = 0;
2022-06-24 00:21:00 +08:00
cn1 = 0;
c0 = stmt[i];
c1 = 0;
c2 = 0;
c3 = 0;
c4 = 0;
c5 = 0;
c6 = 0;
2022-08-30 18:47:49 +08:00
if (i - 2 >= 0) {
cn2 = stmt[i - 2];
}
2022-06-24 00:21:00 +08:00
if (i - 1 >= 0) {
cn1 = stmt[i - 1];
}
if (i + 1 < size) {
c1 = stmt[i + 1];
}
if (i + 2 < size) {
c2 = stmt[i + 2];
}
if (i + 3 < size) {
c3 = stmt[i + 3];
}
if (i + 4 < size) {
c4 = stmt[i + 4];
}
if (i + 5 < size) {
c5 = stmt[i + 5];
}
if (i + 6 < size) {
c6 = stmt[i + 6];
}
if (-1 == symbol_start_index) {
2022-08-26 00:51:41 +08:00
is_number = 0;
if ((c0 >= '0') && (c0 <= '9')) {
is_number = 1;
}
2022-06-24 00:21:00 +08:00
symbol_start_index = i;
}
/* solve string */
if (0 == is_in_string) {
2022-08-30 18:47:49 +08:00
if ('\'' == c0) {
if ('\\' != cn1 || ('\\' == cn1 && '\\' == cn2)) {
/* in ' */
is_in_string = 1;
continue;
}
2022-06-24 00:21:00 +08:00
}
2022-08-30 18:47:49 +08:00
if ('"' == c0) {
if ('\\' != cn1 || ('\\' == cn1 && '\\' == cn2)) {
/* in "" */
is_in_string = 2;
continue;
}
2022-06-24 00:21:00 +08:00
}
}
if (1 == is_in_string) {
2022-08-30 18:47:49 +08:00
if ('\'' == c0 && ('\\' != cn1 || ('\\' == cn1 && '\\' == cn2))) {
2022-06-24 00:21:00 +08:00
is_in_string = 0;
tokens_arg = Lexer_setSymbel(tokens_arg, stmt, i + 1,
&symbol_start_index);
}
continue;
}
if (2 == is_in_string) {
2022-08-30 18:47:49 +08:00
if ('"' == c0 && ('\\' != cn1 || ('\\' == cn1 && '\\' == cn2))) {
2022-06-24 00:21:00 +08:00
is_in_string = 0;
tokens_arg = Lexer_setSymbel(tokens_arg, stmt, i + 1,
&symbol_start_index);
}
continue;
}
2022-08-26 00:51:41 +08:00
/* match annotation */
if ('#' == c0) {
break;
}
2022-06-24 00:21:00 +08:00
/* match devider*/
if (('(' == c0) || (')' == c0) || (',' == c0) || ('[' == c0) ||
(']' == c0) || (':' == c0) || ('{' == c0) || ('}' == c0)) {
2022-06-24 00:21:00 +08:00
tokens_arg =
Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index);
char content[2] = {0};
content[0] = c0;
tokens_arg = Lexer_setToken(tokens_arg, TOKEN_devider, content);
if (c0 == '(') {
bracket_deepth++;
}
if (c0 == ')') {
bracket_deepth--;
}
continue;
}
/* match operator */
if (('>' == c0) || ('<' == c0) || ('*' == c0) || ('/' == c0) ||
('+' == c0) || ('-' == c0) || ('!' == c0) || ('=' == c0) ||
('%' == c0) || ('&' == c0) || ('|' == c0) || ('^' == c0) ||
('~' == c0)) {
2022-08-26 00:51:41 +08:00
if ('-' == c0 && is_number) {
if ((cn1 == 'e') || (cn1 == 'E')) {
continue;
}
}
2022-06-24 00:21:00 +08:00
if (('*' == c0) || ('/' == c0)) {
/*
=, **=, //
*/
if ((c0 == c1) && ('=' == c2)) {
char content[4] = {0};
content[0] = c0;
content[1] = c1;
content[2] = '=';
tokens_arg = Lexer_setSymbel(tokens_arg, stmt, i,
&symbol_start_index);
tokens_arg =
Lexer_setToken(tokens_arg, TOKEN_operator, content);
i = i + 2;
continue;
}
}
/*
>>, <<, **, //
*/
if (('>' == c0) || ('<' == c0) || ('*' == c0) || ('/' == c0)) {
if (c0 == c1) {
char content[3] = {0};
content[0] = c0;
content[1] = c1;
tokens_arg = Lexer_setSymbel(tokens_arg, stmt, i,
&symbol_start_index);
tokens_arg =
Lexer_setToken(tokens_arg, TOKEN_operator, content);
i = i + 1;
continue;
}
}
/*
>=, <=, *=, /=, +=, -=, !=, ==, %=
*/
if (('>' == c0) || ('<' == c0) || ('*' == c0) || ('/' == c0) ||
('+' == c0) || ('-' == c0) || ('!' == c0) || ('=' == c0) ||
('%' == c0)) {
if ('=' == c1) {
char content[3] = {0};
content[0] = c0;
content[1] = c1;
tokens_arg = Lexer_setSymbel(tokens_arg, stmt, i,
&symbol_start_index);
tokens_arg =
Lexer_setToken(tokens_arg, TOKEN_operator, content);
i = i + 1;
continue;
}
}
/* single operator */
/*
+, -, *, ... /
*/
char content[2] = {0};
content[0] = c0;
tokens_arg =
Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index);
tokens_arg = Lexer_setToken(tokens_arg, TOKEN_operator, content);
continue;
}
// not the string operator
if ((cn1 >= 'a' && cn1 <= 'z') || (cn1 >= 'A' && cn1 <= 'Z') ||
(cn1 >= '0' && cn1 <= '9') || cn1 == '_' || cn1 == '.') {
goto after_match_string_operator;
}
2022-06-24 00:21:00 +08:00
/* not */
if ('n' == c0) {
if (('o' == c1) && ('t' == c2) && (' ' == c3)) {
tokens_arg =
Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index);
tokens_arg =
Lexer_setToken(tokens_arg, TOKEN_operator, " not ");
i = i + 3;
continue;
}
}
/* and */
if ('a' == c0) {
if (('n' == c1) && ('d' == c2) && (' ' == c3)) {
tokens_arg =
Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index);
tokens_arg =
Lexer_setToken(tokens_arg, TOKEN_operator, " and ");
i = i + 3;
continue;
}
}
/* or */
if ('o' == c0) {
if (('r' == c1) && (' ' == c2)) {
tokens_arg =
Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index);
tokens_arg = Lexer_setToken(tokens_arg, TOKEN_operator, " or ");
i = i + 2;
continue;
}
}
2022-08-26 00:51:41 +08:00
/* is */
if ('i' == c0) {
if (('s' == c1) && (' ' == c2)) {
tokens_arg =
Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index);
tokens_arg = Lexer_setToken(tokens_arg, TOKEN_operator, " is ");
i = i + 2;
continue;
}
}
/* in */
if ('i' == c0) {
if (('n' == c1) && (' ' == c2)) {
tokens_arg =
Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index);
tokens_arg = Lexer_setToken(tokens_arg, TOKEN_operator, " in ");
i = i + 2;
continue;
}
}
2022-06-24 00:21:00 +08:00
/* as */
if ('a' == c0) {
if (('s' == c1) && (' ' == c2)) {
tokens_arg =
Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index);
tokens_arg = Lexer_setToken(tokens_arg, TOKEN_operator, " as ");
i = i + 2;
continue;
}
}
/* import */
if ('i' == c0) {
if (('m' == c1) && ('p' == c2) && ('o' == c3) && ('r' == c4) &&
('t' == c5) && (' ' == c6)) {
tokens_arg =
Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index);
tokens_arg =
Lexer_setToken(tokens_arg, TOKEN_operator, " import ");
i = i + 5;
continue;
}
}
after_match_string_operator:
2022-06-24 00:21:00 +08:00
/* skip spaces */
if (' ' == c0) {
/* not get symbal */
if (i == symbol_start_index) {
2022-08-26 00:51:41 +08:00
symbol_start_index = -1;
2022-06-24 00:21:00 +08:00
} else {
/* already get symbal */
tokens_arg =
Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index);
}
}
if (i == size - 1) {
/* last check symbel */
// if('\n' == c0){
// continue;
// }
tokens_arg =
Lexer_setSymbel(tokens_arg, stmt, size, &symbol_start_index);
}
}
/* output */
tokens = arg_getStr(tokens_arg);
tokens = strsCopy(outBuffs, tokens);
arg_deinit(tokens_arg);
return tokens;
}
/* Thin wrapper: calls strsPopToken on `tokens` with 0x1F as the separator
 * and `buffs_p` as the buffer pool, and returns its result. */
char* Parser_popToken(Args* buffs_p, char* tokens) {
return strsPopToken(buffs_p, tokens, 0x1F);
}
/* Read the token type, which is stored in the first byte of `token`. */
enum TokenType Token_getType(char* token) {
return (enum TokenType)token[0];
}
/* The payload text starts right after the single leading type byte. */
char* Token_getPyload(char* token) {
    return &token[1];
}
/*
 * Search a token string for a token with the given type and payload.
 * The search works on a private copy, so `tokens` is left untouched.
 * returns: 1 when such a token exists, otherwise 0.
 */
uint8_t Parser_isContainToken(char* tokens,
                              enum TokenType token_type,
                              char* pyload) {
    Args buffs = {0};
    char* work = strsCopy(&buffs, tokens);
    uint8_t found = 0;
    uint16_t left = Tokens_getSize(tokens);
    while (left > 0 && !found) {
        char* token = Parser_popToken(&buffs, work);
        if (token_type == Token_getType(token) &&
            strEqu(Token_getPyload(token), pyload)) {
            found = 1;
        }
        left--;
    }
    strsDeinit(&buffs);
    return found;
}
/*
 * Operator payloads known to the parser. Each row is a fixed 9-byte slot:
 * the longest entry, " import ", is 8 characters plus the terminator.
 * Lexer_getOperator walks the table in order and keeps the last entry that
 * occurs in the statement, so the order of the rows matters.
 */
static const char operators[][9] = {
    /* arithmetic */
    "**", "~", "*", "/", "%", "//", "+", "-",
    /* shift and bitwise, then the start of the comparisons */
    ">>", "<<", "&", "^", "|", "<", "<=", ">",
    /* comparisons, identity / membership, first in-place assignments */
    ">=", "!=", "==", " is ", " in ", "%=", "/=", "//=",
    /* remaining in-place assignments, logical keywords, import */
    "-=", "+=", "*=", "**=", " not ", " and ", " or ", " import "};
2022-06-24 00:21:00 +08:00
/*
 * Pick the operator of `stmt` that the parser should split on.
 *
 * outBuffs: buffer pool that owns the returned string.
 * stmt:     statement text.
 * returns:  a copy of the chosen operator in `outBuffs`, or NULL when the
 *           statement contains none of the entries of `operators`.
 *
 * Steps:
 *  1. walk the `operators` table in order; every entry that occurs as a
 *     token overwrites the previous pick, so the last matching row wins;
 *  2. if the pick is "-" and that "-" directly follows another operator
 *     (the "~-1" case), use the preceding operator instead;
 *  3. if the pick is "+" or "-", use whichever of the two occurs last in
 *     the statement.
 *
 * NOTE(review): step 1 hands the already-lexed `tokens` string to
 * Cursor_forEachToken while steps 2 and 3 hand it `stmt`; confirm that this
 * difference is intended.
 */
char* Lexer_getOperator(Args* outBuffs, char* stmt) {
    Args buffs = {0};
    char* operator= NULL;
    char* tokens = Lexer_parseLine(&buffs, stmt);

    // use parse state foreach to get operator
    for (uint32_t i = 0; i < sizeof(operators) / sizeof(operators[0]); i++) {
        Cursor_forEachToken(cs, tokens) {
            Cursor_iterStart(&cs);
            // get operator
            if (strEqu(cs.token2.pyload, (char*)operators[i])) {
                operator= strsCopy(&buffs, (char*)operators[i]);
                Cursor_iterEnd(&cs);
                break;
            }
            Cursor_iterEnd(&cs);
        };
        Cursor_deinit(&cs);
    }

    /* nothing matched: release the scratch buffers before leaving, and do
     * not hand a NULL string to the comparisons below */
    if (NULL == operator) {
        strsDeinit(&buffs);
        return NULL;
    }

    /* solve the issue of "~-1" */
    if (strEqu(operator, "-")) {
        Cursor_forEachToken(cs, stmt) {
            Cursor_iterStart(&cs);
            if (strEqu(cs.token2.pyload, "-")) {
                if (cs.token1.type == TOKEN_operator) {
                    /* copy before Cursor_iterEnd frees the payload */
                    operator= strsCopy(&buffs, cs.token1.pyload);
                    Cursor_iterEnd(&cs);
                    break;
                }
            }
            Cursor_iterEnd(&cs);
        };
        Cursor_deinit(&cs);
    }

    /* match the last operator in equal level */
    if ((strEqu(operator, "+")) || (strEqu(operator, "-"))) {
        Cursor_forEachToken(cs, stmt) {
            Cursor_iterStart(&cs);
            if (strEqu(cs.token1.pyload, "+")) {
                operator= strsCopy(&buffs, "+");
            }
            if (strEqu(cs.token1.pyload, "-")) {
                operator= strsCopy(&buffs, "-");
            }
            Cursor_iterEnd(&cs);
        }
        Cursor_deinit(&cs);
    }

    /* output */
    operator= strsCopy(outBuffs, operator);
    strsDeinit(&buffs);
    return operator;
}
/* Empty string that LexToken_update uses as the payload of TOKEN_strEnd
 * tokens. */
const char void_str[] = "";
/*
 * Refresh `type` and `pyload` from the raw `token` string.
 * A TOKEN_strEnd token gets the shared empty string as payload; any other
 * token gets the text that follows its type byte.
 */
void LexToken_update(struct LexToken* lex_token) {
    lex_token->type = Token_getType(lex_token->token);
    lex_token->pyload = (lex_token->type == TOKEN_strEnd)
                            ? (char*)void_str
                            : Token_getPyload(lex_token->token);
}
2022-09-16 21:01:18 +08:00
/*
 * Begin one iteration step.
 *
 * A fresh per-iteration buffer is created. token1 becomes the token
 * remembered from the previous step, and token2 is popped from the
 * remaining token string and remembered for the next step. The bracket
 * depth is then adjusted according to token1.
 *
 * Each call must be paired with Cursor_iterEnd, which frees the buffer.
 */
void Cursor_iterStart(struct Cursor* cs) {
    cs->iter_index++;
    cs->iter_buffs = New_strBuff();

    /* token1 is the last token */
    cs->token1.token = strsCopy(cs->iter_buffs, arg_getStr(cs->last_token));
    /* token2 is the next token */
    cs->token2.token = Parser_popToken(cs->iter_buffs, cs->tokens);

    /* store last token */
    arg_deinit(cs->last_token);
    cs->last_token = arg_newStr(cs->token2.token);

    LexToken_update(&cs->token1);
    LexToken_update(&cs->token2);

    /* any opening bracket deepens the nesting, any closing one reduces it */
    char* payload = cs->token1.pyload;
    if (strEqu(payload, "(") || strEqu(payload, "[") ||
        strEqu(payload, "{")) {
        cs->branket_deepth++;
    } else if (strEqu(payload, ")") || strEqu(payload, "]") ||
               strEqu(payload, "}")) {
        cs->branket_deepth--;
    }
}
/* Reset a token view: type TOKEN_strEnd, no raw token, no payload. */
void LexToken_init(struct LexToken* lt) {
    lt->type = TOKEN_strEnd;
    lt->token = NULL;
    lt->pyload = NULL;
}
2022-09-16 21:01:18 +08:00
/*
 * Put a cursor into its initial state and allocate its long-lived buffer
 * pool (`buffs_p`). Release it with Cursor_deinit.
 */
void Cursor_init(struct Cursor* cs) {
    /* iteration state */
    cs->iter_index = 0;
    cs->branket_deepth = 0;
    cs->length = 0;
    cs->result = PIKA_RES_OK;

    /* token storage */
    cs->tokens = NULL;
    cs->last_token = NULL;
    cs->iter_buffs = NULL;
    cs->buffs_p = New_strBuff();

    LexToken_init(&cs->token1);
    LexToken_init(&cs->token2);
}
2022-09-16 21:01:18 +08:00
/* Finish one iteration step: free the per-iteration buffer that
 * Cursor_iterStart created. */
void Cursor_iterEnd(struct Cursor* cs) {
    args_deinit(cs->iter_buffs);
}
2022-09-16 21:01:18 +08:00
/* Release the remembered token, if there is one, and the cursor's buffer
 * pool. */
void Cursor_deinit(struct Cursor* cs) {
    if (cs->last_token != NULL) {
        arg_deinit(cs->last_token);
    }
    args_deinit(cs->buffs_p);
}
2022-09-16 21:01:18 +08:00
/*
 * Lex `stmt` into the cursor's buffer pool and record the token count.
 * A NULL `stmt`, or a NULL result from the lexer, sets `result` to
 * PIKA_RES_ERR_SYNTAX_ERROR instead.
 */
void Cursor_parse(struct Cursor* cs, char* stmt) {
    if (NULL != stmt) {
        cs->tokens = Lexer_parseLine(cs->buffs_p, stmt);
        if (NULL != cs->tokens) {
            cs->length = Tokens_getSize(cs->tokens);
            return;
        }
    }
    cs->result = PIKA_RES_ERR_SYNTAX_ERROR;
}
2022-09-16 21:01:18 +08:00
void Cursor_beforeIter(struct Cursor* cs) {
2022-06-24 00:21:00 +08:00
/* clear first token */
2022-09-16 21:01:18 +08:00
if (cs->result != PIKA_RES_OK) {
return;
}
2022-09-16 21:01:18 +08:00
Parser_popToken(cs->buffs_p, cs->tokens);
cs->last_token = arg_newStr(Parser_popToken(cs->buffs_p, cs->tokens));
}
/*
 * Pop the leading item of `*tokens`, up to the first `devide` token found
 * at bracket depth 0.
 *
 * Payloads before the divider form the returned item; payloads after it are
 * stored back into `*tokens`. Both strings are copied into `buffs`. Divider
 * tokens at depth 0 and the final iteration (iter_index == length) are
 * never appended to either string.
 */
char* Cursor_popToken(Args* buffs, char** tokens, char* devide) {
    Arg* head = arg_newStr("");
    Arg* tail = arg_newStr("");
    PIKA_BOOL passed_divider = PIKA_FALSE;
    Cursor_forEachToken(cs, *tokens) {
        Cursor_iterStart(&cs);
        if ((cs.branket_deepth == 0 && strEqu(cs.token1.pyload, devide)) ||
            cs.iter_index == cs.length) {
            passed_divider = PIKA_TRUE;
        } else if (!passed_divider) {
            head = arg_strAppend(head, cs.token1.pyload);
        } else {
            tail = arg_strAppend(tail, cs.token1.pyload);
        }
        Cursor_iterEnd(&cs);
    }
    Cursor_deinit(&cs);

    /* cache out item */
    char* item = strsCopy(buffs, arg_getStr(head));
    arg_deinit(head);
    /* cache tokens after and hand them back to the caller */
    *tokens = strsCopy(buffs, arg_getStr(tail));
    arg_deinit(tail);
    return item;
}
/*
 * Split the text inside [...] into start, end and step expressions at the
 * ':' tokens found at bracket depth 0.
 *
 *   no colon   : end = start + " + 1", step = "1"
 *   one colon  : step = "1"; an empty start becomes "0", an empty end "-1"
 *   two colons : the three parts are returned as written
 *
 * The three results are copied into `outBuffs`.
 */
static void Slice_getPars(Args* outBuffs,
                          char* inner,
                          char** pStart,
                          char** pEnd,
                          char** pStep) {
    Args buffs = {0};
    uint8_t colons = 0;
    *pStart = "";
    *pEnd = "";
    *pStep = "";

    /* distribute the payloads by the number of colons seen so far */
    Cursor_forEachToken(cs, inner) {
        Cursor_iterStart(&cs);
        if (strEqu(cs.token1.pyload, ":") && cs.branket_deepth == 0) {
            colons++;
        } else {
            switch (colons) {
                case 0:
                    *pStart = strsAppend(&buffs, *pStart, cs.token1.pyload);
                    break;
                case 1:
                    *pEnd = strsAppend(&buffs, *pEnd, cs.token1.pyload);
                    break;
                case 2:
                    *pStep = strsAppend(&buffs, *pStep, cs.token1.pyload);
                    break;
                default:
                    break;
            }
        }
        Cursor_iterEnd(&cs);
    }
    Cursor_deinit(&cs);

    if (1 == colons) {
        /* a[start:end] */
        *pStep = "1";
        if (strEqu(*pStart, "")) {
            *pStart = "0";
        }
        if (strEqu(*pEnd, "")) {
            *pEnd = "-1";
        }
    } else if (0 == colons) {
        /* a[index] */
        *pEnd = strsAppend(&buffs, *pStart, " + 1");
        *pStep = "1";
    }

    /* output */
    *pStart = strsCopy(outBuffs, *pStart);
    *pEnd = strsCopy(outBuffs, *pEnd);
    *pStep = strsCopy(outBuffs, *pStep);
    /* clean */
    strsDeinit(&buffs);
}
2022-09-16 21:01:18 +08:00
/*
 * Syntax sugar for an indexed assignment target.
 *
 * When the left side `*left_p` has the form obj[key], the right side is
 * rewritten to "__setitem__(obj,key,right)" and the left string is cut at
 * its first '[' in place, so only "obj" remains.
 *
 * outBuffs: buffer pool that owns the returned string.
 * right:    right-hand side text of the assignment.
 * left_p:   pointer to the left-hand side text; the text may be modified.
 * returns:  the rewritten right side, or a copy of `right` when the left
 *           side is NULL or holds no (symbol | literal) + '[' pattern.
 *           With PIKA_SYNTAX_SLICE_ENABLE off, `right` itself is returned.
 */
char* Suger_leftSlice(Args* outBuffs, char* right, char** left_p) {
#if !PIKA_SYNTAX_SLICE_ENABLE
    return right;
#endif
    /* init objects */
    Args buffs = {0};
    Arg* right_arg = arg_newStr("");
    char* left = *left_p;
    uint8_t is_in_brancket = 0;
    args_setStr(&buffs, "inner", "");
    uint8_t matched = 0;
    char* right_res = NULL;

    /* exit when NULL */
    if (NULL == left) {
        arg_deinit(right_arg);
        /* build a fresh Arg rather than passing the freed one around */
        right_arg = arg_newStr(right);
        goto exit;
    }

    /* exit when not match
         (symbol|literal)'['
    */
    Cursor_forEachToken(cs, left) {
        Cursor_iterStart(&cs);
        if (strEqu(cs.token2.pyload, "[")) {
            if (TOKEN_symbol == cs.token1.type ||
                TOKEN_literal == cs.token1.type) {
                matched = 1;
                Cursor_iterEnd(&cs);
                break;
            }
        }
        Cursor_iterEnd(&cs);
    }
    Cursor_deinit(&cs);
    if (!matched) {
        /* not contain '[', return origin */
        arg_deinit(right_arg);
        right_arg = arg_newStr(right);
        goto exit;
    }

    /* matched [] */
    Cursor_forEachTokenExistPs(cs, left) {
        Cursor_iterStart(&cs);
        /* found '[' */
        if ((TOKEN_devider == cs.token2.type) &&
            (strEqu(cs.token2.pyload, "["))) {
            /* get 'obj' from obj[] */
            args_setStr(&buffs, "obj", cs.token1.pyload);
            is_in_brancket = 1;
            /* found ']' */
        } else if ((TOKEN_devider == cs.token2.type) &&
                   (strEqu(cs.token2.pyload, "]"))) {
            is_in_brancket = 0;
            char* inner = args_getStr(&buffs, "inner");
            Arg* inner_arg = arg_newStr(inner);
            inner_arg = arg_strAppend(inner_arg, cs.token1.pyload);
            args_setStr(&buffs, "inner", arg_getStr(inner_arg));
            arg_deinit(inner_arg);
            /* update inner pointer */
            inner = args_getStr(&buffs, "inner");
            char* start = NULL;
            char* end = NULL;
            char* step = NULL;
            /* only `start` is used as the key below */
            Slice_getPars(&buffs, inner, &start, &end, &step);
            /* obj = __setitem__(obj, key, val) */
            right_arg = arg_strAppend(right_arg, "__setitem__(");
            right_arg = arg_strAppend(right_arg, args_getStr(&buffs, "obj"));
            right_arg = arg_strAppend(right_arg, ",");
            right_arg = arg_strAppend(right_arg, start);
            right_arg = arg_strAppend(right_arg, ",");
            right_arg = arg_strAppend(right_arg, right);
            right_arg = arg_strAppend(right_arg, ")");
            /* clean the inner */
            args_setStr(&buffs, "inner", "");
            /* in bracket and token1 is not '[' */
        } else if (is_in_brancket && (!strEqu(cs.token1.pyload, "["))) {
            char* inner = args_getStr(&buffs, "inner");
            Arg* index_arg = arg_newStr(inner);
            index_arg = arg_strAppend(index_arg, cs.token1.pyload);
            args_setStr(&buffs, "inner", arg_getStr(index_arg));
            arg_deinit(index_arg);
            /* out of bracket and token1 is not ']' */
        } else if (!is_in_brancket && (!strEqu(cs.token1.pyload, "]"))) {
            if (TOKEN_strEnd != cs.token1.type) {
                right_arg = arg_strAppend(right_arg, cs.token1.pyload);
            }
        }
        Cursor_iterEnd(&cs);
    }
    Cursor_deinit(&cs);

    /* clean the left: cut it at the first '[' */
    for (char* p = left; *p != '\0'; p++) {
        if (*p == '[') {
            *p = '\0';
            break;
        }
    }

exit:
    /* clean and return */
    right_res = strsCopy(outBuffs, arg_getStr(right_arg));
    arg_deinit(right_arg);
    strsDeinit(&buffs);
    return right_res;
}
2022-09-16 21:01:18 +08:00
/*
 * Rewrite the "%" string-format syntax into a cformat() call.
 *
 * A quoted literal directly followed by a "%" token starts a format
 * expression; everything after the "%" up to the last token of the statement
 * is collected as the argument list, e.g.
 *     "%d" % a         ->  cformat("%d",a)
 *     "%d,%d" % (a, b) ->  cformat("%d,%d",a,b)
 * Tokens that are not part of a format expression are copied unchanged.
 *
 * outBuffs: buffer pool that owns the returned string when a rewrite happens.
 * right:    statement text to scan.
 * Returns `right` itself when there is nothing to rewrite, otherwise a new
 * string allocated in outBuffs.
 */
char* Suger_format(Args* outBuffs, char* right) {
#if !PIKA_SYNTAX_FORMAT_ENABLE
    return right;
#endif
    /* cheap rejection: no '%' character at all */
    if (!strIsContain(right, '%')) {
        return right;
    }
    /* first pass: is there a "%" token outside of every bracket? */
    PIKA_BOOL has_format_op = PIKA_FALSE;
    Cursor_forEachToken(probe, right) {
        Cursor_iterStart(&probe);
        if (probe.branket_deepth == 0 && strEqu(probe.token1.pyload, "%")) {
            has_format_op = PIKA_TRUE;
        }
        Cursor_iterEnd(&probe);
    }
    Cursor_deinit(&probe);
    if (PIKA_FALSE == has_format_op) {
        return right;
    }

    /* second pass: rebuild the statement */
    Arg* out_arg = arg_newStr("");
    Arg* vars_arg = arg_newStr("");
    PIKA_BOOL in_format = PIKA_FALSE;
    PIKA_BOOL vars_are_tuple = PIKA_FALSE;
    PIKA_BOOL vars_done = PIKA_FALSE;
    Args buffs = {0};
    char* fmt = NULL;
    Cursor_forEachToken(cs, right) {
        /* text copied to the output when we are outside a format expr */
        char* passthrough = "";
        Cursor_iterStart(&cs);
        if (PIKA_FALSE == in_format) {
            if (cs.token1.type == TOKEN_literal &&
                (cs.token1.pyload[0] == '\'' || cs.token1.pyload[0] == '"') &&
                strEqu(cs.token2.pyload, "%")) {
                /* quoted literal followed by '%': format expr starts here */
                in_format = PIKA_TRUE;
                fmt = strsCopy(&buffs, cs.token1.pyload);
            } else {
                passthrough = cs.token1.pyload;
            }
        } else if (strEqu(cs.token1.pyload, "%")) {
            /* the token after '%' decides between tuple and single value */
            if (strEqu(cs.token2.pyload, "(")) {
                vars_are_tuple = PIKA_TRUE;
            } else {
                vars_arg = arg_strAppend(vars_arg, cs.token2.pyload);
            }
        } else {
            if (cs.iter_index == cs.length) {
                /* last token of the statement closes the format expr */
                vars_done = PIKA_TRUE;
                in_format = PIKA_FALSE;
            } else {
                /* collect the look-ahead token as part of the arguments */
                vars_arg = arg_strAppend(vars_arg, cs.token2.pyload);
            }
            if (vars_done) {
                out_arg = arg_strAppend(out_arg, "cformat(");
                out_arg = arg_strAppend(out_arg, fmt);
                out_arg = arg_strAppend(out_arg, ",");
                out_arg = arg_strAppend(out_arg, arg_getStr(vars_arg));
                /* a tuple already carries its own closing ')' */
                if (!vars_are_tuple) {
                    out_arg = arg_strAppend(out_arg, ")");
                }
            }
        }
        if (!in_format) {
            out_arg = arg_strAppend(out_arg, passthrough);
        }
        Cursor_iterEnd(&cs);
    }
    Cursor_deinit(&cs);

    char* res = strsCopy(outBuffs, arg_getStr(out_arg));
    arg_deinit(out_arg);
    arg_deinit(vars_arg);
    strsDeinit(&buffs);
    return res;
}
2022-09-16 21:01:18 +08:00
/*
 * Expand an augmented assignment ("+=", "-=", "*=", "/=") into a plain one.
 *
 * For "a += 1" the left side becomes "a" and the right side "a+(1)".
 *
 * outbuffs: buffer pool that owns the strings written to *left_p / *right_p.
 * stmt:     statement text.
 * right_p:  receives the expanded right side (untouched when no operator).
 * left_p:   receives the assignment target (untouched when no operator).
 * Returns 1 when an augmented assignment was found, 0 otherwise.
 */
uint8_t Suger_selfOperator(Args* outbuffs,
                           char* stmt,
                           char** right_p,
                           char** left_p) {
    /* checked in this order; the last one present in the line wins */
    static const char* const self_ops[] = {"+=", "-=", "*=", "/="};
    const size_t self_ops_num = sizeof(self_ops) / sizeof(self_ops[0]);

    char* left_new = NULL;
    char* right_new = NULL;
    Arg* target_arg = arg_newStr("");
    Arg* operand_arg = arg_newStr("");
    Arg* expanded_arg = arg_newStr("");
    uint8_t found = 0;
    Args buffs = {0};
    /* one-character operator as a NUL terminated string */
    char op_str[2] = {0};
    char* tokens = Lexer_parseLine(&buffs, stmt);

    for (size_t k = 0; k < self_ops_num; k++) {
        if (Parser_isContainToken(tokens, TOKEN_operator,
                                  (char*)self_ops[k])) {
            op_str[0] = self_ops[k][0];
        }
    }

    if (op_str[0] != 0) {
        uint8_t past_operator = 0;
        found = 1;
        Cursor_forEachToken(cs, stmt) {
            Cursor_iterStart(&cs);
            uint8_t is_self_op = 0;
            for (size_t k = 0; k < self_ops_num; k++) {
                if (strEqu(cs.token1.pyload, (char*)self_ops[k])) {
                    is_self_op = 1;
                    break;
                }
            }
            if (is_self_op) {
                /* the operator token itself is dropped */
                past_operator = 1;
            } else if (!past_operator) {
                target_arg = arg_strAppend(target_arg, cs.token1.pyload);
            } else {
                operand_arg = arg_strAppend(operand_arg, cs.token1.pyload);
            }
            Cursor_iterEnd(&cs);
        }
        Cursor_deinit(&cs);

        /* build "<left><op>(<right>)" */
        expanded_arg = arg_strAppend(expanded_arg, arg_getStr(target_arg));
        expanded_arg = arg_strAppend(expanded_arg, op_str);
        expanded_arg = arg_strAppend(expanded_arg, "(");
        expanded_arg = arg_strAppend(expanded_arg, arg_getStr(operand_arg));
        expanded_arg = arg_strAppend(expanded_arg, ")");

        left_new = arg_getStr(target_arg);
        right_new = arg_getStr(expanded_arg);
    }

    strsDeinit(&buffs);
    if (NULL != right_new) {
        *right_p = strsCopy(outbuffs, right_new);
    }
    if (NULL != left_new) {
        *left_p = strsCopy(outbuffs, left_new);
    }
    arg_deinit(operand_arg);
    arg_deinit(target_arg);
    arg_deinit(expanded_arg);
    return found;
}
2022-09-16 21:01:18 +08:00
/* Set the string attribute `attr_name` of an AST node to `attr_val`.
 * Returns whatever obj_setStr() reports. */
PIKA_RES AST_setNodeAttr(AST* ast, char* attr_name, char* attr_val) {
return obj_setStr(ast, attr_name, attr_val);
}
/* Read the string attribute `attr_name` of an AST node via obj_getStr(). */
char* AST_getNodeAttr(AST* ast, char* attr_name) {
return obj_getStr(ast, attr_name);
}
/* Record the block keyword of a line (e.g. "while", "def") in the node's
 * "block" attribute. */
PIKA_RES AST_setNodeBlock(AST* ast, char* node_content) {
return AST_setNodeAttr(ast, "block", node_content);
}
/* Return the node's "block" attribute (the value stored by
 * AST_setNodeBlock). */
char* AST_getThisBlock(AST* ast) {
return obj_getStr(ast, "block");
}
AST* AST_parseStmt(AST* ast, char* stmt);
/* Append a child node named "stmt" to `ast` and parse `node_content` into it.
 * Always returns PIKA_RES_OK: the result of AST_parseStmt() is not checked.
 * NOTE(review): AST_parseStmt() deinits the node it was given when it fails;
 * here that node is the freshly pushed child, which looks like it stays
 * referenced by `ast` - confirm. */
PIKA_RES AST_parseSubStmt(AST* ast, char* node_content) {
queueObj_pushObj(ast, (char*)"stmt");
AST_parseStmt(queueObj_getCurrentObj(ast), node_content);
return PIKA_RES_OK;
}
/*
 * Split a statement at the first `delimiter` token that is not nested inside
 * brackets.
 *
 * outbuffs:  buffer pool that receives both result strings.
 * stmt_p:    in: statement to split; out: everything after the delimiter
 *            ("" when no delimiter was found).
 * delimiter: token text to split on, e.g. "," or ":".
 * Returns the text before the delimiter (the delimiter itself is dropped).
 */
char* Parser_popSubStmt(Args* outbuffs, char** stmt_p, char* delimiter) {
    Arg* substmt_arg = arg_newStr("");
    Arg* newstmt_arg = arg_newStr("");
    char* stmt = *stmt_p;
    PIKA_BOOL is_get_substmt = PIKA_FALSE;
    Cursor_forEachToken(cs, stmt) {
        Cursor_iterStart(&cs);
        if (is_get_substmt) {
            /* already split: the rest belongs to the remaining statement */
            newstmt_arg = arg_strAppend(newstmt_arg, cs.token1.pyload);
        } else if (!(cs.branket_deepth > 0) &&
                   strEqu(cs.token1.pyload, delimiter)) {
            /* top-level delimiter: switch sides, drop the token */
            is_get_substmt = PIKA_TRUE;
        } else {
            /* nested tokens and ordinary tokens stay in the sub statement */
            substmt_arg = arg_strAppend(substmt_arg, cs.token1.pyload);
        }
        Cursor_iterEnd(&cs);
    }
    Cursor_deinit(&cs);
    /* the Arg objects are handed to strsCacheArg and not deinited here */
    char* substmt = strsCacheArg(outbuffs, substmt_arg);
    char* newstmt = strsCacheArg(outbuffs, newstmt_arg);
    *stmt_p = newstmt;
    return substmt;
}
/*
 * Split a statement at the LAST token that starts with `delimiter`.
 *
 * For every delimiter except "[" only tokens outside of all brackets count;
 * for "[" the token is accepted up to bracket depth 1.
 *
 * outbuffs:  buffer pool that receives both result strings.
 * stmt_p:    in: statement to split; out: the tokens before the split point.
 * delimiter: prefix of the token to split at, e.g. "." or "[".
 * Returns the tokens from the split point (delimiter included) to the end.
 * When no delimiter qualifies, *stmt_p becomes "" and the whole statement is
 * returned.
 */
char* Parser_popLastSubStmt(Args* outbuffs, char** stmt_p, char* delimiter) {
    char* stmt = *stmt_p;
    uint8_t split_index = 0;
    /* deepest bracket level at which a delimiter is still accepted */
    const int max_depth = strEqu(delimiter, "[") ? 1 : 0;

    /* pass 1: remember the index of the last qualifying delimiter */
    Cursor_forEachToken(scan, stmt) {
        Cursor_iterStart(&scan);
        if (strIsStartWith(scan.token1.pyload, delimiter) &&
            !(scan.branket_deepth > max_depth)) {
            split_index = scan.iter_index;
        }
        Cursor_iterEnd(&scan);
    }
    Cursor_deinit(&scan);

    /* pass 2: distribute the tokens to both sides of the split point */
    Arg* head_arg = arg_newStr("");
    Arg* tail_arg = arg_newStr("");
    Cursor_forEachToken(split, stmt) {
        Cursor_iterStart(&split);
        if (split.iter_index < split_index) {
            head_arg = arg_strAppend(head_arg, split.token1.pyload);
        } else {
            tail_arg = arg_strAppend(tail_arg, split.token1.pyload);
        }
        Cursor_iterEnd(&split);
    }
    Cursor_deinit(&split);

    *stmt_p = strsCacheArg(outbuffs, head_arg);
    return strsCacheArg(outbuffs, tail_arg);
}
2022-08-26 00:51:41 +08:00
/*
 * Parse a list literal "[a, b, ...]": mark the node with the "list"
 * attribute and add one child statement per top-level comma separated item.
 * Does nothing when PIKA_BUILTIN_STRUCT_ENABLE is off.
 */
static void _AST_parse_list(AST* ast, Args* buffs, char* stmt) {
#if !PIKA_BUILTIN_STRUCT_ENABLE
    return;
#endif
    AST_setNodeAttr(ast, (char*)"list", "list");
    /* text between the brackets, terminated by ',' so that every item is
     * followed by a delimiter */
    char* items = strsAppend(buffs, strsCut(buffs, stmt, '[', ']'), ",");
    do {
        char* item = Parser_popSubStmt(buffs, &items, ",");
        AST_parseSubStmt(ast, item);
    } while (!strEqu(items, ""));
}
/*
 * Parse a dict literal "{k: v, ...}": mark the node with the "dict"
 * attribute and add two child statements (key, then value) per entry.
 * Does nothing when PIKA_BUILTIN_STRUCT_ENABLE is off.
 */
static void _AST_parse_dict(AST* ast, Args* buffs, char* stmt) {
#if !PIKA_BUILTIN_STRUCT_ENABLE
    return;
#endif
    AST_setNodeAttr(ast, (char*)"dict", "dict");
    /* text between the braces, terminated by ',' so that every entry is
     * followed by a delimiter */
    char* entries = strsAppend(buffs, strsCut(buffs, stmt, '{', '}'), ",");
    do {
        /* after popping the key, `entry` holds the value expression */
        char* entry = Parser_popSubStmt(buffs, &entries, ",");
        char* key = Parser_popSubStmt(buffs, &entry, ":");
        AST_parseSubStmt(ast, key);
        AST_parseSubStmt(ast, entry);
    } while (!strEqu(entries, ""));
}
/*
 * Parse a subscript / slice expression "obj[a:b:c]".
 *
 * The node gets the "slice" attribute, one child for the subscripted
 * expression and one child per ':' separated field. An empty first field is
 * replaced by "0" and an empty second field by "-1".
 * Does nothing when PIKA_SYNTAX_SLICE_ENABLE is off.
 */
static void _AST_parse_slice(AST* ast, Args* buffs, char* stmt) {
#if !PIKA_SYNTAX_SLICE_ENABLE
    return;
#endif
    AST_setNodeAttr(ast, (char*)"slice", "slice");
    /* work on a copy: popLastSubStmt replaces the pointer */
    char* target = strsCopy(buffs, stmt);
    char* subscript = Parser_popLastSubStmt(buffs, &target, "[");
    AST_parseSubStmt(ast, target);

    char* fields = strsCut(buffs, subscript, '[', ']');
    pika_assert(fields != NULL);
    fields = strsAppend(buffs, fields, ":");

    int field_index = 0;
    do {
        char* field = Parser_popSubStmt(buffs, &fields, ":");
        if (strEqu(field, "")) {
            /* defaults for omitted start / end */
            if (field_index == 0) {
                field = "0";
            } else if (field_index == 1) {
                field = "-1";
            }
        }
        AST_parseSubStmt(ast, field);
        field_index++;
    } while (!strEqu("", fields));
}
2022-06-24 00:21:00 +08:00
/*
 * Parse one statement into the AST node `ast`.
 *
 * The statement is first split into an optional assignment target ("left"
 * attribute) and the expression on the right; augmented assignments, left
 * side subscripts and "%" formatting are rewritten by the Suger_* helpers.
 * The expression is then classified by Lexer_matchStmtType() and stored as
 * the matching attribute ("operator", "method", "ref", "string", "bytes",
 * "num", "import", ...) with child statements where needed.
 *
 * ast:  node to fill.
 * stmt: statement text.
 * Returns `ast` on success. On a syntax error the node is released with
 * AST_deinit() and NULL is returned, so the caller must not touch `ast`
 * afterwards.
 */
AST* AST_parseStmt(AST* ast, char* stmt) {
    Args buffs = {0};
    char* assignment = strsGetFirstToken(&buffs, stmt, '(');
    char* left = NULL;
    char* right = stmt;
    PIKA_RES result = PIKA_RES_OK;
    uint8_t isLeftExist = 0;

    /* direct assignment: split the tokens at the '=' operator */
    if (Parser_checkIsDirect(assignment)) {
        isLeftExist = 1;
        left = strsCopy(&buffs, "");
        right = strsCopy(&buffs, "");
        uint8_t is_meet_equ = 0;
        Cursor_forEachToken(cs, stmt) {
            Cursor_iterStart(&cs);
            if (strEqu(cs.token1.pyload, "=") &&
                cs.token1.type == TOKEN_operator) {
                is_meet_equ = 1;
                Cursor_iterEnd(&cs);
                continue;
            }
            if (0 == is_meet_equ) {
                left = strsAppend(&buffs, left, cs.token1.pyload);
            }
            if (1 == is_meet_equ) {
                right = strsAppend(&buffs, right, cs.token1.pyload);
            }
            Cursor_iterEnd(&cs);
        }
        Cursor_deinit(&cs);
    }
    /* solve the += -= /= *= stmt */
    if (!isLeftExist) {
        isLeftExist = Suger_selfOperator(&buffs, stmt, &right, &left);
    }
    /* solve the [] stmt on the left side, then the '%' format syntax */
    right = Suger_leftSlice(&buffs, right, &left);
    right = Suger_format(&buffs, right);
    /* set left */
    if (isLeftExist) {
        AST_setNodeAttr(ast, (char*)"left", left);
    }
    /* match statement type */
    enum StmtType stmtType = Lexer_matchStmtType(right);

    /* solve operator stmt */
    if (STMT_operator == stmtType) {
        char* rightWithoutSubStmt = _remove_sub_stmt(&buffs, right);
        char* op = Lexer_getOperator(&buffs, rightWithoutSubStmt);
        if (NULL == op) {
            result = PIKA_RES_ERR_SYNTAX_ERROR;
            goto exit;
        }
        AST_setNodeAttr(ast, (char*)"operator", op);
        /* rightBuff keeps what is left after the first operand is popped */
        char* rightBuff = strsCopy(&buffs, right);
        char* subStmt1 =
            strsPopTokenWithSkip_byStr(&buffs, rightBuff, op, '(', ')');
        char* subStmt2 = rightBuff;
        AST_parseSubStmt(ast, subStmt1);
        AST_parseSubStmt(ast, subStmt2);
        goto exit;
    }
    /* solve list stmt */
    if (STMT_list == stmtType) {
        _AST_parse_list(ast, &buffs, right);
        goto exit;
    }
    /* solve dict stmt */
    if (STMT_dict == stmtType) {
        _AST_parse_dict(ast, &buffs, right);
        goto exit;
    }
    /* solve method chain */
    if (STMT_chain == stmtType) {
        char* chainHead = strsCopy(&buffs, right);
        char* lastStmt = Parser_popLastSubStmt(&buffs, &chainHead, ".");
        AST_parseSubStmt(ast, chainHead);
        if (NULL == AST_parseStmt(ast, lastStmt)) {
            /* the recursive call already released `ast`: report the failure
             * instead of handing a dangling pointer back to the caller */
            ast = NULL;
        }
        goto exit;
    }
    /* solve slice stmt */
    if (STMT_slice == stmtType) {
        _AST_parse_slice(ast, &buffs, right);
        goto exit;
    }
    /* solve method stmt */
    if (STMT_method == stmtType) {
        char* method = strsGetFirstToken(&buffs, right, '(');
        AST_setNodeAttr(ast, (char*)"method", method);
        char* subStmts = strsCut(&buffs, right, '(', ')');
        pika_assert(NULL != subStmts);
        /* add ',' at the end so every argument is followed by a delimiter */
        subStmts = strsAppend(&buffs, subStmts, ",");
        while (1) {
            char* substmt = Parser_popSubStmt(&buffs, &subStmts, ",");
            AST_parseSubStmt(ast, substmt);
            if (strEqu("", subStmts)) {
                break;
            }
        }
        goto exit;
    }
    /* solve reference stmt */
    if (STMT_reference == stmtType) {
        AST_setNodeAttr(ast, (char*)"ref", right);
        goto exit;
    }
    /* solve import stmt */
    if (STMT_import == stmtType) {
        char* import = strsGetLastToken(&buffs, right, ' ');
        AST_setNodeAttr(ast, (char*)"import", import);
        goto exit;
    }
    /* solve str stmt */
    if (STMT_string == stmtType) {
        char* str = strsCopy(&buffs, right);
        /* remove the first char (opening quote) */
        str = str + 1;
        /* remove the last char (closing quote) */
        str[strGetSize(str) - 1] = '\0';
        /* unescape \" and \' */
        if (strIsContain(str, '\\')) {
            str = strsReplace(&buffs, str, "\\\"", "\"");
            str = strsReplace(&buffs, str, "\\'", "'");
        }
        AST_setNodeAttr(ast, (char*)"string", str);
        goto exit;
    }
    /* solve bytes stmt */
    if (STMT_bytes == stmtType) {
        /* skip the leading prefix character, then drop the quotes */
        char* bytes = right + 1;
        bytes = strsDeleteChar(&buffs, bytes, '\'');
        bytes = strsDeleteChar(&buffs, bytes, '\"');
        AST_setNodeAttr(ast, (char*)"bytes", bytes);
        goto exit;
    }
    /* solve number stmt */
    if (STMT_number == stmtType) {
        AST_setNodeAttr(ast, (char*)"num", right);
        goto exit;
    }
exit:
    strsDeinit(&buffs);
    if (result != PIKA_RES_OK) {
        AST_deinit(ast);
        return NULL;
    }
    return ast;
}
/*
 * Compute the block depth of a source line from its leading spaces.
 *
 * Returns the number of leading spaces divided by 4, 0 for a line that
 * consists of spaces only (or is empty), and -1 when the indentation is not
 * a multiple of 4.
 */
static int32_t Parser_getPyLineBlockDeepth(char* line) {
    const uint32_t length = strGetSize(line);
    uint32_t indent = 0;
    while (indent < length && line[indent] == ' ') {
        indent++;
    }
    if (indent == length) {
        /* nothing but spaces */
        return 0;
    }
    if (indent % 4 != 0) {
        /* space num is not 4N, error */
        return -1;
    }
    return indent / 4;
}
/*
 * Strip a trailing '#' comment from a source line, in place.
 *
 * A '#' only starts a comment outside of string literals. While inside a
 * literal, the other quote character is ordinary text and a backslash
 * escapes the following character, so lines such as
 *     s = "it's"   # note
 *     s = 'a\'#b'  # note
 * lose exactly the trailing comment.
 *
 * line: mutable, NUL terminated line; it is truncated at the comment.
 * Returns `line` when code remains (or no comment was found). When the line
 * held nothing but spaces and a comment, the marker string "@annontation"
 * is returned instead (the spelling is what the rest of the parser
 * compares against).
 */
char* Parser_removeAnnotation(char* line) {
    /* quote character of the literal we are in, 0 when outside */
    char open_quote = 0;
    char* comment = NULL;
    for (char* p = line; *p != '\0'; p++) {
        if (open_quote != 0) {
            if (*p == '\\' && p[1] != '\0') {
                /* escaped character: never closes the literal */
                p++;
            } else if (*p == open_quote) {
                open_quote = 0;
            }
            continue;
        }
        if (*p == '\'' || *p == '"') {
            open_quote = *p;
            continue;
        }
        if (*p == '#') {
            comment = p;
            break;
        }
    }
    /* no annotation, exit */
    if (comment == NULL) {
        return line;
    }
    /* end the line at the comment */
    *comment = '\0';
    /* anything but spaces left? then it is still a code line */
    for (const char* p = line; *p != '\0'; p++) {
        if (*p != ' ') {
            return line;
        }
    }
    /* comment-only line */
    return "@annontation";
}
2022-09-16 21:01:18 +08:00
/*
 * Separate default values from a function declaration.
 *
 * For "f(a, b=1)" the declaration is rewritten to "f(a,b=)" and the
 * defaults "b=1" are returned as a comma separated list.
 *
 * outBuffs: buffer pool that owns both result strings.
 * dec_out:  in: declaration "name(args)"; out: declaration with the default
 *           values removed. Left untouched when the declaration has no
 *           "(...)" argument list.
 * Returns the list of "name=value" defaults, "" when there are none.
 */
char* _defGetDefault(Args* outBuffs, char** dec_out) {
    Args buffs = {0};
    char* dec_str = strsCopy(&buffs, *dec_out);
    char* arg_list = strsCut(&buffs, dec_str, '(', ')');
    if (NULL == arg_list) {
        /* malformed declaration without "(...)": nothing to split */
        strsDeinit(&buffs);
        return strsCopy(outBuffs, "");
    }
    char* fn_name = strsGetFirstToken(&buffs, dec_str, '(');
    Arg* dec_arg = arg_strAppend(arg_newStr(fn_name), "(");
    Arg* default_arg = arg_newStr("");
    int arg_num = strCountSign(arg_list, ',') + 1;
    for (int i = 0; i < arg_num; i++) {
        /* pops the next argument off the front of arg_list */
        char* arg_str = strsPopToken(&buffs, arg_list, ',');
        int is_default = 0;
        if (strIsContain(arg_str, '=')) {
            default_arg = arg_strAppend(default_arg, arg_str);
            default_arg = arg_strAppend(default_arg, ",");
            /* keep only the name in front of '=' for the declaration */
            arg_str = strsPopToken(&buffs, arg_str, '=');
            is_default = 1;
        }
        dec_arg = arg_strAppend(dec_arg, arg_str);
        if (is_default) {
            dec_arg = arg_strAppend(dec_arg, "=");
        }
        dec_arg = arg_strAppend(dec_arg, ",");
    }
    /* drop the trailing ',' before closing the argument list */
    strPopLastToken(arg_getStr(dec_arg), ',');
    dec_arg = arg_strAppend(dec_arg, ")");
    *dec_out = strsCopy(outBuffs, arg_getStr(dec_arg));
    char* default_out = strsCopy(outBuffs, arg_getStr(default_arg));
    strPopLastToken(default_out, ',');
    arg_deinit(dec_arg);
    arg_deinit(default_arg);
    strsDeinit(&buffs);
    return default_out;
}
/*
 * Wrap the expression list of "return a, b" in parentheses.
 *
 * When `line` contains a ',' token outside of all brackets the result is
 * "(<line>)" allocated in out_buffs; otherwise `line` is returned unchanged.
 * Disabled (returns `line`) when PIKA_NANO_ENABLE is set.
 */
static char* Suger_multiReturn(Args* out_buffs, char* line) {
#if PIKA_NANO_ENABLE
    return line;
#endif
    PIKA_BOOL is_wrapped = PIKA_FALSE;
    Cursor_forEachToken(cursor, line) {
        Cursor_iterStart(&cursor);
        if (cursor.branket_deepth == 0) {
            if (strEqu(cursor.token1.pyload, ",")) {
                /* +3: two parentheses and the terminating NUL */
                line =
                    strsFormat(out_buffs, strGetSize(line) + 3, "(%s)", line);
                is_wrapped = PIKA_TRUE;
            }
        }
        Cursor_iterEnd(&cursor);
        if (is_wrapped) {
            /* one top-level comma is enough */
            break;
        }
    }
    Cursor_deinit(&cursor);
    return line;
}
2022-06-24 00:21:00 +08:00
/* control keywords: matched as a whole word, stored as an empty attribute,
 * they carry no statement and open no block */
const char control_keywords[][9] = {"break", "continue"};
/* block keywords followed by a condition: the text between the keyword and
 * ':' is parsed as the statement and the keyword is pushed on the block
 * stack */
const char normal_keywords[][7] = {"while", "if", "elif"};
2022-09-16 21:01:18 +08:00
/*
 * Parse one source line into a new AST.
 *
 * The leading indentation gives the block depth ("blockDeepth" attribute);
 * every block that the line leaves is popped from the block stack and listed
 * in the "exitBlock" queue. The line is then matched against the supported
 * keywords (while/if/elif, break/continue, for, else, try/except, return,
 * raise, assert, global, del, def, class); whatever statement text remains
 * is parsed by AST_parseStmt().
 *
 * line:         source line without the line break; NULL yields NULL.
 * block_stack:  stack of currently open block keywords, updated in place.
 *               When NULL, a private stack that only lives for this call is
 *               used.
 * block_deepth: offset added to the depth computed from the indentation.
 * Returns the new AST (the caller releases it), or NULL on an indentation or
 * syntax error.
 */
AST* AST_parseLine_withBlockStack_withBlockDeepth(char* line,
                                                  Stack* block_stack,
                                                  int block_deepth) {
    /* line is not exist: bail out before anything is allocated */
    if (line == NULL) {
        return NULL;
    }
    Stack s;
    stack_init(&s);
    if (block_stack == NULL) {
        block_stack = &s;
    }
    /* init data */
    AST* ast = New_queueObj();
    Args buffs = {0};
    int8_t block_deepth_now, block_deepth_last = -1;
    char *line_start, *stmt;
    /* get block deepth */
    block_deepth_now = Parser_getPyLineBlockDeepth(line);
    if (block_deepth_now == -1) {
        /* get block_deepth error */
        __platform_printf(
            "IndentationError: unexpected indent, only support 4 spaces\r\n");
        obj_deinit(ast);
        ast = NULL;
        goto exit;
    }
    block_deepth_now += block_deepth;
    obj_setInt(ast, "blockDeepth", block_deepth_now);
    /* check if exit block */
    block_deepth_last = stack_getTop(block_stack) + block_deepth;
    /* exit each block */
    for (int i = 0; i < block_deepth_last - block_deepth_now; i++) {
        QueueObj* exit_block_queue = obj_getObj(ast, "exitBlock");
        /* create an exit_block queue */
        if (NULL == exit_block_queue) {
            obj_newObj(ast, "exitBlock", "", New_TinyObj);
            exit_block_queue = obj_getObj(ast, "exitBlock");
            queueObj_init(exit_block_queue);
        }
        char buff[10] = {0};
        char* block_type = stack_popStr(block_stack, buff);
        /* push exit block type to exit_block queue */
        queueObj_pushStr(exit_block_queue, block_type);
    }
    /* skip the indentation of this line */
    line_start = line + (block_deepth_now - block_deepth) * 4;
    stmt = line_start;

    /* "while" "if" "elif" */
    for (uint32_t i = 0;
         i < sizeof(normal_keywords) / sizeof(normal_keywords[0]); i++) {
        char* keyword = (char*)normal_keywords[i];
        uint8_t keyword_len = strGetSize(keyword);
        if (strIsStartWith(line_start, keyword) &&
            (line_start[keyword_len] == ' ')) {
            stmt = strsCut(&buffs, line_start, ' ', ':');
            AST_setNodeBlock(ast, keyword);
            stack_pushStr(block_stack, keyword);
            goto block_matched;
        }
    }
    /* control keywords: "break", "continue" */
    for (uint32_t i = 0;
         i < sizeof(control_keywords) / sizeof(control_keywords[0]); i++) {
        char* keyword = (char*)control_keywords[i];
        uint8_t keyword_len = strGetSize(keyword);
        if ((strIsStartWith(line_start, keyword)) &&
            ((line_start[keyword_len] == ' ') ||
             (line_start[keyword_len] == 0))) {
            AST_setNodeAttr(ast, keyword, "");
            stmt = "";
            goto block_matched;
        }
    }
    /* for */
    if (strIsStartWith(line_start, "for ")) {
        Args* list_buffs = New_strBuff();
        char* line_buff = strsCopy(list_buffs, line_start + 4);
        line_buff = strsGetCleanCmd(list_buffs, line_buff);
        if (strCountSign(line_buff, ':') < 1) {
            args_deinit(list_buffs);
            obj_deinit(ast);
            ast = NULL;
            goto exit;
        }
        char* arg_in = strsPopToken(list_buffs, line_buff, ' ');
        AST_setNodeAttr(ast, "arg_in", arg_in);
        /* drop the token between the loop variable and the iterable */
        strsPopToken(list_buffs, line_buff, ' ');
        char* list_in = strsPopToken(list_buffs, line_buff, ':');
        list_in = strsAppend(list_buffs, "iter(", list_in);
        list_in = strsAppend(list_buffs, list_in, ")");
        /* move the result to the buffer that outlives list_buffs */
        list_in = strsCopy(&buffs, list_in);
        args_deinit(list_buffs);
        AST_setNodeBlock(ast, "for");
        AST_setNodeAttr(ast, "list_in", list_in);
        stack_pushStr(block_stack, "for");
        stmt = list_in;
        goto block_matched;
    }
    /* else */
    if (strIsStartWith(line_start, "else")) {
        if ((line_start[4] == ' ') || (line_start[4] == ':')) {
            stmt = "";
            AST_setNodeBlock(ast, "else");
            stack_pushStr(block_stack, "else");
        }
        goto block_matched;
    }
#if PIKA_SYNTAX_EXCEPTION_ENABLE
    /* try */
    if (strIsStartWith(line_start, "try")) {
        if ((line_start[3] == ' ') || (line_start[3] == ':')) {
            stmt = "";
            AST_setNodeBlock(ast, "try");
            stack_pushStr(block_stack, "try");
        }
        goto block_matched;
    }
    /* except */
    if (strIsStartWith(line_start, "except")) {
        if ((line_start[6] == ' ') || (line_start[6] == ':')) {
            stmt = "";
            AST_setNodeBlock(ast, "except");
            stack_pushStr(block_stack, "except");
        }
        goto block_matched;
    }
#endif
    /* return */
    if (strEqu(line_start, "return")) {
        AST_setNodeAttr(ast, "return", "");
        stmt = "";
        goto block_matched;
    }
    if (strIsStartWith(line_start, "return ")) {
        char* lineBuff = strsCopy(&buffs, line_start);
        /* drop the keyword, keep the expression */
        strsPopToken(&buffs, lineBuff, ' ');
        stmt = lineBuff;
        stmt = Suger_multiReturn(&buffs, stmt);
        AST_setNodeAttr(ast, "return", "");
        goto block_matched;
    }
#if PIKA_SYNTAX_EXCEPTION_ENABLE
    /* raise */
    if (strEqu(line_start, "raise")) {
        AST_setNodeAttr(ast, "raise", "");
        stmt = "RuntimeError";
        goto block_matched;
    }
    if (strIsStartWith(line_start, "raise ")) {
        AST_setNodeAttr(ast, "raise", "");
        char* lineBuff = strsCopy(&buffs, line_start);
        strsPopToken(&buffs, lineBuff, ' ');
        stmt = lineBuff;
        if (strEqu("", stmt)) {
            stmt = "RuntimeError";
        }
        goto block_matched;
    }
    /* assert */
    if (strIsStartWith(line_start, "assert ")) {
        stmt = "";
        AST_setNodeAttr(ast, "assert", "");
        char* lineBuff = strsCopy(&buffs, line_start + 7);
        /* assert expr [, msg] */
        while (1) {
            char* subStmt = Parser_popSubStmt(&buffs, &lineBuff, ",");
            AST_parseSubStmt(ast, subStmt);
            if (strEqu(lineBuff, "")) {
                break;
            }
        }
        goto block_matched;
    }
#endif
    /* global */
    if (strIsStartWith(line_start, "global ")) {
        stmt = "";
        char* global_list = line_start + 7;
        global_list = strsGetCleanCmd(&buffs, global_list);
        AST_setNodeAttr(ast, "global", global_list);
        goto block_matched;
    }
    /* del */
    if (strIsStartWith(line_start, "del ")) {
        stmt = "";
        char* del_dir = line_start + sizeof("del ") - 1;
        del_dir = strsGetCleanCmd(&buffs, del_dir);
        AST_setNodeAttr(ast, "del", del_dir);
        goto block_matched;
    }
    /* def */
    if (strIsStartWith(line_start, (char*)"def ")) {
        stmt = "";
        char* declare = strsCut(&buffs, line_start, ' ', ':');
        if (NULL == declare) {
            obj_deinit(ast);
            ast = NULL;
            goto exit;
        }
        declare = strsGetCleanCmd(&buffs, declare);
        char* defaultStmt = _defGetDefault(&buffs, &declare);
        AST_setNodeBlock(ast, "def");
        AST_setNodeAttr(ast, "declare", declare);
        if (defaultStmt[0] != '\0') {
            AST_setNodeAttr(ast, "default", defaultStmt);
        }
        stack_pushStr(block_stack, "def");
        goto block_matched;
    }
    /* class */
    if (strIsStartWith(line_start, (char*)"class ")) {
        stmt = "";
        char* declare = strsCut(&buffs, line_start, ' ', ':');
        if (NULL == declare) {
            obj_deinit(ast);
            ast = NULL;
            goto exit;
        }
        declare = strsGetCleanCmd(&buffs, declare);
        AST_setNodeBlock(ast, "class");
        AST_setNodeAttr(ast, "declare", declare);
        stack_pushStr(block_stack, "class");
        goto block_matched;
    }
block_matched:
    /* strsCut() found no "<keyword> ... :" pair */
    if (NULL == stmt) {
        AST_deinit(ast);
        ast = NULL;
        goto exit;
    }
    stmt = strsGetCleanCmd(&buffs, stmt);
    /* returns NULL (and releases ast) on a syntax error */
    ast = AST_parseStmt(ast, stmt);
exit:
    stack_deinit(&s);
    strsDeinit(&buffs);
    return ast;
}
2022-09-16 21:01:18 +08:00
/* Parse one line using the caller's block stack and no extra block depth
 * offset. */
static AST* AST_parseLine_withBlockStack(char* line, Stack* block_stack) {
return AST_parseLine_withBlockStack_withBlockDeepth(line, block_stack, 0);
}
/* Parse one line with `block_deepth` added to its indentation depth. No
 * block stack is passed (NULL), so the callee uses its own temporary one. */
static AST* AST_parseLine_withBlockDeepth(char* line, int block_deepth) {
return AST_parseLine_withBlockStack_withBlockDeepth(line, NULL,
block_deepth);
}
/* Return the "blockDeepth" integer stored on the AST when the line was
 * parsed. */
int AST_getBlockDeepthNow(AST* ast) {
return obj_getInt(ast, "blockDeepth");
}
/* Parse a single stand-alone line: no shared block stack (NULL) and no
 * block depth offset. */
AST* AST_parseLine(char* line) {
return AST_parseLine_withBlockStack(line, NULL);
}
/*
 * Expand "import <origin> as <alias>" into two lines:
 *     import <origin>
 *     <alias> = <origin>
 *
 * buffs_p: buffer pool that owns the returned string when the line is
 *          rewritten.
 * line:    source line.
 * Returns `line` unchanged when it is not an import or has no " as " part,
 * otherwise the two-line replacement (separated by '\n').
 * Disabled (returns `line`) when PIKA_SYNTAX_IMPORT_EX_ENABLE is off.
 */
static char* Suger_import(Args* buffs_p, char* line) {
#if !PIKA_SYNTAX_IMPORT_EX_ENABLE
    return line;
#endif
    Args buffs = {0};
    char* line_out = line;
    char* alias = NULL;
    char* origin = NULL;
    char* stmt = NULL;
    if (!strIsStartWith(line, "import ")) {
        line_out = line;
        goto exit;
    }
    /* only step over the keyword once we know the line is long enough */
    stmt = line + sizeof("import ") - 1;
    Cursor_forEachToken(cs, stmt) {
        Cursor_iterStart(&cs);
        /* default: the first token is the origin */
        if (cs.iter_index == 1) {
            origin = strsCopy(&buffs, cs.token1.pyload);
        }
        /* the token in front of " as " is the origin ... */
        if (strEqu(cs.token2.pyload, " as ")) {
            origin = strsCopy(&buffs, cs.token1.pyload);
        }
        /* ... and the token behind it is the alias */
        if (strEqu(cs.token1.pyload, " as ")) {
            alias = strsCopy(&buffs, cs.token2.pyload);
        }
        Cursor_iterEnd(&cs);
    }
    Cursor_deinit(&cs);
    /* only import, not 'as' */
    if (NULL == alias) {
        line_out = line;
        goto exit;
    }
    /* 'import' and 'as' */
    line_out = strsFormat(&buffs, PIKA_LINE_BUFF_SIZE, "import %s\n%s = %s",
                          origin, alias, origin);
    line_out = strsCopy(buffs_p, line_out);
exit:
    strsDeinit(&buffs);
    return line_out;
}
2022-09-16 21:01:18 +08:00
static PIKA_BOOL _check_is_multi_assign(char* arg_list) {
PIKA_BOOL res = PIKA_FALSE;
Cursor_forEachToken(cs, arg_list) {
Cursor_iterStart(&cs);
if ((cs.branket_deepth == 0 && strEqu(cs.token1.pyload, ","))) {
res = PIKA_TRUE;
}
Cursor_iterEnd(&cs);
}
Cursor_deinit(&cs);
return res;
}
/*
 * Expand a multi-target assignment "a, b = <expr>" into
 *     $tmp= <expr>
 *     a = $tmp[0]
 *     b = $tmp[1]
 *     del $tmp
 *
 * out_buffs: buffer pool that owns the returned string when the line is
 *            rewritten.
 * line:      source line.
 * Returns `line` unchanged when it is not an assignment or has a single
 * target, otherwise the replacement lines separated by '\n'.
 * Disabled (returns `line`) when PIKA_NANO_ENABLE is set.
 */
static char* Suger_multiAssign(Args* out_buffs, char* line) {
#if PIKA_NANO_ENABLE
    return line;
#endif
    Args buffs = {0};
    char* line_out = line;
    PIKA_BOOL is_assign = PIKA_FALSE;
    Arg* stmt = arg_newStr("");
    Arg* out_list = arg_newStr("");
    Arg* line_out_arg = arg_newStr("");
    char* out_list_str = NULL;
    int out_num = 0;
    /* split the tokens at the first top-level '=' */
    Cursor_forEachToken(cs, line) {
        Cursor_iterStart(&cs);
        if (cs.branket_deepth == 0 && strEqu(cs.token1.pyload, "=")) {
            is_assign = PIKA_TRUE;
            Cursor_iterEnd(&cs);
            continue;
        }
        if (is_assign) {
            stmt = arg_strAppend(stmt, cs.token1.pyload);
        } else {
            out_list = arg_strAppend(out_list, cs.token1.pyload);
        }
        Cursor_iterEnd(&cs);
    }
    Cursor_deinit(&cs);
    if (!is_assign) {
        goto exit;
    }
    if (!_check_is_multi_assign(arg_getStr(out_list))) {
        goto exit;
    }
    /* evaluate the right side once into a temporary */
    line_out_arg = arg_strAppend(
        line_out_arg, strsFormat(&buffs, PIKA_LINE_BUFF_SIZE, "$tmp= %s\n",
                                 arg_getStr(stmt)));
    /* one indexed assignment per target */
    out_list_str = arg_getStr(out_list);
    while (1) {
        char* item = Cursor_popToken(&buffs, &out_list_str, ",");
        if (item[0] == '\0') {
            break;
        }
        char* item_line = strsFormat(&buffs, PIKA_LINE_BUFF_SIZE,
                                     "%s = $tmp[%d]\n", item, out_num);
        line_out_arg = arg_strAppend(line_out_arg, item_line);
        out_num++;
    }
    line_out_arg = arg_strAppend(line_out_arg, "del $tmp");
    line_out = strsCopy(out_buffs, arg_getStr(line_out_arg));
exit:
    arg_deinit(stmt);
    arg_deinit(out_list);
    arg_deinit(line_out_arg);
    strsDeinit(&buffs);
    return line_out;
}
/*
 * Expand "from <module> import <name> [as <alias>]" into two lines:
 *     import <module>
 *     <alias> = <module>.<name>
 * (<alias> defaults to <name>).
 *
 * buffs_p: buffer pool that owns the returned string.
 * line:    source line.
 * Returns `line` unchanged when it does not start with "from ". Returns ""
 * when the module or the imported name is missing, or when the module is
 * "PikaObj". Otherwise returns the two-line replacement.
 * Disabled (returns `line`) when PIKA_SYNTAX_IMPORT_EX_ENABLE is off.
 */
static char* Suger_from(Args* buffs_p, char* line) {
#if !PIKA_SYNTAX_IMPORT_EX_ENABLE
    return line;
#endif
    Args buffs = {0};
    char* line_out = line;
    char* class_name = NULL;
    char* module_name = NULL;
    char* alias = NULL;
    char* stmt = NULL;
    if (!strIsStartWith(line, "from ")) {
        line_out = line;
        goto exit;
    }
    /* only step over the keyword once we know the line is long enough */
    stmt = line + sizeof("from ") - 1;
    Cursor_forEachToken(cs, stmt) {
        Cursor_iterStart(&cs);
        /* token in front of " import " is the module ... */
        if (strEqu(cs.token2.pyload, " import ")) {
            module_name = strsCopy(&buffs, cs.token1.pyload);
        }
        /* ... the token behind it is the imported name */
        if (strEqu(cs.token1.pyload, " import ")) {
            class_name = strsCopy(&buffs, cs.token2.pyload);
        }
        if (strEqu(cs.token1.pyload, " as ")) {
            alias = strsCopy(&buffs, cs.token2.pyload);
        }
        Cursor_iterEnd(&cs);
    }
    Cursor_deinit(&cs);
    if (NULL == module_name) {
        line_out = strsCopy(buffs_p, "");
        goto exit;
    }
    if (NULL == class_name) {
        line_out = strsCopy(buffs_p, "");
        goto exit;
    }
    if (NULL == alias) {
        alias = class_name;
    }
    /* skip PikaObj */
    if (strEqu(module_name, "PikaObj")) {
        line_out = strsCopy(buffs_p, "");
        goto exit;
    }
    line_out = strsFormat(&buffs, PIKA_LINE_BUFF_SIZE, "import %s\n%s = %s.%s",
                          module_name, alias, module_name, class_name);
    line_out = strsCopy(buffs_p, line_out);
exit:
    strsDeinit(&buffs);
    return line_out;
}
/*
 * Prepare a raw source line for parsing: strip the comment, reject lines
 * the lexer flags as erroneous, remove '\r' and apply the line level syntax
 * sugars (import-as, from-import, multi-assignment).
 *
 * Returns the processed line (which may contain several '\n' separated
 * lines), or NULL on a syntax error.
 */
static char* Parser_linePreProcess(Args* buffs_p, char* line) {
    char* processed = Parser_removeAnnotation(line);
    /* check syntax error */
    if (Lexer_isError(processed)) {
        return NULL;
    }
    /* process EOL */
    processed = strsDeleteChar(buffs_p, processed, '\r');
    processed = Suger_import(buffs_p, processed);
    processed = Suger_from(buffs_p, processed);
    processed = Suger_multiAssign(buffs_p, processed);
    return processed;
}
/*
 * Translate one source line into assembly text.
 *
 * buffs_p:    buffer pool that owns the returned string.
 * line:       source line; it is modified while being processed.
 * blockStack: stack of open block keywords shared between the lines of one
 *             source file.
 * Returns the generated assembly, "" for a comment-only line, or NULL when
 * preprocessing, parsing or code generation fails. Preprocessing may expand
 * the line into several '\n' separated lines; a failure in any of them makes
 * the whole call fail.
 */
char* Parser_LineToAsm(Args* buffs_p, char* line, Stack* blockStack) {
    char* ASM = NULL;
    uint8_t line_num = 0;
    /* pre process */
    line = Parser_linePreProcess(buffs_p, line);
    if (NULL == line) {
        /* preprocess error */
        return NULL;
    }
    if (strEqu("@annontation", line)) {
        return "";
    }
    /*
        solve more lines
        preprocess may generate more lines
    */
    line_num = strCountSign(line, '\n') + 1;
    for (int i = 0; i < line_num; i++) {
        /* pops the next sub-line off the front of `line` */
        char* single_line = strsPopToken(buffs_p, line, '\n');
        /* parse tokens to AST */
        AST* ast = AST_parseLine_withBlockStack(single_line, blockStack);
        if (NULL == ast) {
            /* do not let a later sub-line hide this error */
            return NULL;
        }
        /* gen ASM from AST; the text lives in buffs_p, not in the AST */
        char* line_asm = AST_genAsm(ast, buffs_p);
        AST_deinit(ast);
        if (NULL == line_asm) {
            return NULL;
        }
        ASM = (NULL == ASM) ? line_asm : strsAppend(buffs_p, ASM, line_asm);
    }
    return ASM;
}
/*
 * Tell whether a line is "void": empty, or made of ' ' characters only.
 *
 * line: NUL-terminated line, must not be NULL.
 * Returns 1 for a void line, 0 as soon as any other character is found.
 */
static int Parser_isVoidLine(char* line) {
    /* single pass to the terminator; the length is not re-measured on
     * every iteration */
    for (const char* p = line; *p != '\0'; p++) {
        if (*p != ' ') {
            return 0;
        }
    }
    return 1;
}
/*
 * Detect a multi-line comment delimiter (''' or """) in a line.
 *
 * A delimiter only counts when it is made of three identical quote
 * characters, is at the start of the line or preceded by a space, and is
 * followed by a space or the end of the line.
 *
 * line: NUL-terminated line, must not be NULL.
 * Returns 1 when such a delimiter is found, otherwise 0.
 */
static uint8_t Parser_checkIsMultiComment(char* line) {
    /* stop at the terminator; the length is not re-measured per iteration */
    for (uint32_t i = 0; line[i] != '\0'; i++) {
        /* not match ' or " */
        if ((line[i] != '\'') && (line[i] != '"')) {
            continue;
        }
        /* not match ''' or """
         * (&& short-circuits, so line[i + 2] is only read when line[i + 1]
         * is a quote, i.e. still inside the string) */
        if (!((line[i + 1] == line[i]) && (line[i + 2] == line[i]))) {
            continue;
        }
        /* check the char before the ''' or """ */
        if (!((0 == i) || (line[i - 1] == ' '))) {
            continue;
        }
        /* check the char after the ''' or """ */
        if (!((line[i + 3] == ' ') || (line[i + 3] == 0))) {
            continue;
        }
        /* matched */
        return 1;
    }
    /* not matched */
    return 0;
}
2022-08-26 00:51:41 +08:00
/*
 * Shared driver behind Parser_linesToAsm() and Parser_linesToBytes().
 *
 * py_lines is consumed one '\n'-separated line at a time, followed by one
 * extra empty line at the end. For every line:
 *   - a trailing '\' or an unclosed bracket joins it with the next line;
 *   - blank lines are skipped;
 *   - a line holding a ''' / """ delimiter toggles the multi-line comment
 *     state, and lines inside such a comment are skipped;
 *   - everything else goes through Parser_LineToAsm().
 * The resulting ASM is accumulated as text when bytecode_frame is NULL,
 * or appended to bytecode_frame when it is non-NULL and outBuffs is NULL.
 *
 * outBuffs:       buffer pool for the returned ASM text, or NULL.
 * bytecode_frame: frame that receives the byte code, or NULL.
 * py_lines:       the source text.
 * Returns NULL on a parse error. Otherwise returns a copy of the
 * accumulated ASM text (allocated from outBuffs) when outBuffs is
 * non-NULL, or the non-NULL marker (char*)1 when outBuffs is NULL. The
 * marker only signals success and must not be dereferenced.
 */
static char* _Parser_linesToBytesOrAsm(Args* outBuffs,
                                       ByteCodeFrame* bytecode_frame,
                                       char* py_lines) {
    Stack block_stack;
    stack_init(&block_stack);
    Arg* asm_buff = arg_newStr("");
    uint32_t lines_offset = 0;
    uint16_t lines_num = strCountSign(py_lines, '\n') + 1;
    uint16_t lines_index = 0;
    uint8_t is_in_multi_comment = 0;
    Arg* line_connection_arg = arg_newStr("");
    uint8_t is_line_connection = 0;
    char* out_ASM = NULL;
    char* single_ASM = NULL;
    uint32_t line_size = 0;
    /* parse each line */
    while (1) {
        lines_index++;
        Args buffs = {0};
        char* line_origin = NULL;
        char* line = NULL;
        uint32_t line_len = 0;
        /* add void line to the end */
        if (lines_index >= lines_num + 1) {
            line = "";
            goto parse_line;
        }

        /* get single line by pop multiline */
        line_origin = strsGetFirstToken(&buffs, py_lines + lines_offset, '\n');

        line = strsCopy(&buffs, line_origin);

        /* line connection: prepend what was collected from previous lines */
        if (is_line_connection) {
            is_line_connection = 0;
            line_connection_arg = arg_strAppend(line_connection_arg, line);
            line = strsCopy(&buffs, arg_getStr(line_connection_arg));
            /* refresh the line_connection_arg */
            arg_deinit(line_connection_arg);
            line_connection_arg = arg_newStr("");
        }

        /* check connection
         * (an empty line has no last char, so line[-1] is never read) */
        line_len = strGetSize(line);
        if ((line_len > 0) && ('\\' == line[line_len - 1])) {
            /* remove the '\\' */
            line[line_len - 1] = '\0';
            is_line_connection = 1;
            line_connection_arg = arg_strAppend(line_connection_arg, line);
            goto next_line;
        }

        /* walk the tokens only to get the bracket depth of the line */
        Cursor_forEachToken(c, line) {
            Cursor_iterStart(&c);
            Cursor_iterEnd(&c);
        }
        Cursor_deinit(&c);
        /* auto connection */
        if (lines_index < lines_num) {
            if (c.branket_deepth > 0) {
                line_connection_arg = arg_strAppend(line_connection_arg, line);
                is_line_connection = 1;
                goto next_line;
            }
        }

        /* bracket match failed */
        if (c.branket_deepth != 0) {
            single_ASM = NULL;
            goto parse_after;
        }

        /* support Tab */
        line = strsReplace(&buffs, line, "\t", "    ");
        /* remove \r */
        line = strsReplace(&buffs, line, "\r", "");

        /* filter for not end \n */
        if (Parser_isVoidLine(line)) {
            goto next_line;
        }

        /* filter for multiline comment ''' or """ */
        if (Parser_checkIsMultiComment(line)) {
            is_in_multi_comment = ~is_in_multi_comment;
            goto next_line;
        }

        /* skip multiline comment */
        if (is_in_multi_comment) {
            goto next_line;
        }

    parse_line:
        /* parse single Line to Asm */
        single_ASM = Parser_LineToAsm(&buffs, line, &block_stack);
    parse_after:
        if (NULL == single_ASM) {
            out_ASM = NULL;
            strsDeinit(&buffs);
            goto exit;
        }

        if (NULL == bytecode_frame) {
            /* store ASM */
            asm_buff = arg_strAppend(asm_buff, single_ASM);
        } else if (NULL == outBuffs) {
            /* store ByteCode */
            byteCodeFrame_appendFromAsm(bytecode_frame, single_ASM);
        }

    next_line:
        if (lines_index < lines_num) {
            line_size = strGetSize(line_origin);
            lines_offset = lines_offset + line_size + 1;
        }
        strsDeinit(&buffs);

        /* exit when finished */
        if (lines_index >= lines_num + 1) {
            break;
        }
    }
    if (NULL != outBuffs) {
        /* load stored ASM */
        out_ASM = strsCopy(outBuffs, arg_getStr(asm_buff));
    } else {
        /* success marker only, not a string */
        out_ASM = (char*)1;
    }
exit:
    if (NULL != asm_buff) {
        arg_deinit(asm_buff);
    }
    if (NULL != line_connection_arg) {
        arg_deinit(line_connection_arg);
    }
    stack_deinit(&block_stack);
    return out_ASM;
}
2022-08-26 00:51:41 +08:00
/*
 * Compile source lines into the byte code frame `bf`.
 *
 * Forwards to _Parser_linesToBytesOrAsm() without an ASM output buffer.
 * Returns NULL on a parse error, otherwise a non-NULL success marker that
 * is not a usable string.
 */
char* Parser_linesToBytes(ByteCodeFrame* bf, char* py_lines) {
    Args* no_asm_out = NULL;
    return _Parser_linesToBytesOrAsm(no_asm_out, bf, py_lines);
}
/*
 * Fill `bytecode_frame` from the source text `multi_line`.
 *
 * Returns 0 on success, 1 when Parser_linesToBytes() reports an error
 * (NULL result).
 */
int bytecodeFrame_fromLines(ByteCodeFrame* bytecode_frame, char* multi_line) {
    char* result = Parser_linesToBytes(bytecode_frame, multi_line);
    /* 1: error, 0: succeed */
    return (NULL == result) ? 1 : 0;
}
2022-08-26 00:51:41 +08:00
/*
 * Translate source lines into Pika ASM text.
 *
 * Forwards to _Parser_linesToBytesOrAsm() without a byte code frame, so
 * the result is returned as text.
 * Returns NULL on a parse error.
 */
char* Parser_linesToAsm(Args* outBuffs, char* multi_line) {
    ByteCodeFrame* no_bytecode = NULL;
    return _Parser_linesToBytesOrAsm(outBuffs, no_bytecode, multi_line);
}
/*
 * Load a source file and translate it into Pika ASM text.
 *
 * The file content is normalised before parsing: "\r\n" becomes "\n",
 * "\n\n" is collapsed to "\n" (one replace pass), and "\n\n" is appended.
 *
 * outBuffs: buffer pool that receives the returned ASM text.
 * filename: path handed to arg_loadFile().
 * Returns the ASM text, or NULL when the file cannot be loaded or the
 * source fails to parse.
 */
char* Parser_fileToAsm(Args* outBuffs, char* filename) {
    Args buffs = {0};
    char* res = NULL;
    Arg* file_arg = arg_loadFile(NULL, filename);
    pika_assert(NULL != file_arg);
    if (NULL == file_arg) {
        return NULL;
    }
    char* lines = (char*)arg_getBytes(file_arg);
    /* replace the "\r\n" to "\n" */
    lines = strsReplace(&buffs, lines, "\r\n", "\n");
    /* clear the void line */
    lines = strsReplace(&buffs, lines, "\n\n", "\n");
    /* add '\n' at the end */
    lines = strsAppend(&buffs, lines, "\n\n");
    res = Parser_linesToAsm(&buffs, lines);
    arg_deinit(file_arg);
    /*
     * Parser_linesToAsm() returns NULL on a parse error; only copy a real
     * string (other callers in this file also guard strsCopy() against
     * NULL input).
     */
    if (NULL != res) {
        res = strsCopy(outBuffs, res);
    }
    strsDeinit(&buffs);
    return res;
}
2022-09-16 21:01:18 +08:00
/*
 * Append the ASM of one AST node (and of its queued children) to pikaAsm.
 *
 * Children are emitted first, each with the "deepth" attribute of `ast`
 * set to the current value + 1. Then one instruction is emitted for every
 * rule whose attribute is present on subAst, at the current "deepth".
 * Before returning, "deepth" on `ast` is set to the current value - 1.
 *
 * ast:      top AST; carries the "deepth" attribute.
 * subAst:   the node to generate; its child queue is consumed.
 * outBuffs: buffer pool that receives the returned string.
 * pikaAsm:  ASM text generated so far.
 * Returns the extended ASM text, allocated from outBuffs.
 */
char* AST_genAsm_sub(AST* ast, AST* subAst, Args* outBuffs, char* pikaAsm) {
    /* Byte code generate rules */
    static const GenRule rules_subAst[] = {
        {.ins = "RUN", .type = VAL_DYNAMIC, .ast = "method"},
        {.ins = "OPT", .type = VAL_DYNAMIC, .ast = "operator"},
        {.ins = "BYT", .type = VAL_DYNAMIC, .ast = "bytes"},
        {.ins = "NUM", .type = VAL_DYNAMIC, .ast = "num"},
        {.ins = "IMP", .type = VAL_DYNAMIC, .ast = "import"},
        {.ins = "REF", .type = VAL_DYNAMIC, .ast = "ref"},
        {.ins = "STR", .type = VAL_DYNAMIC, .ast = "string"},
        {.ins = "SLC", .type = VAL_NONEVAL, .ast = "slice"},
        {.ins = "DCT", .type = VAL_NONEVAL, .ast = "dict"},
        {.ins = "LST", .type = VAL_NONEVAL, .ast = "list"},
        {.ins = "OUT", .type = VAL_DYNAMIC, .ast = "left"}};
    const size_t rule_count = sizeof(rules_subAst) / sizeof(rules_subAst[0]);
    const int deepth = obj_getInt(ast, "deepth");
    Args buffs = {0};
    QueueObj* child = NULL;

    /* append each queue item, one level deeper */
    while (NULL != (child = queueObj_popObj(subAst))) {
        obj_setInt(ast, "deepth", deepth + 1);
        pikaAsm = AST_genAsm_sub(ast, child, &buffs, pikaAsm);
    }

    /* append the syntax item */
    char* head = args_getBuff(&buffs, PIKA_SPRINTF_BUFF_SIZE);
    for (size_t idx = 0; idx < rule_count; idx++) {
        const GenRule* rule = &rules_subAst[idx];
        char* node_val = obj_getStr(subAst, rule->ast);
        if (NULL == node_val) {
            continue;
        }
        /* e.g. "0 RUN print \n" */
        __platform_sprintf(head, "%d %s ", deepth, rule->ins);
        Arg* ins_line = arg_newStr(head);
        if (rule->type == VAL_DYNAMIC) {
            ins_line = arg_strAppend(ins_line, node_val);
        }
        ins_line = arg_strAppend(ins_line, "\n");
        pikaAsm = strsAppend(&buffs, pikaAsm, arg_getStr(ins_line));
        arg_deinit(ins_line);
    }

    obj_setInt(ast, "deepth", deepth - 1);
    /* hand the result to the caller's pool before the local one is freed */
    pikaAsm = strsCopy(outBuffs, pikaAsm);
    strsDeinit(&buffs);
    return pikaAsm;
}
/*
 * Append a block depth line "B<n>\n" to pikaAsm, where <n> is the current
 * block depth of `ast` plus deepthOffset.
 *
 * Returns the extended ASM text, allocated from buffs_p.
 */
char* ASM_addBlockDeepth(AST* ast,
                         Args* buffs_p,
                         char* pikaAsm,
                         uint8_t deepthOffset) {
    char digits_buff[11];
    char* result = strsAppend(buffs_p, pikaAsm, (char*)"B");
    /* use the pointer returned by fast_itoa(), not digits_buff itself */
    char* digits =
        fast_itoa(digits_buff, AST_getBlockDeepthNow(ast) + deepthOffset);
    result = strsAppend(buffs_p, result, digits);
    result = strsAppend(buffs_p, result, (char*)"\n");
    return result;
}
2022-08-26 00:51:41 +08:00
/*
 * Generate the ASM of a statement described by `rule`.
 *
 * The sub-expressions of `ast` are emitted first, then one instruction
 * line "<deepth> <ins> <operand>\n". The operand is the AST attribute
 * named by rule.ast for VAL_DYNAMIC, rule.val for VAL_STATIC_, and empty
 * for any other rule type.
 *
 * Returns the extended ASM text, allocated from buffs.
 */
char* GenRule_toAsm(GenRule rule,
                    Args* buffs,
                    AST* ast,
                    char* pikaAsm,
                    int deepth) {
    char* head = args_getBuff(buffs, PIKA_SPRINTF_BUFF_SIZE);
    /* parse stmt ast */
    pikaAsm = AST_genAsm_sub(ast, ast, buffs, pikaAsm);
    /* e.g. "0 CTN \n" */
    __platform_sprintf(head, "%d %s ", deepth, rule.ins);
    Arg* ins_line = arg_newStr(head);
    if (rule.type == VAL_DYNAMIC) {
        ins_line = arg_strAppend(ins_line, obj_getStr(ast, rule.ast));
    } else if (rule.type == VAL_STATIC_) {
        ins_line = arg_strAppend(ins_line, rule.val);
    }
    ins_line = arg_strAppend(ins_line, "\n");
    char* result = strsAppend(buffs, pikaAsm, arg_getStr(ins_line));
    arg_deinit(ins_line);
    return result;
}
2022-09-16 21:01:18 +08:00
/*
 * Generate the Pika ASM text of one parsed line.
 *
 * Steps, in order:
 *   1. For every block type queued in the "exitBlock" attribute, emit the
 *      code that closes that block (loop jump, try/except epilogue,
 *      def/class return, ...).
 *   2. Emit the block depth line of this statement and reset "deepth".
 *   3. Emit the statement itself: "for", "elif", "def" and "class" have
 *      dedicated generators, the remaining block and top-level keywords
 *      are table driven, and anything else is a plain expression handled
 *      by AST_genAsm_sub().
 *
 * All intermediate strings live in a local buffer pool; only the final
 * text is copied into outBuffs.
 *
 * ast:      the AST to generate, may be NULL.
 * outBuffs: buffer pool that receives the returned string.
 * Returns the ASM text, or NULL when ast is NULL or a generated
 * sub-statement (multi-assign of a "for", default argument of a "def")
 * cannot be parsed.
 */
char* AST_genAsm(AST* ast, Args* outBuffs) {
    const GenRule rules_topAst[] = {
        {.ins = "CTN", .type = VAL_NONEVAL, .ast = "continue"},
        {.ins = "BRK", .type = VAL_NONEVAL, .ast = "break"},
        {.ins = "DEL", .type = VAL_DYNAMIC, .ast = "del"},
        {.ins = "GLB", .type = VAL_DYNAMIC, .ast = "global"},
        {.ins = "RIS", .type = VAL_DYNAMIC, .ast = "raise"},
        {.ins = "ASS", .type = VAL_NONEVAL, .ast = "assert"},
        {.ins = "RET", .type = VAL_NONEVAL, .ast = "return"}};

    /* generate code for block ast */
    const GenRule rules_block[] = {
        {.ins = "TRY", .type = VAL_NONEVAL, .ast = "try"},
        {.ins = "EXP", .type = VAL_NONEVAL, .ast = "except"},
        {.ins = "NEL", .type = VAL_STATIC_, .ast = "else", .val = "1"},
        {.ins = "JEZ", .type = VAL_STATIC_, .ast = "if", .val = "1"},
        {.ins = "JEZ", .type = VAL_STATIC_, .ast = "while", .val = "2"},
    };

    Args buffs = {0};
    char* pikaAsm = strsCopy(&buffs, "");
    QueueObj* exitBlock;
    uint8_t is_block_matched = 0;
    if (NULL == ast) {
        pikaAsm = NULL;
        goto exit;
    }
    exitBlock = obj_getObj(ast, "exitBlock");
    /* exiting from block */
    if (exitBlock != NULL) {
        while (1) {
            uint8_t block_type_num = obj_getInt(exitBlock, "top") -
                                     obj_getInt(exitBlock, "bottom") - 1;
            char* block_type = queueObj_popStr(exitBlock);
            if (NULL == block_type) {
                break;
            }
            /* goto the while start when exit while block */
            if (strEqu(block_type, "while")) {
                pikaAsm =
                    ASM_addBlockDeepth(ast, &buffs, pikaAsm, block_type_num);
                pikaAsm = strsAppend(&buffs, pikaAsm, (char*)"0 JMP -1\n");
            }
#if PIKA_SYNTAX_EXCEPTION_ENABLE
            /* close the try block */
            if (strEqu(block_type, "try")) {
                pikaAsm =
                    ASM_addBlockDeepth(ast, &buffs, pikaAsm, block_type_num);
                pikaAsm = strsAppend(&buffs, pikaAsm, (char*)"0 NTR \n");
                pikaAsm = strsAppend(&buffs, pikaAsm, (char*)"0 GER \n");
                pikaAsm = strsAppend(&buffs, pikaAsm, (char*)"0 JEZ 2\n");
            }

            /* close the except block */
            if (strEqu(block_type, "except")) {
                pikaAsm = strsAppend(&buffs, pikaAsm, (char*)"0 SER 0\n");
            }
#endif
            /* goto the for start when exit for block */
            if (strEqu(block_type, "for")) {
                pikaAsm =
                    ASM_addBlockDeepth(ast, &buffs, pikaAsm, block_type_num);
                pikaAsm = strsAppend(&buffs, pikaAsm, (char*)"0 JMP -1\n");
                /* garbage collect for the list */
                pikaAsm =
                    ASM_addBlockDeepth(ast, &buffs, pikaAsm, block_type_num);
                /* iterator name "$l<d>": a single char encodes the depth */
                char _l_x[] = "$lx";
                char block_deepth_char =
                    AST_getBlockDeepthNow(ast) + block_type_num + '0';
                _l_x[sizeof(_l_x) - 2] = block_deepth_char;
                pikaAsm = strsAppend(&buffs, pikaAsm, (char*)"0 DEL ");
                pikaAsm = strsAppend(&buffs, pikaAsm, (char*)_l_x);
                pikaAsm = strsAppend(&buffs, pikaAsm, (char*)"\n");
            }
            /* return when exit method */
            if (strEqu(block_type, "def")) {
                pikaAsm = ASM_addBlockDeepth(ast, &buffs, pikaAsm,
                                             block_type_num + 1);
                pikaAsm = strsAppend(&buffs, pikaAsm, (char*)"0 RET \n");
            }
            /* return when exit class */
            if (strEqu(block_type, "class")) {
                pikaAsm = ASM_addBlockDeepth(ast, &buffs, pikaAsm,
                                             block_type_num + 1);
                pikaAsm =
                    strsAppend(&buffs, pikaAsm, (char*)"0 RAS $origin\n");
                pikaAsm = ASM_addBlockDeepth(ast, &buffs, pikaAsm, 1);
                pikaAsm = strsAppend(&buffs, pikaAsm, (char*)"0 NEW self\n");
                pikaAsm = strsAppend(&buffs, pikaAsm, (char*)"0 RET \n");
            }
        }
    }
    /* add block deepth */
    /* example: B0 */
    pikaAsm = ASM_addBlockDeepth(ast, &buffs, pikaAsm, 0);

    /* "deepth" is invoke deepth, not the blockDeepth */
    obj_setInt(ast, "deepth", 0);

    /* match block */
    if (strEqu(AST_getThisBlock(ast), "for")) {
        /* for "for" iter */
        char* arg_in = obj_getStr(ast, "arg_in");
        char* arg_in_kv = NULL;
        Arg* newAsm_arg = arg_newStr("");
        char _l_x[] = "$lx";
        char block_deepth_char = '0';
        block_deepth_char += AST_getBlockDeepthNow(ast);
        _l_x[sizeof(_l_x) - 2] = block_deepth_char;
        /* init iter */
        /*     get the iter(_l<x>) */
        pikaAsm = AST_genAsm_sub(ast, ast, &buffs, pikaAsm);
        newAsm_arg = arg_strAppend(newAsm_arg, "0 OUT ");
        newAsm_arg = arg_strAppend(newAsm_arg, _l_x);
        newAsm_arg = arg_strAppend(newAsm_arg, "\n");
        pikaAsm = strsAppend(&buffs, pikaAsm, arg_getStr(newAsm_arg));
        arg_deinit(newAsm_arg);
        newAsm_arg = arg_newStr("");
        /* get next */
        /*     run next(_l<x>) */
        /*     check item is exist */
        /*
            $n = $lx.next()
            EST $n
            k, v = $n
            DEL $n
        */
        if (_check_is_multi_assign(arg_in)) {
            arg_in_kv = arg_in;
            arg_in = "$tmp";
        }
        pikaAsm = ASM_addBlockDeepth(ast, &buffs, pikaAsm, 0);
        newAsm_arg = arg_strAppend(newAsm_arg, "0 RUN ");
        newAsm_arg = arg_strAppend(newAsm_arg, _l_x);
        newAsm_arg = arg_strAppend(newAsm_arg,
                                   ".__next__\n"
                                   "0 OUT ");
        newAsm_arg = arg_strAppend(newAsm_arg, arg_in);
        newAsm_arg = arg_strAppend(newAsm_arg,
                                   "\n"
                                   "0 EST ");
        newAsm_arg = arg_strAppend(newAsm_arg, arg_in);
        newAsm_arg = arg_strAppend(newAsm_arg, "\n0 JEZ 2\n");
        pikaAsm = strsAppend(&buffs, pikaAsm, arg_getStr(newAsm_arg));
        arg_deinit(newAsm_arg);
        if (NULL != arg_in_kv) {
            /* unpack "$tmp" into each target: "<item> = $tmp[<n>]" */
            int out_num = 0;
            while (1) {
                char* item = Cursor_popToken(&buffs, &arg_in_kv, ",");
                if (item[0] == '\0') {
                    break;
                }
                char* stmt = strsFormat(&buffs, PIKA_LINE_BUFF_SIZE,
                                        "%s = $tmp[%d]\n", item, out_num);
                AST* ast_this = AST_parseLine_withBlockDeepth(
                    stmt, AST_getBlockDeepthNow(ast) + 1);
                char* item_asm = AST_genAsm(ast_this, &buffs);
                if (NULL != ast_this) {
                    AST_deinit(ast_this);
                }
                if (NULL == item_asm) {
                    /* the generated statement failed to parse */
                    pikaAsm = NULL;
                    goto exit;
                }
                pikaAsm = strsAppend(&buffs, pikaAsm, item_asm);
                out_num++;
            }
            pikaAsm = ASM_addBlockDeepth(ast, &buffs, pikaAsm, 1);
            pikaAsm = strsAppend(&buffs, pikaAsm, "0 DEL $tmp\n");
        }
        is_block_matched = 1;
        goto exit;
    }
    if (strEqu(AST_getThisBlock(ast), "elif")) {
        /* skip if __else is 0 */
        pikaAsm = strsAppend(&buffs, pikaAsm, "0 NEL 1\n");
        /* parse stmt ast */
        pikaAsm = AST_genAsm_sub(ast, ast, &buffs, pikaAsm);
        /* skip if stmt is 0 */
        pikaAsm = strsAppend(&buffs, pikaAsm, "0 JEZ 1\n");
        is_block_matched = 1;
        goto exit;
    }
    if (strEqu(AST_getThisBlock(ast), "def")) {
        char* defaultStmts = AST_getNodeAttr(ast, "default");
        pikaAsm = strsAppend(&buffs, pikaAsm, "0 DEF ");
        pikaAsm = strsAppend(&buffs, pikaAsm, AST_getNodeAttr(ast, "declare"));
        pikaAsm = strsAppend(&buffs, pikaAsm,
                             "\n"
                             "0 JMP 1\n");
        if (NULL != defaultStmts) {
            /* each default "name=value" is only run when name is unset */
            int stmt_num = strGetTokenNum(defaultStmts, ',');
            for (int i = 0; i < stmt_num; i++) {
                char* stmt = strsPopToken(&buffs, defaultStmts, ',');
                char* arg_name = strsGetFirstToken(&buffs, stmt, '=');
                pikaAsm = ASM_addBlockDeepth(ast, &buffs, pikaAsm, 1);
                pikaAsm = strsAppend(&buffs, pikaAsm, "0 EST ");
                pikaAsm = strsAppend(&buffs, pikaAsm, arg_name);
                pikaAsm = strsAppend(&buffs, pikaAsm, "\n");
                pikaAsm = strsAppend(&buffs, pikaAsm, "0 JNZ 2\n");
                AST* ast_this = AST_parseLine_withBlockDeepth(
                    stmt, AST_getBlockDeepthNow(ast) + 1);
                char* stmt_asm = AST_genAsm(ast_this, &buffs);
                if (NULL != ast_this) {
                    AST_deinit(ast_this);
                }
                if (NULL == stmt_asm) {
                    /* the default value statement failed to parse */
                    pikaAsm = NULL;
                    goto exit;
                }
                pikaAsm = strsAppend(&buffs, pikaAsm, stmt_asm);
            }
        }
        is_block_matched = 1;
        goto exit;
    }

    if (strEqu(AST_getThisBlock(ast), "class")) {
        char* declare = obj_getStr(ast, "declare");
        char* thisClass = NULL;
        char* superClass = NULL;
        if (strIsContain(declare, '(')) {
            thisClass = strsGetFirstToken(&buffs, declare, '(');
            superClass = strsCut(&buffs, declare, '(', ')');
        } else {
            thisClass = declare;
            superClass = "";
        }
        if (strEqu("", superClass)) {
            /* default superClass */
            superClass = "TinyObj";
        }
        pikaAsm = strsAppend(&buffs, pikaAsm, "0 CLS ");
        pikaAsm = strsAppend(&buffs, pikaAsm,
                             strsAppend(&buffs, thisClass,
                                        "()\n"
                                        "0 JMP 1\n"));
        char block_deepth_str[] = "B0\n";
        /* goto deeper block */
        block_deepth_str[1] += AST_getBlockDeepthNow(ast) + 1;
        pikaAsm = strsAppend(&buffs, pikaAsm, block_deepth_str);
        pikaAsm = strsAppend(&buffs, pikaAsm, "0 RUN ");
        pikaAsm = strsAppend(&buffs, pikaAsm, superClass);
        pikaAsm = strsAppend(&buffs, pikaAsm, "\n");
        pikaAsm = strsAppend(&buffs, pikaAsm, "0 OUT self\n");
        pikaAsm = strsAppend(&buffs, pikaAsm, block_deepth_str);
        pikaAsm = strsAppend(&buffs, pikaAsm, "0 RAS self\n");
        is_block_matched = 1;
        goto exit;
    }

    for (size_t i = 0; i < sizeof(rules_block) / sizeof(GenRule); i++) {
        GenRule rule = rules_block[i];
        if (strEqu(AST_getThisBlock(ast), rule.ast)) {
            pikaAsm = GenRule_toAsm(rule, &buffs, ast, pikaAsm, 0);
            is_block_matched = 1;
            goto exit;
        }
    }

    /* generate code for top level ast */
    for (size_t i = 0; i < sizeof(rules_topAst) / sizeof(rules_topAst[0]);
         i++) {
        GenRule item = rules_topAst[i];
        if (obj_isArgExist(ast, item.ast)) {
            pikaAsm = GenRule_toAsm(item, &buffs, ast, pikaAsm, 0);
            is_block_matched = 1;
            goto exit;
        }
    }
exit:
    if (NULL == pikaAsm) {
        strsDeinit(&buffs);
        return NULL;
    }
    if (!is_block_matched) {
        /* parse stmt ast */
        pikaAsm = AST_genAsm_sub(ast, ast, &buffs, pikaAsm);
    }

    /* output pikaAsm */
    pikaAsm = strsCopy(outBuffs, pikaAsm);
    strsDeinit(&buffs);
    return pikaAsm;
}
/*
 * Release an AST.
 * Returns the result of obj_deinit() on the AST object.
 */
int32_t AST_deinit(AST* ast) {
    int32_t status = obj_deinit(ast);
    return status;
}
/*
 * Assemble Pika ASM text and append the result to a byte code frame.
 *
 * One line is processed per '\n' in pikaAsm:
 *   - "B<n>" sets the current block depth and flags the next instruction
 *     as the start of a new line;
 *   - "<invoke deepth> <ins> <data>" becomes one instruct unit. A
 *     non-empty <data> is stored in (or looked up from) the const pool
 *     of the frame.
 * A trailing '\r' is dropped and empty lines are skipped.
 *
 * self:    frame that receives the instructions and constants.
 * pikaAsm: the ASM text.
 * Returns self.
 */
ByteCodeFrame* byteCodeFrame_appendFromAsm(ByteCodeFrame* self, char* pikaAsm) {
    Asmer asmer = {
        .asm_code = pikaAsm,
        .block_deepth_now = 0,
        .is_new_line = 0,
        .line_pointer = pikaAsm,
    };
    uint16_t const_pool_offset;
    uint16_t exist_offset;
    int invoke_deepth_int = 0;
    /* count the lines once instead of on every iteration */
    const int line_count = strCountSign(pikaAsm, '\n');
    for (int i = 0; i < line_count; i++) {
        Args buffs = {0};
        char* line = strsGetLine(&buffs, asmer.line_pointer);
        char* data = NULL;
        char ins_str[4] = "";
        char invoke_deepth[3] = "";
        uint8_t space_num = 0;
        uint8_t invoke_deepth_i = 0;
        uint8_t ins_str_i = 0;
        size_t line_len = 0;
        Arg* line_buff = arg_newStr(line);
        strsDeinit(&buffs);
        line = arg_getStr(line_buff);
        InstructUnit ins_unit = {0};
        line_len = strGetSize(line);
        /* remove '\r' (an empty line has no last char to look at) */
        if ((line_len > 0) && (line[line_len - 1] == '\r')) {
            line_len--;
            line[line_len] = 0;
        }
        /* nothing to assemble on an empty line */
        if (0 == line_len) {
            goto next_line;
        }
        /* process block deepth flag*/
        if ('B' == line[0]) {
            asmer.block_deepth_now = fast_atoi(line + 1);
            asmer.is_new_line = 1;
            goto next_line;
        }

        /* process each ins */

        /* split "<invoke deepth> <ins> <data>" at the first two spaces */
        const_pool_offset = 0;
        for (size_t j = 0; j < line_len; j++) {
            if (line[j] == ' ') {
                space_num++;
                if (space_num == 2) {
                    data = line + j + 1;
                    break;
                }
                continue;
            }
            if (space_num == 0) {
                /* keep room for the terminator; extra chars are dropped */
                if (invoke_deepth_i < sizeof(invoke_deepth) - 1) {
                    invoke_deepth[invoke_deepth_i++] = line[j];
                }
                continue;
            }
            if (space_num == 1) {
                /* keep room for the terminator; extra chars are dropped */
                if (ins_str_i < sizeof(ins_str) - 1) {
                    ins_str[ins_str_i++] = line[j];
                }
                continue;
            }
        }
        if (NULL == data) {
            /* fewer than two spaces: no operand, use the empty string at
             * the end of the line */
            data = line + line_len;
        }

        exist_offset = constPool_getOffsetByData(&(self->const_pool), data);

        /* get const offset */
        if (strEqu(data, "")) {
            /* not need const value */
            const_pool_offset = 0;
        } else if (65535 == exist_offset) {
            /* push new const value */
            const_pool_offset = constPool_getLastOffset(&(self->const_pool));
            /* load const to const pool buff */
            constPool_append(&(self->const_pool), data);
        } else {
            /* use exist const value */
            const_pool_offset = exist_offset;
        }

        invoke_deepth_int = fast_atoi(invoke_deepth);

        /* load Asm to byte code unit */
        instructUnit_setBlockDeepth(&ins_unit, asmer.block_deepth_now);
        instructUnit_setInvokeDeepth(&ins_unit, invoke_deepth_int);
        instructUnit_setConstPoolIndex(&ins_unit, const_pool_offset);
        instructUnit_setInstruct(&ins_unit, pikaVM_getInstructFromAsm(ins_str));
        if (asmer.is_new_line) {
            instructUnit_setIsNewLine(&ins_unit, 1);
            asmer.is_new_line = 0;
        }

        /* append instructUnit to instructArray */
        instructArray_append(&(self->instruct_array), &ins_unit);

    next_line:
        /* point to next line */
        asmer.line_pointer += strGetLineSize(asmer.line_pointer) + 1;
        arg_deinit(line_buff);
    }
    return self;
}
2022-08-26 00:51:41 +08:00
char* Parser_linesToArray(char* lines) {
2022-06-24 00:21:00 +08:00
ByteCodeFrame bytecode_frame;
byteCodeFrame_init(&bytecode_frame);
2022-08-26 00:51:41 +08:00
bytecodeFrame_fromLines(&bytecode_frame, lines);
2022-06-24 00:21:00 +08:00
/* do something */
byteCodeFrame_print(&bytecode_frame);
__platform_printf("\n\n/* clang-format off */\n");
__platform_printf("PIKA_PYTHON(\n");
__platform_printf("%s\n", lines);
__platform_printf(")\n");
__platform_printf("/* clang-format on */\n");
2022-06-24 00:21:00 +08:00
byteCodeFrame_printAsArray(&bytecode_frame);
/* deinit */
byteCodeFrame_deinit(&bytecode_frame);
__platform_printf("\n\n");
2022-08-26 00:51:41 +08:00
return NULL;
2022-06-24 00:21:00 +08:00
}