/* * This file is part of the PikaScript project. * http://github.com/pikastech/pikascript * * MIT License * * Copyright (c) 2021 lyon 李昂 liang6516@outlook.com * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #include "PikaParser.h" #include "BaseObj.h" #include "PikaObj.h" #include "dataQueue.h" #include "dataQueueObj.h" #include "dataStack.h" #include "dataStrs.h" /* local head */ typedef QueueObj AST; char* AST_toPikaASM(AST* ast, Args* outBuffs); char* Lexer_getTokens(Args* outBuffs, char* stmt); int32_t AST_deinit(AST* ast); char* Parser_multiLineToAsm(Args* outBuffs, char* multiLine); uint8_t Parser_isContainToken(char* tokens, enum TokenType token_type, char* pyload); void ParserState_init(struct ParserState* ps); void ParserState_parse(struct ParserState* ps, char* stmt); void ParserState_deinit(struct ParserState* ps); void ParserState_beforeIter(struct ParserState* ps); void ParserState_iterStart(struct ParserState* ps); void ParserState_iterEnd(struct ParserState* ps); char* Parser_popToken(Args* buffs_p, char* tokens); /* Syntex item */ const SyntaxItem syntexItemList[] = { /* clang-format off */ { .asmCode = "RUN", .astNodeName = "method", .isUseNodeValue = PIKA_TRUE }, { .asmCode = "OPT", .astNodeName = "operator", .isUseNodeValue = PIKA_TRUE }, { .asmCode = "BYT", .astNodeName = "bytes", .isUseNodeValue = PIKA_TRUE }, { .asmCode = "NUM", .astNodeName = "num", .isUseNodeValue = PIKA_TRUE }, { .asmCode = "IMP", .astNodeName = "import", .isUseNodeValue = PIKA_TRUE }, { .asmCode = "REF", .astNodeName = "ref", .isUseNodeValue = PIKA_TRUE }, { .asmCode = "STR", .astNodeName = "string", .isUseNodeValue = PIKA_TRUE }, #if PIKA_SYNTAX_SLICE_ENABLE { .asmCode = "SLC", .astNodeName = "slice", .isUseNodeValue = PIKA_FALSE }, #endif #if PIKA_BUILTIN_STRUCT_ENABLE { .asmCode = "DCT", .astNodeName = "dict", .isUseNodeValue = PIKA_FALSE }, { .asmCode = "LST", .astNodeName = "list", .isUseNodeValue = PIKA_FALSE }, #endif { .asmCode = "OUT", .astNodeName = "left", .isUseNodeValue = PIKA_TRUE } /* clang-format on */ }; uint16_t Tokens_getSize(char* tokens) { if (strEqu("", tokens)) { return 0; } return strCountSign(tokens, 0x1F) + 1; } char* 
strsPopTokenWithSkip_byStr(Args* outBuffs, char* stmts, char* str, char skipStart, char skipEnd) { uint8_t divider_index = 0; Arg* keeped_arg = arg_newStr(""); Arg* poped_arg = arg_newStr(""); ParserState_forEachToken(ps, stmts) { ParserState_iterStart(&ps); if (ps.branket_deepth == 0) { if (strEqu(str, ps.token1.pyload)) { divider_index = ps.iter_index; } } ParserState_iterEnd(&ps); } ParserState_deinit(&ps); ParserState_forEachTokenExistPs(ps, stmts) { ParserState_iterStart(&ps); if (ps.iter_index < divider_index) { poped_arg = arg_strAppend(poped_arg, ps.token1.pyload); } if (ps.iter_index > divider_index) { keeped_arg = arg_strAppend(keeped_arg, ps.token1.pyload); } ParserState_iterEnd(&ps); } ParserState_deinit(&ps); char* keeped = arg_getStr(keeped_arg); char* poped = strsCopy(outBuffs, arg_getStr(poped_arg)); __platform_memcpy(stmts, keeped, strGetSize(keeped) + 1); arg_deinit(poped_arg); arg_deinit(keeped_arg); return poped; } char* strsGetCleanCmd(Args* outBuffs, char* cmd) { pika_assert(cmd != NULL); int32_t size = strGetSize(cmd); /* lexer may generate more chars than input */ char* strOut = args_getBuff(outBuffs, size * 2); int32_t iOut = 0; ParserState_forEachToken(ps, cmd) { ParserState_iterStart(&ps); for (uint16_t k = 0; k < strGetSize(ps.token1.pyload); k++) { strOut[iOut] = ps.token1.pyload[k]; iOut++; } ParserState_iterEnd(&ps); } ParserState_deinit(&ps); /* add \0 */ strOut[iOut] = 0; return strOut; } char* strsDeleteBetween(Args* buffs_p, char* strIn, char begin, char end) { int32_t size = strGetSize(strIn); char* strOut = args_getBuff(buffs_p, size); uint8_t deepth = 0; uint32_t iOut = 0; for (int i = 0; i < size; i++) { if (end == strIn[i]) { deepth--; } if (0 == deepth) { strOut[iOut] = strIn[i]; iOut++; } if (begin == strIn[i]) { deepth++; } } strOut[iOut] = 0; return strOut; } static uint8_t Lexer_isError(char* line) { Args buffs = {0}; uint8_t res = 0; /* not error */ char* tokens = Lexer_getTokens(&buffs, line); if (NULL == tokens) { res 
= 1; /* lex error */ goto exit; } goto exit; exit: strsDeinit(&buffs); return res; } static char* __removeTokensBetween(Args* outBuffs, char* input, char* token_pyload1, char* token_pyload2) { Args buffs = {0}; uint8_t block_deepth = 0; char* output = ""; ParserState_forEachToken(ps, input) { ParserState_iterStart(&ps); if (strEqu(token_pyload1, ps.token1.pyload)) { if (block_deepth == 0) { output = strsAppend(&buffs, output, ps.token1.pyload); } block_deepth++; } if (strEqu(token_pyload2, ps.token1.pyload)) { block_deepth--; } if (block_deepth == 0) { output = strsAppend(&buffs, output, ps.token1.pyload); } ParserState_iterEnd(&ps); } ParserState_deinit(&ps); output = strsCopy(outBuffs, output); strsDeinit(&buffs); return output; } static enum StmtType Lexer_matchStmtType(char* right) { Args buffs = {0}; enum StmtType stmtType = STMT_none; char* rightWithoutSubStmt = __removeTokensBetween(&buffs, right, "(", ")"); rightWithoutSubStmt = __removeTokensBetween(&buffs, rightWithoutSubStmt, "[", "]"); rightWithoutSubStmt = __removeTokensBetween(&buffs, rightWithoutSubStmt, "{", "}"); PIKA_BOOL is_get_operator = PIKA_FALSE; PIKA_BOOL is_get_method = PIKA_FALSE; PIKA_BOOL is_get_string = PIKA_FALSE; PIKA_BOOL is_get_bytes = PIKA_FALSE; PIKA_BOOL is_get_number = PIKA_FALSE; PIKA_BOOL is_get_symbol = PIKA_FALSE; PIKA_BOOL is_get_list = PIKA_FALSE; PIKA_BOOL is_get_slice = PIKA_FALSE; PIKA_BOOL is_get_dict = PIKA_FALSE; PIKA_BOOL is_get_import = PIKA_FALSE; PIKA_BOOL is_get_chain = PIKA_FALSE; ParserState_forEachToken(ps, rightWithoutSubStmt) { ParserState_iterStart(&ps); /* collect type */ if (strEqu(ps.token1.pyload, " import ")) { is_get_import = PIKA_TRUE; goto iter_continue; } if (strEqu(ps.token2.pyload, "[")) { /* (symble | iteral | <]> | <)>) + <[> */ if (TOKEN_symbol == ps.token1.type || TOKEN_literal == ps.token1.type || strEqu(ps.token1.pyload, "]") || strEqu(ps.token1.pyload, ")")) { is_get_slice = PIKA_TRUE; goto iter_continue; } /* ( <,> | <=> ) + <[> */ 
is_get_list = PIKA_TRUE; goto iter_continue; } if (strEqu(ps.token1.pyload, "[") && ps.iter_index == 1) { /* VOID + <[> */ is_get_list = PIKA_TRUE; goto iter_continue; } if (strEqu(ps.token1.pyload, "...")) { goto iter_continue; } if (strIsStartWith(ps.token1.pyload, ".")) { if (ps.iter_index != 1) { is_get_chain = PIKA_TRUE; goto iter_continue; } } if (strEqu(ps.token1.pyload, "{")) { is_get_dict = PIKA_TRUE; goto iter_continue; } if (ps.token1.type == TOKEN_operator) { is_get_operator = PIKA_TRUE; goto iter_continue; } /* <(> */ if (strEqu(ps.token1.pyload, "(")) { is_get_method = PIKA_TRUE; goto iter_continue; } if (ps.token1.type == TOKEN_literal) { if (ps.token1.pyload[0] == '\'' || ps.token1.pyload[0] == '"') { is_get_string = PIKA_TRUE; goto iter_continue; } if (ps.token1.pyload[1] == '\'' || ps.token1.pyload[1] == '"') { if (ps.token1.pyload[0] == 'b') { is_get_bytes = PIKA_TRUE; goto iter_continue; } } is_get_number = PIKA_TRUE; goto iter_continue; } if (ps.token1.type == TOKEN_symbol) { is_get_symbol = PIKA_TRUE; goto iter_continue; } iter_continue: ParserState_iterEnd(&ps); } if (is_get_import) { stmtType = STMT_import; goto exit; } if (is_get_operator) { stmtType = STMT_operator; goto exit; } if (is_get_slice) { stmtType = STMT_slice; goto exit; } if (is_get_list) { stmtType = STMT_list; goto exit; } if (is_get_dict) { stmtType = STMT_dict; goto exit; } if (is_get_chain) { stmtType = STMT_chain; goto exit; } if (is_get_method) { stmtType = STMT_method; goto exit; } if (is_get_string) { stmtType = STMT_string; goto exit; } if (is_get_bytes) { stmtType = STMT_bytes; goto exit; } if (is_get_number) { stmtType = STMT_number; goto exit; } if (is_get_symbol) { stmtType = STMT_reference; goto exit; } exit: ParserState_deinit(&ps); strsDeinit(&buffs); return stmtType; } char* Lexer_printTokens(Args* outBuffs, char* tokens) { /* init */ Args buffs = {0}; char* printOut = strsCopy(&buffs, ""); /* process */ uint16_t token_size = Tokens_getSize(tokens); for 
(uint16_t i = 0; i < token_size; i++) { char* token = Parser_popToken(&buffs, tokens); if (token[0] == TOKEN_operator) { printOut = strsAppend(&buffs, printOut, "{opt}"); printOut = strsAppend(&buffs, printOut, token + 1); } if (token[0] == TOKEN_devider) { printOut = strsAppend(&buffs, printOut, "{dvd}"); printOut = strsAppend(&buffs, printOut, token + 1); } if (token[0] == TOKEN_symbol) { printOut = strsAppend(&buffs, printOut, "{sym}"); printOut = strsAppend(&buffs, printOut, token + 1); } if (token[0] == TOKEN_literal) { printOut = strsAppend(&buffs, printOut, "{lit}"); printOut = strsAppend(&buffs, printOut, token + 1); } } /* out put */ printOut = strsCopy(outBuffs, printOut); strsDeinit(&buffs); return printOut; } uint8_t Parser_checkIsDirect(char* str) { Args buffs = {0}; char* tokens = Lexer_getTokens(&buffs, str); uint8_t res = 0; pika_assert(NULL != tokens); if (Parser_isContainToken(tokens, TOKEN_operator, "=")) { res = 1; goto exit; } exit: strsDeinit(&buffs); return res; } Arg* Lexer_setToken(Arg* tokens_arg, enum TokenType token_type, char* operator) { Args buffs = {0}; char token_type_buff[3] = {0}; token_type_buff[0] = 0x1F; token_type_buff[1] = token_type; char* tokens = arg_getStr(tokens_arg); tokens = strsAppend(&buffs, tokens, token_type_buff); tokens = strsAppend(&buffs, tokens, operator); Arg* new_tokens_arg = arg_setStr(tokens_arg, "", tokens); arg_deinit(tokens_arg); strsDeinit(&buffs); return new_tokens_arg; } Arg* Lexer_setSymbel(Arg* tokens_arg, char* stmt, int32_t i, int32_t* symbol_start_index) { Args buffs = {0}; char* symbol_buff = NULL; if (-1 == *symbol_start_index) { /* no found symbol start index */ goto exit; } /* nothing to add symbel */ if (i == *symbol_start_index) { goto exit; } symbol_buff = args_getBuff(&buffs, i - *symbol_start_index); __platform_memcpy(symbol_buff, stmt + *symbol_start_index, i - *symbol_start_index); /* literal */ if ((symbol_buff[0] == '\'') || (symbol_buff[0] == '"')) { /* "" or '' */ tokens_arg = 
Lexer_setToken(tokens_arg, TOKEN_literal, symbol_buff); goto exit; } if ((symbol_buff[0] >= '0') && (symbol_buff[0] <= '9')) { /* number */ tokens_arg = Lexer_setToken(tokens_arg, TOKEN_literal, symbol_buff); goto exit; } if ((symbol_buff[0] == 'b') && ((symbol_buff[1] == '\'') || (symbol_buff[1] == '"'))) { /* b"" or b'' */ tokens_arg = Lexer_setToken(tokens_arg, TOKEN_literal, symbol_buff); goto exit; } /* symbol */ tokens_arg = Lexer_setToken(tokens_arg, TOKEN_symbol, symbol_buff); goto exit; exit: *symbol_start_index = -1; strsDeinit(&buffs); return tokens_arg; } /* tokens is devided by space */ /* a token is [TOKENTYPE|(CONTENT)] */ char* Lexer_getTokens(Args* outBuffs, char* stmt) { /* init */ Arg* tokens_arg = New_arg(NULL); tokens_arg = arg_setStr(tokens_arg, "", ""); int32_t size = strGetSize(stmt); uint8_t bracket_deepth = 0; uint8_t cn1 = 0; uint8_t c0 = 0; uint8_t c1 = 0; uint8_t c2 = 0; uint8_t c3 = 0; uint8_t c4 = 0; uint8_t c5 = 0; uint8_t c6 = 0; int32_t symbol_start_index = -1; int is_in_string = 0; char* tokens; /* process */ for (int32_t i = 0; i < size; i++) { /* update char */ cn1 = 0; c0 = stmt[i]; c1 = 0; c2 = 0; c3 = 0; c4 = 0; c5 = 0; c6 = 0; if (i - 1 >= 0) { cn1 = stmt[i - 1]; } if (i + 1 < size) { c1 = stmt[i + 1]; } if (i + 2 < size) { c2 = stmt[i + 2]; } if (i + 3 < size) { c3 = stmt[i + 3]; } if (i + 4 < size) { c4 = stmt[i + 4]; } if (i + 5 < size) { c5 = stmt[i + 5]; } if (i + 6 < size) { c6 = stmt[i + 6]; } if (-1 == symbol_start_index) { symbol_start_index = i; } /* solve string */ if (0 == is_in_string) { if ('\'' == c0 && '\\' != cn1) { /* in ' */ is_in_string = 1; continue; } if ('"' == c0 && '\\' != cn1) { /* in "" */ is_in_string = 2; continue; } } if (1 == is_in_string) { if ('\'' == c0 && '\\' != cn1) { is_in_string = 0; tokens_arg = Lexer_setSymbel(tokens_arg, stmt, i + 1, &symbol_start_index); } continue; } if (2 == is_in_string) { if ('"' == c0 && '\\' != cn1) { is_in_string = 0; tokens_arg = Lexer_setSymbel(tokens_arg, 
stmt, i + 1, &symbol_start_index); } continue; } /* match devider*/ if (('(' == c0) || (')' == c0) || (',' == c0) || ('[' == c0) || (']' == c0) || (':' == c0) || ('{' == c0) || ('}' == c0)) { tokens_arg = Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index); char content[2] = {0}; content[0] = c0; tokens_arg = Lexer_setToken(tokens_arg, TOKEN_devider, content); if (c0 == '(') { bracket_deepth++; } if (c0 == ')') { bracket_deepth--; } continue; } /* match operator */ if (('>' == c0) || ('<' == c0) || ('*' == c0) || ('/' == c0) || ('+' == c0) || ('-' == c0) || ('!' == c0) || ('=' == c0) || ('%' == c0) || ('&' == c0) || ('|' == c0) || ('^' == c0) || ('~' == c0)) { if (('*' == c0) || ('/' == c0)) { /* =, **=, // */ if ((c0 == c1) && ('=' == c2)) { char content[4] = {0}; content[0] = c0; content[1] = c1; content[2] = '='; tokens_arg = Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index); tokens_arg = Lexer_setToken(tokens_arg, TOKEN_operator, content); i = i + 2; continue; } } /* >>, <<, **, // */ if (('>' == c0) || ('<' == c0) || ('*' == c0) || ('/' == c0)) { if (c0 == c1) { char content[3] = {0}; content[0] = c0; content[1] = c1; tokens_arg = Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index); tokens_arg = Lexer_setToken(tokens_arg, TOKEN_operator, content); i = i + 1; continue; } } /* >=, <=, *=, /=, +=, -=, !=, ==, %= */ if (('>' == c0) || ('<' == c0) || ('*' == c0) || ('/' == c0) || ('+' == c0) || ('-' == c0) || ('!' == c0) || ('=' == c0) || ('%' == c0)) { if ('=' == c1) { char content[3] = {0}; content[0] = c0; content[1] = c1; tokens_arg = Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index); tokens_arg = Lexer_setToken(tokens_arg, TOKEN_operator, content); i = i + 1; continue; } } /* single operator */ /* +, -, *, ... 
/ */ char content[2] = {0}; content[0] = c0; tokens_arg = Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index); tokens_arg = Lexer_setToken(tokens_arg, TOKEN_operator, content); continue; } // not the string operator if ((cn1 >= 'a' && cn1 <= 'z') || (cn1 >= 'A' && cn1 <= 'Z') || (cn1 >= '0' && cn1 <= '9') || cn1 == '_' || cn1 == '.') { goto after_match_string_operator; } /* not */ if ('n' == c0) { if (('o' == c1) && ('t' == c2) && (' ' == c3)) { tokens_arg = Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index); tokens_arg = Lexer_setToken(tokens_arg, TOKEN_operator, " not "); i = i + 3; continue; } } /* and */ if ('a' == c0) { if (('n' == c1) && ('d' == c2) && (' ' == c3)) { tokens_arg = Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index); tokens_arg = Lexer_setToken(tokens_arg, TOKEN_operator, " and "); i = i + 3; continue; } } /* or */ if ('o' == c0) { if (('r' == c1) && (' ' == c2)) { tokens_arg = Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index); tokens_arg = Lexer_setToken(tokens_arg, TOKEN_operator, " or "); i = i + 2; continue; } } /* as */ if ('a' == c0) { if (('s' == c1) && (' ' == c2)) { tokens_arg = Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index); tokens_arg = Lexer_setToken(tokens_arg, TOKEN_operator, " as "); i = i + 2; continue; } } /* import */ if ('i' == c0) { if (('m' == c1) && ('p' == c2) && ('o' == c3) && ('r' == c4) && ('t' == c5) && (' ' == c6)) { tokens_arg = Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index); tokens_arg = Lexer_setToken(tokens_arg, TOKEN_operator, " import "); i = i + 5; continue; } } after_match_string_operator: /* skip spaces */ if (' ' == c0) { /* not get symbal */ if (i == symbol_start_index) { symbol_start_index++; } else { /* already get symbal */ tokens_arg = Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index); } } if (i == size - 1) { /* last check symbel */ // if('\n' == c0){ // continue; // } tokens_arg = Lexer_setSymbel(tokens_arg, stmt, size, &symbol_start_index); } 
} if (0 != bracket_deepth) { /* bracket match error */ tokens = NULL; goto exit; } /* output */ tokens = arg_getStr(tokens_arg); tokens = strsCopy(outBuffs, tokens); exit: arg_deinit(tokens_arg); return tokens; } char* Parser_popToken(Args* buffs_p, char* tokens) { return strsPopToken(buffs_p, tokens, 0x1F); } enum TokenType Token_getType(char* token) { return (enum TokenType)token[0]; } char* Token_getPyload(char* token) { return (char*)((uintptr_t)token + 1); } uint8_t Parser_isContainToken(char* tokens, enum TokenType token_type, char* pyload) { Args buffs = {0}; char* tokens_buff = strsCopy(&buffs, tokens); uint8_t res = 0; uint16_t token_size = Tokens_getSize(tokens); for (int i = 0; i < token_size; i++) { char* token = Parser_popToken(&buffs, tokens_buff); if (token_type == Token_getType(token)) { if (strEqu(Token_getPyload(token), pyload)) { res = 1; goto exit; } } } exit: strsDeinit(&buffs); return res; } static const char operators[][9] = { "**", "~", "*", "/", "%", "//", "+", "-", ">>", "<<", "&", "^", "|", "<", "<=", ">", ">=", "!=", "==", "%=", "/=", "//=", "-=", "+=", "*=", "**=", " not ", " and ", " or ", " import "}; char* Lexer_getOperator(Args* outBuffs, char* stmt) { Args buffs = {0}; char* operator= NULL; char* tokens = Lexer_getTokens(&buffs, stmt); // use parse state foreach to get operator for (uint32_t i = 0; i < sizeof(operators) / 9; i++) { ParserState_forEachToken(ps, tokens) { ParserState_iterStart(&ps); // get operator if (strEqu(ps.token2.pyload, (char*)operators[i])) { // solve the iuuse of "~-1" operator= strsCopy(&buffs, (char*)operators[i]); ParserState_iterEnd(&ps); break; } ParserState_iterEnd(&ps); }; ParserState_deinit(&ps); } /* solve the iuuse of "~-1" */ if (strEqu(operator, "-")) { ParserState_forEachToken(ps, stmt) { ParserState_iterStart(&ps); if (strEqu(ps.token2.pyload, "-")) { if (ps.token1.type == TOKEN_operator) { operator= strsCopy(&buffs, ps.token1.pyload); ParserState_iterEnd(&ps); break; } } 
ParserState_iterEnd(&ps); }; ParserState_deinit(&ps); } /* match the last operator in equal level */ if ((strEqu(operator, "+")) || (strEqu(operator, "-"))) { ParserState_forEachToken(ps, stmt) { ParserState_iterStart(&ps); if (strEqu(ps.token1.pyload, "+")) { operator= strsCopy(&buffs, "+"); } if (strEqu(ps.token1.pyload, "-")) { operator= strsCopy(&buffs, "-"); } ParserState_iterEnd(&ps); } ParserState_deinit(&ps); } /* out put */ if (NULL == operator) { return NULL; } operator= strsCopy(outBuffs, operator); strsDeinit(&buffs); return operator; } const char void_str[] = ""; void LexToken_update(struct LexToken* lex_token) { lex_token->type = Token_getType(lex_token->token); if (lex_token->type == TOKEN_strEnd) { lex_token->pyload = (char*)void_str; } else { lex_token->pyload = Token_getPyload(lex_token->token); } } void ParserState_iterStart(struct ParserState* ps) { ps->iter_index++; ps->iter_buffs = New_strBuff(); /* token1 is the last token */ ps->token1.token = strsCopy(ps->iter_buffs, arg_getStr(ps->last_token)); /* token2 is the next token */ ps->token2.token = Parser_popToken(ps->iter_buffs, ps->tokens); /* store last token */ arg_deinit(ps->last_token); ps->last_token = arg_newStr(ps->token2.token); LexToken_update(&ps->token1); LexToken_update(&ps->token2); if (strEqu(ps->token1.pyload, "(")) { ps->branket_deepth++; } if (strEqu(ps->token1.pyload, ")")) { ps->branket_deepth--; } if (strEqu(ps->token1.pyload, "[")) { ps->branket_deepth++; } if (strEqu(ps->token1.pyload, "]")) { ps->branket_deepth--; } } void LexToken_init(struct LexToken* lt) { lt->pyload = NULL; lt->token = NULL; lt->type = TOKEN_strEnd; } void ParserState_init(struct ParserState* ps) { ps->tokens = NULL; ps->length = 0; ps->iter_index = 0; ps->branket_deepth = 0; ps->last_token = NULL; ps->iter_buffs = NULL; ps->buffs_p = New_strBuff(); ps->result = PIKA_RES_OK; LexToken_init(&ps->token1); LexToken_init(&ps->token2); } void ParserState_iterEnd(struct ParserState* ps) { 
args_deinit(ps->iter_buffs); } void ParserState_deinit(struct ParserState* ps) { if (NULL != ps->last_token) { arg_deinit(ps->last_token); } args_deinit(ps->buffs_p); } void ParserState_parse(struct ParserState* ps, char* stmt) { if (NULL == stmt) { ps->result = PIKA_RES_ERR_SYNTAX_ERROR; return; } ps->tokens = Lexer_getTokens(ps->buffs_p, stmt); if (NULL == ps->tokens) { ps->result = PIKA_RES_ERR_SYNTAX_ERROR; return; } ps->length = Tokens_getSize(ps->tokens); } void ParserState_beforeIter(struct ParserState* ps) { /* clear first token */ if (ps->result != PIKA_RES_OK) { return; } Parser_popToken(ps->buffs_p, ps->tokens); ps->last_token = arg_newStr(Parser_popToken(ps->buffs_p, ps->tokens)); } #if PIKA_SYNTAX_SLICE_ENABLE static void Slice_getPars(Args* outBuffs, char* inner, char** pStart, char** pEnd, char** pStep) { Args buffs = {0}; *pStart = ""; *pEnd = ""; *pStep = ""; /* slice */ uint8_t colon_i = 0; ParserState_forEachToken(ps, inner) { ParserState_iterStart(&ps); if (strEqu(ps.token1.pyload, ":") && ps.branket_deepth == 0) { colon_i++; goto iter_continue1; } if (colon_i == 0) { *pStart = strsAppend(&buffs, *pStart, ps.token1.pyload); } if (colon_i == 1) { *pEnd = strsAppend(&buffs, *pEnd, ps.token1.pyload); } if (colon_i == 2) { *pStep = strsAppend(&buffs, *pStep, ps.token1.pyload); } iter_continue1: ParserState_iterEnd(&ps); } ParserState_deinit(&ps); if (colon_i == 1) { *pStep = "1"; if (strEqu(*pStart, "")) { *pStart = "0"; } if (strEqu(*pEnd, "")) { *pEnd = "-1"; } } if (colon_i == 0) { *pEnd = strsAppend(&buffs, *pStart, " + 1"); *pStep = "1"; } /* slice with step */ /* output */ *pStart = strsCopy(outBuffs, *pStart); *pEnd = strsCopy(outBuffs, *pEnd); *pStep = strsCopy(outBuffs, *pStep); /* clean */ strsDeinit(&buffs); } #endif #if PIKA_SYNTAX_SLICE_ENABLE char* Suger_solveLeftBranckets(Args* outBuffs, char* right, char** left_p) { /* init objects */ Args buffs = {0}; Arg* right_arg = arg_newStr(""); char* left = *left_p; uint8_t is_in_brancket = 0; 
args_setStr(&buffs, "inner", ""); uint8_t matched = 0; char* right_res = NULL; /* exit when NULL */ if (NULL == left) { arg_deinit(right_arg); right_arg = arg_setStr(right_arg, "", right); goto exit; } /* exit when not match (symble|iteral)'[' */ ParserState_forEachToken(ps, left) { ParserState_iterStart(&ps); if (strEqu(ps.token2.pyload, "[")) { if (TOKEN_symbol == ps.token1.type || TOKEN_literal == ps.token1.type) { matched = 1; ParserState_iterEnd(&ps); break; } } ParserState_iterEnd(&ps); } ParserState_deinit(&ps); if (!matched) { /* not contain '[', return origin */ arg_deinit(right_arg); right_arg = arg_setStr(right_arg, "", right); goto exit; } /* matched [] */ ParserState_forEachTokenExistPs(ps, left) { ParserState_iterStart(&ps); /* found '[' */ if ((TOKEN_devider == ps.token2.type) && (strEqu(ps.token2.pyload, "["))) { /* get 'obj' from obj[] */ args_setStr(&buffs, "obj", ps.token1.pyload); is_in_brancket = 1; /* fond ']' */ } else if ((TOKEN_devider == ps.token2.type) && (strEqu(ps.token2.pyload, "]"))) { is_in_brancket = 0; char* inner = args_getStr(&buffs, "inner"); Arg* inner_arg = arg_newStr(inner); inner_arg = arg_strAppend(inner_arg, ps.token1.pyload); args_setStr(&buffs, "inner", arg_getStr(inner_arg)); arg_deinit(inner_arg); /* update inner pointer */ inner = args_getStr(&buffs, "inner"); char* start = NULL; char* end = NULL; char* step = NULL; Slice_getPars(&buffs, inner, &start, &end, &step); /* obj = __setitem__(obj, key, val) */ right_arg = arg_strAppend(right_arg, "__setitem__("); right_arg = arg_strAppend(right_arg, args_getStr(&buffs, "obj")); right_arg = arg_strAppend(right_arg, ","); right_arg = arg_strAppend(right_arg, start); right_arg = arg_strAppend(right_arg, ","); right_arg = arg_strAppend(right_arg, right); right_arg = arg_strAppend(right_arg, ")"); /* clean the inner */ args_setStr(&buffs, "inner", ""); /* in brancket and found '[' */ } else if (is_in_brancket && (!strEqu(ps.token1.pyload, "["))) { char* inner = 
args_getStr(&buffs, "inner"); Arg* index_arg = arg_newStr(inner); index_arg = arg_strAppend(index_arg, ps.token1.pyload); args_setStr(&buffs, "inner", arg_getStr(index_arg)); arg_deinit(index_arg); /* out of brancket and not found ']' */ } else if (!is_in_brancket && (!strEqu(ps.token1.pyload, "]"))) { if (TOKEN_strEnd != ps.token1.type) { right_arg = arg_strAppend(right_arg, ps.token1.pyload); } } ParserState_iterEnd(&ps); } ParserState_deinit(&ps); /* clean the left */ for (size_t i = 0; i < strGetSize(left); i++) { if (left[i] == '[') { left[i] = '\0'; break; } } exit: /* clean and return */ right_res = strsCopy(outBuffs, arg_getStr(right_arg)); arg_deinit(right_arg); strsDeinit(&buffs); return right_res; } #endif #if PIKA_SYNTAX_FORMAT_ENABLE char* Suger_solveFormat(Args* outBuffs, char* right) { /* quick skip */ if (!strIsContain(right, '%')) { return right; } PIKA_BOOL is_format = PIKA_FALSE; ParserState_forEachToken(ps1, right) { ParserState_iterStart(&ps1); if (ps1.branket_deepth == 0 && strEqu(ps1.token1.pyload, "%")) { is_format = PIKA_TRUE; } ParserState_iterEnd(&ps1); } ParserState_deinit(&ps1); if (PIKA_FALSE == is_format) { return right; } char* res = right; Arg* str_buf = arg_newStr(""); Arg* var_buf = arg_newStr(""); PIKA_BOOL is_in_format = PIKA_FALSE; PIKA_BOOL is_tuple = PIKA_FALSE; PIKA_BOOL is_out_vars = PIKA_FALSE; Args buffs = {0}; char* fmt = NULL; ParserState_forEachToken(ps, right) { char* item = ""; ParserState_iterStart(&ps); if (PIKA_FALSE == is_in_format) { if (ps.token1.type != TOKEN_literal) { item = ps.token1.pyload; goto iter_continue; } if (ps.token1.pyload[0] != '\'' && ps.token1.pyload[0] != '"') { item = ps.token1.pyload; goto iter_continue; } if (!strEqu(ps.token2.pyload, "%")) { item = ps.token1.pyload; goto iter_continue; } /* found the format stmt */ is_in_format = PIKA_TRUE; fmt = strsCopy(&buffs, ps.token1.pyload); goto iter_continue; } if (PIKA_TRUE == is_in_format) { /* check the format vars */ if 
(strEqu(ps.token1.pyload, "%")) { /* is a tuple */ if (strEqu(ps.token2.pyload, "(")) { is_tuple = PIKA_TRUE; } else { var_buf = arg_strAppend(var_buf, ps.token2.pyload); } goto iter_continue; } /* found the end of tuple */ if (ps.iter_index == ps.length) { is_out_vars = PIKA_TRUE; is_in_format = PIKA_FALSE; } else { /* push the vars inner the tuple */ var_buf = arg_strAppend(var_buf, ps.token2.pyload); } if (is_out_vars) { if (is_tuple) { str_buf = arg_strAppend(str_buf, "cformat("); str_buf = arg_strAppend(str_buf, fmt); str_buf = arg_strAppend(str_buf, ","); str_buf = arg_strAppend(str_buf, arg_getStr(var_buf)); } else { str_buf = arg_strAppend(str_buf, "cformat("); str_buf = arg_strAppend(str_buf, fmt); str_buf = arg_strAppend(str_buf, ","); str_buf = arg_strAppend(str_buf, arg_getStr(var_buf)); str_buf = arg_strAppend(str_buf, ")"); } } } iter_continue: if (!is_in_format) { str_buf = arg_strAppend(str_buf, item); } ParserState_iterEnd(&ps); } ParserState_deinit(&ps); res = strsCopy(outBuffs, arg_getStr(str_buf)); arg_deinit(str_buf); arg_deinit(var_buf); strsDeinit(&buffs); return res; } #endif uint8_t Parser_solveSelfOperator(Args* outbuffs, char* stmt, char** right_p, char** left_p) { char* left_new = NULL; char* right_new = NULL; Arg* left_arg = arg_newStr(""); Arg* right_arg = arg_newStr(""); Arg* right_arg_new = arg_newStr(""); uint8_t is_left_exist = 0; Args buffs = {0}; char _operator[2] = {0}; char* operator=(char*) _operator; char* tokens = Lexer_getTokens(&buffs, stmt); uint8_t is_right = 0; if (Parser_isContainToken(tokens, TOKEN_operator, "+=")) { operator[0] = '+'; } if (Parser_isContainToken(tokens, TOKEN_operator, "-=")) { operator[0] = '-'; } if (Parser_isContainToken(tokens, TOKEN_operator, "*=")) { operator[0] = '*'; } if (Parser_isContainToken(tokens, TOKEN_operator, "/=")) { operator[0] = '/'; } /* not found self operator */ if (operator[0] == 0) { goto exit; } /* found self operator */ is_left_exist = 1; ParserState_forEachToken(ps, stmt) 
{ ParserState_iterStart(&ps); if ((strEqu(ps.token1.pyload, "*=")) || (strEqu(ps.token1.pyload, "/=")) || (strEqu(ps.token1.pyload, "+=")) || (strEqu(ps.token1.pyload, "-="))) { is_right = 1; goto iter_continue; } if (!is_right) { left_arg = arg_strAppend(left_arg, ps.token1.pyload); } else { right_arg = arg_strAppend(right_arg, ps.token1.pyload); } iter_continue: ParserState_iterEnd(&ps); } ParserState_deinit(&ps); /* connect right */ right_arg_new = arg_strAppend(right_arg_new, arg_getStr(left_arg)); right_arg_new = arg_strAppend(right_arg_new, operator); right_arg_new = arg_strAppend(right_arg_new, "("); right_arg_new = arg_strAppend(right_arg_new, arg_getStr(right_arg)); right_arg_new = arg_strAppend(right_arg_new, ")"); /* collect left_new and right_new */ left_new = arg_getStr(left_arg); right_new = arg_getStr(right_arg_new); exit: strsDeinit(&buffs); if (NULL != right_new) { *(right_p) = strsCopy(outbuffs, right_new); ; } if (NULL != left_new) { *(left_p) = strsCopy(outbuffs, left_new); } arg_deinit(right_arg); arg_deinit(left_arg); arg_deinit(right_arg_new); return is_left_exist; } PIKA_RES AST_setThisNode(AST* ast, char* node_type, char* node_content) { return obj_setStr(ast, node_type, node_content); } AST* AST_parseStmt(AST* ast, char* stmt); PIKA_RES AST_parseSubStmt(AST* ast, char* node_content) { queueObj_pushObj(ast, (char*)"stmt"); AST_parseStmt(queueObj_getCurrentObj(ast), node_content); return PIKA_RES_OK; } char* Parser_popSubStmt(Args* outbuffs, char** stmt_p, char* delimiter) { Arg* substmt_arg = arg_newStr(""); Arg* newstmt_arg = arg_newStr(""); char* stmt = *stmt_p; PIKA_BOOL is_get_substmt = PIKA_FALSE; Args buffs = {0}; ParserState_forEachToken(ps, stmt) { ParserState_iterStart(&ps); if (is_get_substmt) { /* get new stmt */ newstmt_arg = arg_strAppend(newstmt_arg, ps.token1.pyload); ParserState_iterEnd(&ps); continue; } if (ps.branket_deepth > 0) { /* ignore */ substmt_arg = arg_strAppend(substmt_arg, ps.token1.pyload); 
ParserState_iterEnd(&ps); continue; } if (strEqu(ps.token1.pyload, delimiter)) { /* found delimiter */ is_get_substmt = PIKA_TRUE; ParserState_iterEnd(&ps); continue; } /* collect substmt */ substmt_arg = arg_strAppend(substmt_arg, ps.token1.pyload); ParserState_iterEnd(&ps); } ParserState_deinit(&ps); strsDeinit(&buffs); char* substmt = strsCacheArg(outbuffs, substmt_arg); char* newstmt = strsCacheArg(outbuffs, newstmt_arg); *stmt_p = newstmt; return substmt; } char* Parser_popLastSubStmt(Args* outbuffs, char** stmt_p, char* delimiter) { uint8_t last_stmt_i = 0; char* stmt = *stmt_p; ParserState_forEachToken(ps, stmt) { ParserState_iterStart(&ps); if (strIsStartWith(ps.token1.pyload, delimiter)) { /* found delimiter */ if (!strEqu(delimiter, "[") && ps.branket_deepth > 0) { /* ignore */ ParserState_iterEnd(&ps); continue; } /* for "[" */ if (ps.branket_deepth > 1) { /* ignore */ ParserState_iterEnd(&ps); continue; } last_stmt_i = ps.iter_index; } ParserState_iterEnd(&ps); } ParserState_deinit(&ps); Arg* mainStmt = arg_newStr(""); Arg* lastStmt = arg_newStr(""); { ParserState_forEachToken(ps, stmt) { ParserState_iterStart(&ps); if (ps.iter_index < last_stmt_i) { mainStmt = arg_strAppend(mainStmt, ps.token1.pyload); } if (ps.iter_index >= last_stmt_i) { lastStmt = arg_strAppend(lastStmt, ps.token1.pyload); } ParserState_iterEnd(&ps); } ParserState_deinit(&ps); } *stmt_p = strsCacheArg(outbuffs, mainStmt); return strsCacheArg(outbuffs, lastStmt); } AST* AST_parseStmt(AST* ast, char* stmt) { Args buffs = {0}; char* assignment = strsGetFirstToken(&buffs, stmt, '('); char* method = NULL; char* ref = NULL; char* str = NULL; char* num = NULL; char* left = NULL; char* right = NULL; char* import = NULL; PIKA_RES result = PIKA_RES_OK; right = stmt; /* solve check direct */ uint8_t isLeftExist = 0; if (Parser_checkIsDirect(assignment)) { isLeftExist = 1; left = strsCopy(&buffs, ""); right = strsCopy(&buffs, ""); uint8_t is_meet_equ = 0; ParserState_forEachToken(ps, stmt) { 
ParserState_iterStart(&ps);
            if (strEqu(ps.token1.pyload, "=") &&
                ps.token1.type == TOKEN_operator) {
                /* the "=" operator itself is dropped */
                is_meet_equ = 1;
                ParserState_iterEnd(&ps);
                continue;
            }
            /* tokens before "=" build the left side, tokens after it the
             * right side */
            if (0 == is_meet_equ) {
                left = strsAppend(&buffs, left, ps.token1.pyload);
            }
            if (1 == is_meet_equ) {
                right = strsAppend(&buffs, right, ps.token1.pyload);
            }
            ParserState_iterEnd(&ps);
        }
        ParserState_deinit(&ps);
    }
    /* solve the += -= /= *= stmt */
    if (!isLeftExist) {
        isLeftExist = Parser_solveSelfOperator(&buffs, stmt, &right, &left);
    }

#if PIKA_SYNTAX_SLICE_ENABLE
    /* solve the [] stmt */
    right = Suger_solveLeftBranckets(&buffs, right, &left);
#endif

#if PIKA_SYNTAX_FORMAT_ENABLE
    right = Suger_solveFormat(&buffs, right);
#endif

    /* set left */
    if (isLeftExist) {
        AST_setThisNode(ast, (char*)"left", left);
    }
    /* match statement type */
    enum StmtType stmtType = Lexer_matchStmtType(right);
    /* solve operator stmt */
    if (STMT_operator == stmtType) {
        /* look for the operator with the parenthesised parts removed */
        char* rightWithoutSubStmt = strsDeleteBetween(&buffs, right, '(', ')');
        char* operator= Lexer_getOperator(&buffs, rightWithoutSubStmt);
        if (NULL == operator) {
            result = PIKA_RES_ERR_SYNTAX_ERROR;
            goto exit;
        }
        AST_setThisNode(ast, (char*)"operator", operator);
        /* split "<subStmt1><operator><subStmt2>"; what remains in rightBuff
         * after the pop is used as the second operand */
        char* rightBuff = strsCopy(&buffs, right);
        char* subStmt1 =
            strsPopTokenWithSkip_byStr(&buffs, rightBuff, operator, '(', ')');
        char* subStmt2 = rightBuff;
        AST_parseSubStmt(ast, subStmt1);
        AST_parseSubStmt(ast, subStmt2);
        goto exit;
    }

#if PIKA_BUILTIN_STRUCT_ENABLE
    /* solve list stmt */
    if (STMT_list == stmtType) {
        AST_setThisNode(ast, (char*)"list", "list");
        char* subStmts = strsCut(&buffs, right, '[', ']');
        /* a trailing ',' is appended before the items are popped */
        subStmts = strsAppend(&buffs, subStmts, ",");
        while (1) {
            char* subStmt = Parser_popSubStmt(&buffs, &subStmts, ",");
            AST_parseSubStmt(ast, subStmt);
            if (strEqu(subStmts, "")) {
                break;
            }
        }
        goto exit;
    }
#endif

#if PIKA_BUILTIN_STRUCT_ENABLE
    /* solve dict stmt */
    if (STMT_dict == stmtType) {
        AST_setThisNode(ast, (char*)"dict", "dict");
        char* subStmts = strsCut(&buffs, right, '{', '}');
        subStmts = strsAppend(&buffs,
subStmts, ",");
        while (1) {
            /* each item is "key:value"; key and value become two sub
             * statements pushed in that order */
            char* subStmt = Parser_popSubStmt(&buffs, &subStmts, ",");
            char* key = Parser_popSubStmt(&buffs, &subStmt, ":");
            char* value = subStmt;
            AST_parseSubStmt(ast, key);
            AST_parseSubStmt(ast, value);
            if (strEqu(subStmts, "")) {
                break;
            }
        }
        goto exit;
    }
#endif

    /* solve method chain */
    if (STMT_chain == stmtType) {
        /* everything before the last "." is a sub statement; the last link
         * is parsed into this same node */
        char* stmt = strsCopy(&buffs, right);
        char* lastStmt = Parser_popLastSubStmt(&buffs, &stmt, ".");
        AST_parseSubStmt(ast, stmt);
        AST_parseStmt(ast, lastStmt);
        goto exit;
    }

#if PIKA_SYNTAX_SLICE_ENABLE
    if (STMT_slice == stmtType) {
        /* solve slice stmt */
        AST_setThisNode(ast, (char*)"slice", "slice");
        char* stmt = strsCopy(&buffs, right);
        char* laststmt = Parser_popLastSubStmt(&buffs, &stmt, "[");
        /* first sub statement: the object being sliced */
        AST_parseSubStmt(ast, stmt);
        char* slice_list = strsCut(&buffs, laststmt, '[', ']');
        slice_list = strsAppend(&buffs, slice_list, ":");
        int index = 0;
        while (1) {
            char* slice_str = Parser_popSubStmt(&buffs, &slice_list, ":");
            /* an omitted start is parsed as "0", an omitted end as "-1" */
            if (index == 0 && strEqu(slice_str, "")) {
                AST_parseSubStmt(ast, "0");
            } else if (index == 1 && strEqu(slice_str, "")) {
                AST_parseSubStmt(ast, "-1");
            } else {
                AST_parseSubStmt(ast, slice_str);
            }
            index++;
            if (strEqu("", slice_list)) {
                break;
            }
        }
        goto exit;
    }
#endif

    /* solve method stmt */
    if (STMT_method == stmtType) {
        method = strsGetFirstToken(&buffs, right, '(');
        AST_setThisNode(ast, (char*)"method", method);
        char* subStmts = strsCut(&buffs, right, '(', ')');
        pika_assert(NULL != subStmts);
        /* add ',' at the end */
        subStmts = strsAppend(&buffs, subStmts, ",");
        /* every argument becomes one sub statement */
        while (1) {
            char* substmt = Parser_popSubStmt(&buffs, &subStmts, ",");
            AST_parseSubStmt(ast, substmt);
            if (strEqu("", subStmts)) {
                break;
            }
        }
        goto exit;
    }
    /* solve reference stmt */
    if (STMT_reference == stmtType) {
        ref = right;
        AST_setThisNode(ast, (char*)"ref", ref);
        goto exit;
    }
    /* solve import stmt */
    if (STMT_import == stmtType) {
        /* the node value is the last space-separated token */
        import = strsGetLastToken(&buffs, right, ' ');
        AST_setThisNode(ast, (char*)"import", import);
        goto exit;
    }
    /* solve str stmt */
    if
(STMT_string == stmtType) { str = strsCopy(&buffs, right); /* remove the first char */ str = str + 1; /* remove the last char */ str[strGetSize(str) - 1] = '\0'; /* replace */ if (strIsContain(str, '\\')) { str = strsReplace(&buffs, str, "\\\"", "\""); str = strsReplace(&buffs, str, "\\'", "'"); } AST_setThisNode(ast, (char*)"string", str); goto exit; } /* solve bytes stmt */ if (STMT_bytes == stmtType) { str = right + 1; str = strsDeleteChar(&buffs, str, '\''); str = strsDeleteChar(&buffs, str, '\"'); AST_setThisNode(ast, (char*)"bytes", str); goto exit; } /* solve number stmt */ if (STMT_number == stmtType) { num = right; AST_setThisNode(ast, (char*)"num", num); goto exit; } exit: strsDeinit(&buffs); if (result != PIKA_RES_OK) { AST_deinit(ast); return NULL; } return ast; } static int32_t Parser_getPyLineBlockDeepth(char* line) { uint32_t size = strGetSize(line); for (uint32_t i = 0; i < size; i++) { if (line[i] != ' ') { uint32_t spaceNum = i; if (0 == spaceNum % 4) { return spaceNum / 4; } /* space Num is not 4N, error*/ return -1; } } return 0; } char* Parser_removeAnnotation(char* line) { uint8_t is_annotation_exit = 0; uint8_t is_in_single_quotes = 0; uint8_t is_in_double_quotes_deepth = 0; for (uint32_t i = 0; i < strGetSize(line); i++) { if ('\'' == line[i]) { is_in_single_quotes = !is_in_single_quotes; continue; } if ('"' == line[i]) { is_in_double_quotes_deepth = !is_in_double_quotes_deepth; continue; } if (!(is_in_single_quotes == 0 && is_in_double_quotes_deepth == 0)) { continue; } if ('#' == line[i]) { /* end the line */ line[i] = 0; is_annotation_exit = 1; break; } } /* no annotation, exit */ if (!is_annotation_exit) { return line; } /* check empty line */ for (uint32_t i = 0; i < strGetSize(line); i++) { if (' ' != line[i]) { return line; } } /* is an emply line */ line = "@annontation"; return line; } /* match block start keywords */ const char control_keywords[][9] = {"break", "continue"}; /* normal keyward */ const char normal_keywords[][7] = 
{"while", "if", "elif"};

/*
 * Parse one python line into a new AST.
 *
 * The block depth (leading spaces / 4) is stored as "blockDeepth". When
 * `block_stack` is given, one block type is popped from it for every level
 * the line is shallower than the stack top and queued in the "exitBlock"
 * child object; block-opening keywords push their type back onto the stack.
 * The remaining statement is handed to AST_parseStmt().
 *
 * Returns the AST, or NULL when `line` is NULL, the indentation is not a
 * multiple of 4 spaces, or the statement can not be parsed.
 */
AST* AST_parseLine(char* line, Stack* block_stack) {
    /* line is not exist */
    if (line == NULL) {
        return NULL;
    }

    /* init data */
    AST* ast = New_queueObj();
    Args buffs = {0};
    int8_t block_deepth_now, block_deepth_last = -1;
    char *line_start, *stmt;

    /* get block deepth */
    block_deepth_now = Parser_getPyLineBlockDeepth(line);
    /* set block deepth */
    if (block_deepth_now == -1) {
        /* get block_deepth error */
        __platform_printf(
            "IndentationError: unexpected indent, only support 4 spaces\r\n");
        obj_deinit(ast);
        ast = NULL;
        goto exit;
    }
    obj_setInt(ast, "blockDeepth", block_deepth_now);

    /* check if exit block */
    if (NULL != block_stack) {
        block_deepth_last = stack_getTop(block_stack);
        /* exit each block */
        for (int i = 0; i < block_deepth_last - block_deepth_now; i++) {
            QueueObj* exit_block_queue = obj_getObj(ast, "exitBlock");
            /* create an exit_block queue */
            if (NULL == exit_block_queue) {
                obj_newObj(ast, "exitBlock", "", New_TinyObj);
                exit_block_queue = obj_getObj(ast, "exitBlock");
                queueObj_init(exit_block_queue);
            }
            char buff[10] = {0};
            char* block_type = stack_popStr(block_stack, buff);
            /* push exit block type to exit_block queue */
            queueObj_pushStr(exit_block_queue, block_type);
        }
    }

    line_start = line + block_deepth_now * 4;
    stmt = line_start;

    /* "while" "if" "elif" */
    /* the element count is derived from the array itself, so the loop stays
     * right when a keyword or the row width changes */
    for (uint32_t i = 0;
         i < sizeof(normal_keywords) / sizeof(normal_keywords[0]); i++) {
        char* keyword = (char*)normal_keywords[i];
        uint8_t keyword_len = strGetSize(keyword);
        if (strIsStartWith(line_start, keyword) &&
            (line_start[keyword_len] == ' ')) {
            /* the condition is the text between the first ' ' and ':' */
            stmt = strsCut(&buffs, line_start, ' ', ':');
            AST_setThisNode(ast, "block", keyword);
            if (NULL != block_stack) {
                stack_pushStr(block_stack, keyword);
            }
            goto block_matched;
        }
    }

    /* control keyword */
    /* "break", "continue" */
    for (uint32_t i = 0;
         i < sizeof(control_keywords) / sizeof(control_keywords[0]); i++) {
        char* keyward = (char*)control_keywords[i];
        uint8_t keyward_size = strGetSize(keyward);
        if ((strIsStartWith(line_start, keyward)) &&
            ((line_start[keyward_size] == ' ') ||
(line_start[keyward_size] == 0))) {
            /* the keyword itself is the node name; no statement follows */
            AST_setThisNode(ast, keyward, "");
            stmt = "";
            goto block_matched;
        }
    }

    /* for */
    if (strIsStartWith(line_start, "for ")) {
        Args* list_buffs = New_strBuff();
        char* line_buff = strsCopy(list_buffs, line_start + 4);
        /* first token after "for " is the loop variable */
        char* arg_in = strsPopToken(list_buffs, line_buff, ' ');
        AST_setThisNode(ast, "arg_in", arg_in);
        /* the next token is popped and discarded (presumably "in") */
        strsPopToken(list_buffs, line_buff, ' ');
        /* the text up to ':' is wrapped as "iter(<text>)" */
        char* list_in = strsPopToken(list_buffs, line_buff, ':');
        list_in = strsAppend(list_buffs, "iter(", list_in);
        list_in = strsAppend(list_buffs, list_in, ")");
        /* move the result to `buffs` before list_buffs is released */
        list_in = strsCopy(&buffs, list_in);
        args_deinit(list_buffs);
        AST_setThisNode(ast, "block", "for");
        AST_setThisNode(ast, "list_in", list_in);
        if (NULL != block_stack) {
            stack_pushStr(block_stack, "for");
        }
        stmt = list_in;
        goto block_matched;
    }

    /* else */
    if (strIsStartWith(line_start, "else")) {
        /* only "else " / "else:" open a block; any other line starting
         * with "else" is parsed as a plain statement */
        if ((line_start[4] == ' ') || (line_start[4] == ':')) {
            stmt = "";
            AST_setThisNode(ast, "block", "else");
            if (NULL != block_stack) {
                stack_pushStr(block_stack, "else");
            }
        }
        goto block_matched;
    }

#if PIKA_SYNTAX_EXCEPTION_ENABLE
    /* try */
    if (strIsStartWith(line_start, "try")) {
        if ((line_start[3] == ' ') || (line_start[3] == ':')) {
            stmt = "";
            AST_setThisNode(ast, "block", "try");
            if (NULL != block_stack) {
                stack_pushStr(block_stack, "try");
            }
        }
        goto block_matched;
    }

    /* except */
    if (strIsStartWith(line_start, "except")) {
        if ((line_start[6] == ' ') || (line_start[6] == ':')) {
            stmt = "";
            AST_setThisNode(ast, "block", "except");
            if (NULL != block_stack) {
                stack_pushStr(block_stack, "except");
            }
        }
        goto block_matched;
    }
#endif

    /* bare "return" */
    if (strEqu(line_start, "return")) {
        AST_setThisNode(ast, "return", "");
        stmt = "";
        goto block_matched;
    }
    /* "return <expr>": the expression after the first ' ' is the stmt */
    if (strIsStartWith(line_start, "return ")) {
        char* lineBuff = strsCopy(&buffs, line_start);
        strsPopToken(&buffs, lineBuff, ' ');
        stmt = lineBuff;
        AST_setThisNode(ast, "return", "");
        goto block_matched;
    }

#if PIKA_SYNTAX_EXCEPTION_ENABLE
    /* bare "raise" is parsed as raising "RuntimeError" */
    if (strEqu(line_start, "raise")) {
        AST_setThisNode(ast, "raise", "");
        stmt =
"RuntimeError";
        goto block_matched;
    }
    if (strIsStartWith(line_start, "raise ")) {
        char* lineBuff = strsCopy(&buffs, line_start);
        strsPopToken(&buffs, lineBuff, ' ');
        stmt = lineBuff;
        /* nothing after "raise ": fall back to "RuntimeError" */
        if (strEqu("", stmt)) {
            stmt = "RuntimeError";
        }
        AST_setThisNode(ast, "raise", "");
        goto block_matched;
    }
#endif
    if (strIsStartWith(line_start, "global ")) {
        stmt = "";
        /* node value: the text after "global " passed through
         * strsGetCleanCmd() */
        char* global_list = line_start + 7;
        global_list = strsGetCleanCmd(&buffs, global_list);
        AST_setThisNode(ast, "global", global_list);
        goto block_matched;
    }
    if (strIsStartWith(line_start, "del ")) {
        stmt = "";
        char* del_dir = line_start + sizeof("del ") - 1;
        del_dir = strsGetCleanCmd(&buffs, del_dir);
        AST_setThisNode(ast, "del", del_dir);
        goto block_matched;
    }
    if (strIsStartWith(line_start, (char*)"def ")) {
        stmt = "";
        /* the declaration is the text between the first ' ' and ':' */
        char* declear = strsCut(&buffs, line_start, ' ', ':');
        declear = strsGetCleanCmd(&buffs, declear);
        AST_setThisNode(ast, "block", "def");
        AST_setThisNode(ast, "declear", declear);
        if (NULL != block_stack) {
            stack_pushStr(block_stack, "def");
        }
        goto block_matched;
    }
    if (strIsStartWith(line_start, (char*)"class ")) {
        stmt = "";
        char* declear = strsCut(&buffs, line_start, ' ', ':');
        declear = strsGetCleanCmd(&buffs, declear);
        AST_setThisNode(ast, "block", "class");
        AST_setThisNode(ast, "declear", declear);
        if (NULL != block_stack) {
            stack_pushStr(block_stack, "class");
        }
        goto block_matched;
    }

block_matched:
    /* stmt is NULL when a strsCut() above found nothing to cut */
    if (NULL == stmt) {
        AST_deinit(ast);
        ast = NULL;
        goto exit;
    }
    stmt = strsGetCleanCmd(&buffs, stmt);
    /* AST_parseStmt() releases the AST itself and returns NULL on error */
    ast = AST_parseStmt(ast, stmt);
    goto exit;
exit:
    strsDeinit(&buffs);
    return ast;
}

#if PIKA_SYNTAX_IMPORT_EX_ENABLE
/*
 * Rewrite "import <origin> as <alias>" into the two lines
 * "import <origin>\n<alias> = <origin>".
 * Lines that do not start with "import " or carry no " as " token are
 * returned unchanged. A rewritten line is stored in `buffs_p`.
 */
static char* Suger_import(Args* buffs_p, char* line) {
    Args buffs = {0};
    char* line_out = line;
    char* alias = NULL;
    char* origin = NULL;
    char* stmt = line + 7;
    if (!strIsStartWith(line, "import ")) {
        line_out = line;
        goto exit;
    }

    ParserState_forEachToken(ps, stmt) {
        ParserState_iterStart(&ps);
        /* by default the 'origin' is the first token */
        if (ps.iter_index == 1) {
            origin = strsCopy(&buffs,
ps.token1.pyload);
        }
        /* the token in front of " as " is the origin ... */
        if (strEqu(ps.token2.pyload, " as ")) {
            origin = strsCopy(&buffs, ps.token1.pyload);
        }
        /* ... and the token behind it is the alias */
        if (strEqu(ps.token1.pyload, " as ")) {
            alias = strsCopy(&buffs, ps.token2.pyload);
        }
        ParserState_iterEnd(&ps);
    }
    ParserState_deinit(&ps);

    /* only import, not 'as' */
    if (NULL == alias) {
        line_out = line;
        goto exit;
    }

    /* 'import' and 'as' */
    line_out = strsFormat(&buffs, PIKA_LINE_BUFF_SIZE, "import %s\n%s = %s",
                          origin, alias, origin);
    line_out = strsCopy(buffs_p, line_out);
    goto exit;
exit:
    strsDeinit(&buffs);
    return line_out;
}
#endif

#if PIKA_SYNTAX_IMPORT_EX_ENABLE
/*
 * Rewrite "from <module> import <class> [as <alias>]" into the two lines
 * "import <module>\n<alias> = <module>.<class>" (alias defaults to class).
 *
 * Lines that do not start with "from " are returned unchanged. An empty
 * string is returned when the module or the class can not be found and
 * when the module is "PikaObj". Results are stored in `buffs_p`.
 */
static char* Suger_from(Args* buffs_p, char* line) {
    Args buffs = {0};
    char* line_out = line;
    char* class = NULL;
    char* module = NULL;
    char* alias = NULL;
    char* stmt = line + 5;
    if (!strIsStartWith(line, "from ")) {
        line_out = line;
        goto exit;
    }

    ParserState_forEachToken(ps, stmt) {
        ParserState_iterStart(&ps);
        /* token in front of " import " is the module, the one behind it
         * the class */
        if (strEqu(ps.token2.pyload, " import ")) {
            module = strsCopy(&buffs, ps.token1.pyload);
        }
        if (strEqu(ps.token1.pyload, " import ")) {
            class = strsCopy(&buffs, ps.token2.pyload);
        }
        if (strEqu(ps.token1.pyload, " as ")) {
            alias = strsCopy(&buffs, ps.token2.pyload);
        }
        ParserState_iterEnd(&ps);
    }
    ParserState_deinit(&ps);

    if (NULL == module) {
        line_out = strsCopy(buffs_p, "");
        goto exit;
    }
    if (NULL == class) {
        line_out = strsCopy(buffs_p, "");
        goto exit;
    }
    if (NULL == alias) {
        alias = class;
    }

    /* skip PikaObj */
    if (strEqu(module, "PikaObj")) {
        line_out = strsCopy(buffs_p, "");
        goto exit;
    }

    line_out = strsFormat(&buffs, PIKA_LINE_BUFF_SIZE, "import %s\n%s = %s.%s",
                          module, alias, module, class);
    line_out = strsCopy(buffs_p, line_out);
exit:
    strsDeinit(&buffs);
    return line_out;
}
#endif

/*
 * Prepare one source line for parsing: strip the '#' comment, reject lines
 * Lexer_isError() flags, delete every '\r' and (when enabled) expand the
 * import sugar, which may turn the line into several '\n'-separated lines.
 * Returns the processed line, or NULL on a lexer error.
 */
static char* Parser_linePreProcess(Args* buffs_p, char* line) {
    line = Parser_removeAnnotation(line);
    /* check syntax error */
    if (Lexer_isError(line)) {
        line = NULL;
        goto exit;
    }
    /* process EOL */
    line = strsDeleteChar(buffs_p, line, '\r');
#if PIKA_SYNTAX_IMPORT_EX_ENABLE
    line = Suger_import(buffs_p, line);
line = Suger_from(buffs_p, line); #endif exit: return line; } char* Parser_LineToAsm(Args* buffs_p, char* line, Stack* blockStack) { char* ASM = NULL; AST* ast = NULL; uint8_t line_num = 0; /* pre process */ line = Parser_linePreProcess(buffs_p, line); if (NULL == line) { /* preprocess error */ goto exit; } if (strEqu("@annontation", line)) { ASM = ""; goto exit; } /* solve more lines preprocess may generate more lines */ line_num = strCountSign(line, '\n') + 1; for (int i = 0; i < line_num; i++) { char* single_line = strsPopToken(buffs_p, line, '\n'); /* parse tokens to AST */ ast = AST_parseLine(single_line, blockStack); /* gen ASM from AST */ if (ASM == NULL) { ASM = AST_toPikaASM(ast, buffs_p); } else { ASM = strsAppend(buffs_p, ASM, AST_toPikaASM(ast, buffs_p)); } if (NULL != ast) { AST_deinit(ast); } } exit: return ASM; } static int Parser_isVoidLine(char* line) { for (uint32_t i = 0; i < strGetSize(line); i++) { if (line[i] != ' ') { return 0; } } return 1; } static uint8_t Parser_checkIsMultiComment(char* line) { for (uint32_t i = 0; i < strGetSize(line); i++) { /* not match ' or " */ if ((line[i] != '\'') && (line[i] != '"')) { continue; } /* not match ''' or """ */ if (!((line[i + 1] == line[i]) && (line[i + 2] == line[i]))) { continue; } /* check char befor the ''' or """ */ if (!((0 == i) || (line[i - 1] == ' '))) { continue; } /* check char after the ''' or """ */ if (!((line[i + 3] == ' ') || (line[i + 3] == 0))) { continue; } /* mached */ return 1; } /* not mached */ return 0; } char* Parser_parsePyLines(Args* outBuffs, ByteCodeFrame* bytecode_frame, char* py_lines) { Stack block_stack; stack_init(&block_stack); Arg* asm_buff = arg_newStr(""); uint32_t lines_offset = 0; uint32_t lines_size = strGetSize(py_lines); uint16_t lines_num = strCountSign(py_lines, '\n'); uint16_t lines_index = 0; uint8_t is_in_multi_comment = 0; Arg* line_connection_arg = arg_newStr(""); uint8_t is_line_connection = 0; char* out_ASM = NULL; char* single_ASM; uint32_t 
line_size = 0; /* parse each line */ while (1) { lines_index++; Args buffs = {0}; /* get single line by pop multiline */ char* line_origin = strsGetFirstToken(&buffs, py_lines + lines_offset, '\n'); char* line = strsCopy(&buffs, line_origin); /* line connection */ if (is_line_connection) { is_line_connection = 0; line_connection_arg = arg_strAppend(line_connection_arg, line); line = strsCopy(&buffs, arg_getStr(line_connection_arg)); /* reflash the line_connection_arg */ arg_deinit(line_connection_arg); line_connection_arg = arg_newStr(""); } /* check connection */ if ('\\' == line[strGetSize(line) - 1]) { /* remove the '\\' */ line[strGetSize(line) - 1] = '\0'; is_line_connection = 1; line_connection_arg = arg_strAppend(line_connection_arg, line); goto next_line; } /* support Tab */ line = strsReplace(&buffs, line, "\t", " "); /* filter for not end \n */ if (lines_index != lines_num) { if (Parser_isVoidLine(line)) { goto next_line; } } /* filter for multiline comment ''' or """ */ if (Parser_checkIsMultiComment(line)) { is_in_multi_comment = ~is_in_multi_comment; goto next_line; } /* skipe multiline comment */ if (is_in_multi_comment) { goto next_line; } /* parse single Line to Asm */ single_ASM = Parser_LineToAsm(&buffs, line, &block_stack); #if PIKA_DEBUG pika_assert(NULL != single_ASM); #endif if (NULL == single_ASM) { out_ASM = NULL; strsDeinit(&buffs); goto exit; } if (NULL == bytecode_frame) { /* store ASM */ asm_buff = arg_strAppend(asm_buff, single_ASM); } else if (NULL == outBuffs) { /* store ByteCode */ byteCodeFrame_appendFromAsm(bytecode_frame, single_ASM); } next_line: line_size = strGetSize(line_origin); lines_offset = lines_offset + line_size + 1; strsDeinit(&buffs); /* exit when finished */ if (lines_offset >= lines_size) { break; } } if (NULL != outBuffs) { /* load stored ASM */ out_ASM = strsCopy(outBuffs, arg_getStr(asm_buff)); } else { out_ASM = (char*)1; } goto exit; exit: if (NULL != asm_buff) { arg_deinit(asm_buff); } if (NULL != 
line_connection_arg) { arg_deinit(line_connection_arg); } stack_deinit(&block_stack); return out_ASM; }; int bytecodeFrame_fromMultiLine(ByteCodeFrame* bytecode_frame, char* multi_line) { if (NULL == Parser_parsePyLines(NULL, bytecode_frame, multi_line)) { /* error */ return 1; } /* succeed */ return 0; }; char* Parser_multiLineToAsm(Args* outBuffs, char* multi_line) { return Parser_parsePyLines(outBuffs, NULL, multi_line); } char* Parser_fileToAsm(Args* outBuffs, char* filename) { Args buffs = {0}; Arg* file_arg = arg_loadFile(NULL, filename); pika_assert(NULL != file_arg); if (NULL == file_arg) { return NULL; } char* lines = (char*)arg_getBytes(file_arg); /* replace the "\r\n" to "\n" */ lines = strsReplace(&buffs, lines, "\r\n", "\n"); /* clear the void line */ lines = strsReplace(&buffs, lines, "\n\n", "\n"); /* add '\n' at the end */ lines = strsAppend(&buffs, lines, "\n\n"); char* res = Parser_multiLineToAsm(&buffs, lines); arg_deinit(file_arg); strsDeinit(&buffs); return res; } char* AST_appandPikaASM(AST* ast, AST* subAst, Args* outBuffs, char* pikaAsm) { int deepth = obj_getInt(ast, "deepth"); Args buffs = {0}; while (1) { QueueObj* subStmt = queueObj_popObj(subAst); if (NULL == subStmt) { break; } obj_setInt(ast, "deepth", deepth + 1); pikaAsm = AST_appandPikaASM(ast, subStmt, &buffs, pikaAsm); } char* buff = args_getBuff(&buffs, PIKA_SPRINTF_BUFF_SIZE); /* append the syntax item */ for (size_t i = 0; i < sizeof(syntexItemList) / sizeof(SyntaxItem); i++) { char* astNodeVal = obj_getStr(subAst, syntexItemList[i].astNodeName); if (NULL != astNodeVal) { /* e.g. 
"0 RUN print \n" */
            __platform_sprintf(buff, "%d %s ", deepth,
                               syntexItemList[i].asmCode);
            Arg* abuff = arg_newStr(buff);
            /* items flagged isUseNodeValue carry the node value as operand */
            if (syntexItemList[i].isUseNodeValue) {
                abuff = arg_strAppend(abuff, astNodeVal);
            }
            abuff = arg_strAppend(abuff, "\n");
            pikaAsm = strsAppend(&buffs, pikaAsm, arg_getStr(abuff));
            arg_deinit(abuff);
        }
    }

    /* restore the depth counter on `ast` */
    obj_setInt(ast, "deepth", deepth - 1);
    goto exit;
exit:
    /* hand the result to the caller's buffer before `buffs` is released */
    pikaAsm = strsCopy(outBuffs, pikaAsm);
    strsDeinit(&buffs);
    return pikaAsm;
}

/*
 * Append a block-depth marker line "B<n>\n" to `pikaAsm`, where n is the
 * "blockDeepth" int of `ast` plus `deepthOffset`.
 * The grown string is stored in `buffs_p` and returned.
 */
char* ASM_addBlockDeepth(AST* ast,
                         Args* buffs_p,
                         char* pikaAsm,
                         uint8_t deepthOffset) {
    pikaAsm = strsAppend(buffs_p, pikaAsm, (char*)"B");
    char buff[11];
    pikaAsm = strsAppend(
        buffs_p, pikaAsm,
        fast_itoa(buff, obj_getInt(ast, "blockDeepth") + deepthOffset));
    pikaAsm = strsAppend(buffs_p, pikaAsm, (char*)"\n");
    return pikaAsm;
}

/*
 * Generate the PikaASM text for one parsed line.
 *
 * Output order: the closing sequences of every block queued in
 * "exitBlock", the "B<n>" marker of the line itself, then the code for the
 * line's block / keyword node or, when none matched, its plain statement.
 * The result is copied into `outBuffs`. Returns NULL when `ast` is NULL.
 */
char* AST_toPikaASM(AST* ast, Args* outBuffs) {
    Args buffs = {0};
    char* pikaAsm = strsCopy(&buffs, "");
    QueueObj* exitBlock;
    uint8_t is_block_matched;
    if (NULL == ast) {
        pikaAsm = NULL;
        goto exit;
    }
    exitBlock = obj_getObj(ast, "exitBlock");
    /* exiting from block */
    if (exitBlock != NULL) {
        while (1) {
            /* computed from the queue's "top" and "bottom" ints before the
             * pop; used below as the depth offset of the block being left */
            uint8_t block_type_num = obj_getInt(exitBlock, "top") -
                                     obj_getInt(exitBlock, "bottom") - 1;
            char* block_type = queueObj_popStr(exitBlock);
            if (NULL == block_type) {
                break;
            }
            /* goto the while start when exit while block */
            if (strEqu(block_type, "while")) {
                pikaAsm =
                    ASM_addBlockDeepth(ast, outBuffs, pikaAsm, block_type_num);
                pikaAsm = strsAppend(outBuffs, pikaAsm, (char*)"0 JMP -1\n");
            }
#if PIKA_SYNTAX_EXCEPTION_ENABLE
            /* emit the NTR / GER / JEZ 2 sequence when exit try block */
            if (strEqu(block_type, "try")) {
                pikaAsm =
                    ASM_addBlockDeepth(ast, outBuffs, pikaAsm, block_type_num);
                pikaAsm = strsAppend(outBuffs, pikaAsm, (char*)"0 NTR \n");
                pikaAsm = strsAppend(outBuffs, pikaAsm, (char*)"0 GER \n");
                pikaAsm = strsAppend(outBuffs, pikaAsm, (char*)"0 JEZ 2\n");
            }
            /* emit SER 0 when exit except block */
            if (strEqu(block_type, "except")) {
                pikaAsm = strsAppend(outBuffs, pikaAsm, (char*)"0 SER 0\n");
            }
#endif
            /* goto the while start when exit while
block */
            if (strEqu(block_type, "for")) {
                /* jump back to the loop head ... */
                pikaAsm =
                    ASM_addBlockDeepth(ast, outBuffs, pikaAsm, block_type_num);
                pikaAsm = strsAppend(outBuffs, pikaAsm, (char*)"0 JMP -1\n");
                /* garbage collect for the list */
                pikaAsm =
                    ASM_addBlockDeepth(ast, outBuffs, pikaAsm, block_type_num);
                /* the iterator is named "_l<d>"; the 'x' is overwritten
                 * with the depth as one char ('0' + depth).
                 * NOTE(review): yields a digit only for depths 0..9 */
                char _l_x[] = "_lx";
                char block_deepth_char =
                    obj_getInt(ast, "blockDeepth") + block_type_num + '0';
                _l_x[sizeof(_l_x) - 2] = block_deepth_char;
                pikaAsm = strsAppend(outBuffs, pikaAsm, (char*)"0 DEL ");
                pikaAsm = strsAppend(outBuffs, pikaAsm, (char*)_l_x);
                pikaAsm = strsAppend(outBuffs, pikaAsm, (char*)"\n");
            }
            /* return when exit method */
            if (strEqu(block_type, "def")) {
                pikaAsm = ASM_addBlockDeepth(ast, outBuffs, pikaAsm,
                                             block_type_num + 1);
                pikaAsm = strsAppend(outBuffs, pikaAsm, (char*)"0 RET \n");
            }
            /* return when exit class */
            if (strEqu(block_type, "class")) {
                pikaAsm = ASM_addBlockDeepth(ast, outBuffs, pikaAsm,
                                             block_type_num + 1);
                pikaAsm =
                    strsAppend(outBuffs, pikaAsm, (char*)"0 RAS $origin\n");
                pikaAsm = ASM_addBlockDeepth(ast, outBuffs, pikaAsm, 1);
                pikaAsm = strsAppend(outBuffs, pikaAsm, (char*)"0 NEW self\n");
                pikaAsm = strsAppend(outBuffs, pikaAsm, (char*)"0 RET \n");
            }
        }
    }
    /* add block deepth */
    /* example: B0 */
    pikaAsm = ASM_addBlockDeepth(ast, outBuffs, pikaAsm, 0);

    /* "deepth" is invoke deepth, not the blockDeepth */
    obj_setInt(ast, "deepth", 0);

    /* match block */
    is_block_matched = 0;
    if (strEqu(obj_getStr(ast, "block"), "for")) {
        /* for "for" iter */
        char* arg_in = obj_getStr(ast, "arg_in");
        Arg* newAsm_arg = arg_newStr("");
        /* iterator name "_l<d>": 'x' replaced by '0' + blockDeepth */
        char _l_x[] = "_lx";
        char block_deepth_char = '0';
        block_deepth_char += obj_getInt(ast, "blockDeepth");
        _l_x[sizeof(_l_x) - 2] = block_deepth_char;
        /* init iter */
        /*     get the iter(_l) */
        pikaAsm = AST_appandPikaASM(ast, ast, &buffs, pikaAsm);
        /* store the iterator: "0 OUT _l<d>" */
        newAsm_arg = arg_strAppend(newAsm_arg, "0 OUT ");
        newAsm_arg = arg_strAppend(newAsm_arg, _l_x);
        newAsm_arg = arg_strAppend(newAsm_arg, "\n");
        pikaAsm = strsAppend(&buffs, pikaAsm, arg_getStr(newAsm_arg));
arg_deinit(newAsm_arg);
        newAsm_arg = arg_newStr("");
        /* get next */
        /*     run next(_l) */
        /*     check item is exist */
        /* a second block marker is emitted in front of the loop head */
        pikaAsm = ASM_addBlockDeepth(ast, outBuffs, pikaAsm, 0);
        newAsm_arg = arg_strAppend(newAsm_arg, "0 RUN ");
        newAsm_arg = arg_strAppend(newAsm_arg, _l_x);
        newAsm_arg = arg_strAppend(newAsm_arg,
                                   ".__next__\n"
                                   "0 OUT ");
        newAsm_arg = arg_strAppend(newAsm_arg, arg_in);
        newAsm_arg = arg_strAppend(newAsm_arg,
                                   "\n"
                                   "0 EST ");
        newAsm_arg = arg_strAppend(newAsm_arg, arg_in);
        newAsm_arg = arg_strAppend(newAsm_arg, "\n0 JEZ 2\n");
        pikaAsm = strsAppend(&buffs, pikaAsm, arg_getStr(newAsm_arg));
        arg_deinit(newAsm_arg);
        is_block_matched = 1;
        goto exit;
    }
    if (strEqu(obj_getStr(ast, "block"), "while")) {
        /* parse stmt ast */
        pikaAsm = AST_appandPikaASM(ast, ast, &buffs, pikaAsm);
        pikaAsm = strsAppend(&buffs, pikaAsm, "0 JEZ 2\n");
        is_block_matched = 1;
        goto exit;
    }
    if (strEqu(obj_getStr(ast, "block"), "if")) {
        /* parse stmt ast */
        pikaAsm = AST_appandPikaASM(ast, ast, &buffs, pikaAsm);
        pikaAsm = strsAppend(&buffs, pikaAsm, "0 JEZ 1\n");
        is_block_matched = 1;
        goto exit;
    }
    if (strEqu(obj_getStr(ast, "block"), "else")) {
        /* is_block_matched stays 0 here, so the exit path still calls
         * AST_appandPikaASM() for this line */
        pikaAsm = strsAppend(&buffs, pikaAsm, "0 NEL 1\n");
        goto exit;
    }
#if PIKA_SYNTAX_EXCEPTION_ENABLE
    if (strEqu(obj_getStr(ast, "block"), "try")) {
        /* same as "else": the exit path still appends the statement */
        pikaAsm = strsAppend(&buffs, pikaAsm, "0 TRY \n");
        goto exit;
    }
#endif
    if (strEqu(obj_getStr(ast, "block"), "elif")) {
        /* skip if __else is 0 */
        pikaAsm = strsAppend(&buffs, pikaAsm, "0 NEL 1\n");
        /* parse stmt ast */
        pikaAsm = AST_appandPikaASM(ast, ast, &buffs, pikaAsm);
        /* skip if stmt is 0 */
        pikaAsm = strsAppend(&buffs, pikaAsm, "0 JEZ 1\n");
        is_block_matched = 1;
        goto exit;
    }
    if (strEqu(obj_getStr(ast, "block"), "def")) {
        /* "0 DEF <declaration>" followed by "0 JMP 1" */
        pikaAsm = strsAppend(&buffs, pikaAsm, "0 DEF ");
        pikaAsm = strsAppend(&buffs, pikaAsm, obj_getStr(ast, "declear"));
        pikaAsm = strsAppend(&buffs, pikaAsm,
                             "\n"
                             "0 JMP 1\n");
        is_block_matched = 1;
        goto exit;
    }
    if (strEqu(obj_getStr(ast, "block"), "class")) {
        char* declear = obj_getStr(ast,
"declear");
        char* thisClass = NULL;
        char* superClass = NULL;
        /* "Name(Base)" is split into name and base; without parentheses
         * the base is empty */
        if (strIsContain(declear, '(')) {
            thisClass = strsGetFirstToken(&buffs, declear, '(');
            superClass = strsCut(&buffs, declear, '(', ')');
        } else {
            thisClass = declear;
            superClass = "";
        }
        if (strEqu("", superClass)) {
            /* default superClass */
            superClass = "TinyObj";
        }
        /* NOTE(review): this branch re-assigns the value it just matched
         * and has no effect */
        if (strEqu("TinyObj", superClass)) {
            /* default superClass */
            superClass = "TinyObj";
        }
        pikaAsm = strsAppend(&buffs, pikaAsm, "0 CLS ");
        pikaAsm = strsAppend(&buffs, pikaAsm,
                             strsAppend(&buffs, thisClass,
                                        "()\n"
                                        "0 JMP 1\n"));
        /* marker "B<d>\n" for the class body, built by adding
         * blockDeepth + 1 to the '0' char.
         * NOTE(review): yields a digit only while the sum stays below 10 */
        char block_deepth_str[] = "B0\n";
        /* goto deeper block */
        block_deepth_str[1] += obj_getInt(ast, "blockDeepth") + 1;
        pikaAsm = strsAppend(&buffs, pikaAsm, block_deepth_str);
        /* run the super class and store the result as self */
        pikaAsm = strsAppend(&buffs, pikaAsm, "0 RUN ");
        pikaAsm = strsAppend(&buffs, pikaAsm, superClass);
        pikaAsm = strsAppend(&buffs, pikaAsm, "\n");
        pikaAsm = strsAppend(&buffs, pikaAsm, "0 OUT self\n");
        pikaAsm = strsAppend(&buffs, pikaAsm, block_deepth_str);
        pikaAsm = strsAppend(&buffs, pikaAsm, "0 RAS self\n");
        is_block_matched = 1;
        goto exit;
    }
    if (obj_isArgExist(ast, "return")) {
        /* parse stmt ast */
        pikaAsm = AST_appandPikaASM(ast, ast, &buffs, pikaAsm);
        pikaAsm = strsAppend(&buffs, pikaAsm, "0 RET \n");
        is_block_matched = 1;
        goto exit;
    }
#if PIKA_SYNTAX_EXCEPTION_ENABLE
    if (obj_isArgExist(ast, "raise")) {
        /* parse stmt ast */
        pikaAsm = AST_appandPikaASM(ast, ast, &buffs, pikaAsm);
        pikaAsm = strsAppend(&buffs, pikaAsm, "0 RIS \n");
        is_block_matched = 1;
        goto exit;
    }
#endif
    if (obj_isArgExist(ast, "global")) {
        /* parse stmt ast */
        pikaAsm = AST_appandPikaASM(ast, ast, &buffs, pikaAsm);
        /* "0 GLB <names>" */
        pikaAsm = strsAppend(&buffs, pikaAsm, "0 GLB ");
        pikaAsm = strsAppend(&buffs, pikaAsm, obj_getStr(ast, "global"));
        pikaAsm = strsAppend(&buffs, pikaAsm, "\n");
        is_block_matched = 1;
        goto exit;
    }
    if (obj_isArgExist(ast, "del")) {
        /* parse stmt ast */
        pikaAsm = AST_appandPikaASM(ast, ast, &buffs, pikaAsm);
        /* "0 DEL <name>" */
        pikaAsm = strsAppend(&buffs, pikaAsm, "0 DEL ");
        pikaAsm =
strsAppend(&buffs, pikaAsm, obj_getStr(ast, "del")); pikaAsm = strsAppend(&buffs, pikaAsm, "\n"); is_block_matched = 1; goto exit; } if (obj_isArgExist(ast, "break")) { /* parse stmt ast */ pikaAsm = AST_appandPikaASM(ast, ast, &buffs, pikaAsm); pikaAsm = strsAppend(&buffs, pikaAsm, "0 BRK \n"); is_block_matched = 1; goto exit; } if (obj_isArgExist(ast, "continue")) { /* parse stmt ast */ pikaAsm = AST_appandPikaASM(ast, ast, &buffs, pikaAsm); pikaAsm = strsAppend(&buffs, pikaAsm, "0 CTN \n"); is_block_matched = 1; goto exit; } exit: if (NULL == pikaAsm) { strsDeinit(&buffs); return NULL; } if (!is_block_matched) { /* parse stmt ast */ pikaAsm = AST_appandPikaASM(ast, ast, &buffs, pikaAsm); } /* output pikaAsm */ pikaAsm = strsCopy(outBuffs, pikaAsm); strsDeinit(&buffs); return pikaAsm; } int32_t AST_deinit(AST* ast) { return obj_deinit(ast); } ByteCodeFrame* byteCodeFrame_appendFromAsm(ByteCodeFrame* self, char* pikaAsm) { Asmer asmer = { .asm_code = pikaAsm, .block_deepth_now = 0, .is_new_line = 0, .line_pointer = pikaAsm, }; uint16_t const_pool_offset; uint16_t exist_offset; int invoke_deepth_int = 0; for (int i = 0; i < strCountSign(pikaAsm, '\n'); i++) { Args buffs = {0}; char* line = strsGetLine(&buffs, asmer.line_pointer); char* data = NULL; char ins_str[4] = ""; char invoke_deepth[3] = ""; uint8_t space_num = 0; uint8_t invoke_deepth_i = 0; uint8_t ins_str_i = 0; Arg* line_buff = arg_newStr(line); strsDeinit(&buffs); line = arg_getStr(line_buff); InstructUnit ins_unit = {0}; /* remove '\r' */ if (line[strGetSize(line) - 1] == '\r') { line[strGetSize(line) - 1] = 0; } /* process block deepth flag*/ if ('B' == line[0]) { asmer.block_deepth_now = fast_atoi(line + 1); asmer.is_new_line = 1; goto next_line; } /* process each ins */ /* get constPool offset */ const_pool_offset = 0; for (int i = 0; i < (int)strGetSize(line); i++) { if (space_num < 2) { if (line[i] == ' ') { space_num++; if (space_num == 2) { data = line + i + 1; break; } continue; } } if 
(space_num == 0) { invoke_deepth[invoke_deepth_i++] = line[i]; continue; } if (space_num == 1) { ins_str[ins_str_i++] = line[i]; continue; } } exist_offset = constPool_getOffsetByData(&(self->const_pool), data); /* get const offset */ if (strEqu(data, "")) { /* not need const value */ const_pool_offset = 0; } else if (65535 == exist_offset) { /* push new const value */ const_pool_offset = constPool_getLastOffset(&(self->const_pool)); /* load const to const pool buff */ constPool_append(&(self->const_pool), data); } else { /* use exist const value */ const_pool_offset = exist_offset; } invoke_deepth_int = fast_atoi(invoke_deepth); /* load Asm to byte code unit */ instructUnit_setBlockDeepth(&ins_unit, asmer.block_deepth_now); instructUnit_setInvokeDeepth(&ins_unit, invoke_deepth_int); instructUnit_setConstPoolIndex(&ins_unit, const_pool_offset); instructUnit_setInstruct(&ins_unit, pikaVM_getInstructFromAsm(ins_str)); if (asmer.is_new_line) { instructUnit_setIsNewLine(&ins_unit, 1); asmer.is_new_line = 0; } /* append instructUnit to instructArray */ instructArray_append(&(self->instruct_array), &ins_unit); next_line: /* point to next line */ asmer.line_pointer += strGetLineSize(asmer.line_pointer) + 1; arg_deinit(line_buff); } return self; } char* Parser_byteCodeToAsm(Args* outBuffs, char* pikaByteCode) { return NULL; } void Parser_compilePyToBytecodeArray(char* lines) { ByteCodeFrame bytecode_frame; byteCodeFrame_init(&bytecode_frame); bytecodeFrame_fromMultiLine(&bytecode_frame, lines); /* do something */ byteCodeFrame_print(&bytecode_frame); __platform_printf("\n\n/* clang-format off */\n"); __platform_printf("PIKA_PYTHON(\n"); __platform_printf("%s\n", lines); __platform_printf(")\n"); __platform_printf("/* clang-format on */\n"); byteCodeFrame_printAsArray(&bytecode_frame); /* deinit */ byteCodeFrame_deinit(&bytecode_frame); __platform_printf("\n\n"); }