2022-08-30 18:47:49 +08:00

2703 lines
84 KiB
C

/*
* This file is part of the PikaScript project.
* http://github.com/pikastech/pikascript
*
* MIT License
*
* Copyright (c) 2021 lyon 李昂 liang6516@outlook.com
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "PikaParser.h"
#include "BaseObj.h"
#include "PikaObj.h"
#include "dataQueue.h"
#include "dataQueueObj.h"
#include "dataStack.h"
#include "dataStrs.h"
/* local head: forward declarations for functions used in this file before
 * their definition (or defined past this part of the file) */
/* the AST is stored in a QueueObj */
typedef QueueObj AST;
char* AST_toPikaASM(AST* ast, Args* outBuffs);
/* lexer: turn one statement into a 0x1F-separated token string */
char* Lexer_parseLine(Args* outBuffs, char* stmt);
int32_t AST_deinit(AST* ast);
char* Parser_linesToAsm(Args* outBuffs, char* multiLine);
uint8_t Parser_isContainToken(char* tokens,
enum TokenType token_type,
char* pyload);
/* cursor: token iterator driven by the Cursor_forEachToken macros */
void Cursor_init(struct Cursor* ps);
void Cursor_parse(struct Cursor* ps, char* stmt);
void Cursor_deinit(struct Cursor* ps);
void Cursor_beforeIter(struct Cursor* ps);
void Cursor_iterStart(struct Cursor* ps);
void Cursor_iterEnd(struct Cursor* ps);
char* Parser_popToken(Args* buffs_p, char* tokens);
/*
 * Count the tokens held in a token string.
 *
 * Tokens are delimited by the 0x1F byte, so the count is the number of
 * delimiters plus one. An empty string holds no tokens at all.
 */
uint16_t Tokens_getSize(char* tokens) {
    if (!strEqu("", tokens)) {
        return strCountSign(tokens, 0x1F) + 1;
    }
    return 0;
}
/*
 * Pop everything in front of the last top-level occurrence of `str`.
 *
 * `stmts` is tokenized twice. The first pass remembers the index of the last
 * token equal to `str` that is seen while the cursor's bracket depth is 0.
 * The second pass joins the tokens before that index into the popped part
 * and the tokens after it into the kept part; the divider token itself is
 * dropped.
 *
 * The kept part is written back over `stmts` (including the terminator) and
 * the popped part is returned as a copy owned by `outBuffs`.
 * When `str` never matches, the popped part is "" and every token is kept.
 *
 * `skipStart` / `skipEnd` are not used: bracket skipping is done through the
 * cursor's own depth tracking. They stay in the signature for the callers.
 */
char* strsPopTokenWithSkip_byStr(Args* outBuffs,
                                 char* stmts,
                                 char* str,
                                 char skipStart,
                                 char skipEnd) {
    (void)skipStart;
    (void)skipEnd;
    /* 16 bit like the token count returned by Tokens_getSize(); an 8 bit
     * index would wrap on statements with more than 255 tokens */
    uint16_t divider_index = 0;
    Arg* keeped_arg = arg_newStr("");
    Arg* poped_arg = arg_newStr("");
    /* pass 1: locate the last divider outside of any bracket */
    Cursor_forEachToken(ps, stmts) {
        Cursor_iterStart(&ps);
        if (ps.branket_deepth == 0) {
            if (strEqu(str, ps.token1.pyload)) {
                divider_index = ps.iter_index;
            }
        }
        Cursor_iterEnd(&ps);
    }
    Cursor_deinit(&ps);
    /* pass 2: distribute the tokens around the divider */
    Cursor_forEachTokenExistPs(ps, stmts) {
        Cursor_iterStart(&ps);
        if (ps.iter_index < divider_index) {
            poped_arg = arg_strAppend(poped_arg, ps.token1.pyload);
        }
        if (ps.iter_index > divider_index) {
            keeped_arg = arg_strAppend(keeped_arg, ps.token1.pyload);
        }
        Cursor_iterEnd(&ps);
    }
    Cursor_deinit(&ps);
    char* keeped = arg_getStr(keeped_arg);
    /* copy out before the Arg that owns the text is released */
    char* poped = strsCopy(outBuffs, arg_getStr(poped_arg));
    __platform_memcpy(stmts, keeped, strGetSize(keeped) + 1);
    arg_deinit(poped_arg);
    arg_deinit(keeped_arg);
    return poped;
}
/*
 * Rebuild `cmd` from its lexer tokens.
 *
 * The statement is tokenized and the payloads of all tokens are joined back
 * together without any separator. The result lives in a buffer taken from
 * `outBuffs`.
 *
 * The output buffer is sized at twice the input because the lexer may emit
 * more characters than it read.
 */
char* strsGetCleanCmd(Args* outBuffs, char* cmd) {
    pika_assert(cmd != NULL);
    int32_t size = strGetSize(cmd);
    /* lexer may generate more chars than input */
    char* strOut = args_getBuff(outBuffs, size * 2);
    int32_t iOut = 0;
    Cursor_forEachToken(ps, cmd) {
        Cursor_iterStart(&ps);
        /* measure the payload once instead of on every loop test */
        const char* pyload = ps.token1.pyload;
        const size_t pyload_len = strGetSize(ps.token1.pyload);
        for (size_t k = 0; k < pyload_len; k++) {
            strOut[iOut] = pyload[k];
            iOut++;
        }
        Cursor_iterEnd(&ps);
    }
    Cursor_deinit(&ps);
    /* add \0 */
    strOut[iOut] = 0;
    return strOut;
}
/*
 * Copy `strIn` while dropping the characters nested between `begin` and
 * `end`.
 *
 * The delimiters themselves are kept: the nesting level is lowered before a
 * character is copied and raised after it. The result is placed in a buffer
 * taken from `buffs_p`.
 */
char* strsDeleteBetween(Args* buffs_p, char* strIn, char begin, char end) {
    int32_t len = strGetSize(strIn);
    char* kept = args_getBuff(buffs_p, len);
    uint32_t kept_len = 0;
    uint8_t nesting = 0;
    for (int pos = 0; pos < len; pos++) {
        const char ch = strIn[pos];
        /* leave the nested region first, so that `end` itself is copied */
        if (ch == end) {
            nesting--;
        }
        if (nesting == 0) {
            kept[kept_len] = ch;
            kept_len++;
        }
        /* enter the nested region last, so that `begin` itself is copied */
        if (ch == begin) {
            nesting++;
        }
    }
    kept[kept_len] = 0;
    return kept;
}
/*
 * Tell whether `line` fails to lex.
 *
 * Returns 1 when Lexer_parseLine() yields NULL for the line, 0 otherwise.
 * The token string itself is thrown away.
 */
static uint8_t Lexer_isError(char* line) {
    Args buffs = {0};
    uint8_t is_error = (NULL == Lexer_parseLine(&buffs, line)) ? 1 : 0;
    strsDeinit(&buffs);
    return is_error;
}
/*
 * Join the tokens of `input`, leaving out every token nested between a
 * `token_pyload1` token and its matching `token_pyload2` token.
 *
 * The outermost opening and closing tokens are kept, only what sits between
 * them is removed. The result is a copy owned by `outBuffs`.
 */
static char* __removeTokensBetween(Args* outBuffs,
                                   char* input,
                                   char* token_pyload1,
                                   char* token_pyload2) {
    Args scratch = {0};
    uint8_t nesting = 0;
    char* kept = "";
    Cursor_forEachToken(ps, input) {
        Cursor_iterStart(&ps);
        char* pyload = ps.token1.pyload;
        if (strEqu(token_pyload1, pyload)) {
            /* keep the opener of an outermost group */
            if (0 == nesting) {
                kept = strsAppend(&scratch, kept, pyload);
            }
            nesting++;
        }
        if (strEqu(token_pyload2, pyload)) {
            nesting--;
        }
        /* at level 0 this also picks up the closer of an outermost group */
        if (0 == nesting) {
            kept = strsAppend(&scratch, kept, pyload);
        }
        Cursor_iterEnd(&ps);
    }
    Cursor_deinit(&ps);
    kept = strsCopy(outBuffs, kept);
    strsDeinit(&scratch);
    return kept;
}
/*
 * Classify the right-hand side of a statement.
 *
 * The contents of (), [] and {} groups are removed first, so only the
 * top-level tokens are inspected. Every token may raise one of the is_get_*
 * flags; after the scan the flags are tested in a fixed priority order
 * (import, operator, chain, slice, list, dict, method, string, bytes,
 * number, reference) and the first one that is set decides the result.
 * STMT_none is returned when no flag was raised.
 */
static enum StmtType Lexer_matchStmtType(char* right) {
Args buffs = {0};
enum StmtType stmtType = STMT_none;
/* strip nested sub statements, the brackets themselves are kept */
char* rightWithoutSubStmt = __removeTokensBetween(&buffs, right, "(", ")");
rightWithoutSubStmt =
__removeTokensBetween(&buffs, rightWithoutSubStmt, "[", "]");
rightWithoutSubStmt =
__removeTokensBetween(&buffs, rightWithoutSubStmt, "{", "}");
PIKA_BOOL is_get_operator = PIKA_FALSE;
PIKA_BOOL is_get_method = PIKA_FALSE;
PIKA_BOOL is_get_string = PIKA_FALSE;
PIKA_BOOL is_get_bytes = PIKA_FALSE;
PIKA_BOOL is_get_number = PIKA_FALSE;
PIKA_BOOL is_get_symbol = PIKA_FALSE;
PIKA_BOOL is_get_list = PIKA_FALSE;
PIKA_BOOL is_get_slice = PIKA_FALSE;
PIKA_BOOL is_get_dict = PIKA_FALSE;
PIKA_BOOL is_get_import = PIKA_FALSE;
PIKA_BOOL is_get_chain = PIKA_FALSE;
Cursor_forEachToken(ps, rightWithoutSubStmt) {
Cursor_iterStart(&ps);
/* collect type */
if (strEqu(ps.token1.pyload, " import ")) {
is_get_import = PIKA_TRUE;
goto iter_continue;
}
/* token2 is the token that follows token1 */
if (strEqu(ps.token2.pyload, "[")) {
/* (symbol | literal | <]> | <)>) + <[> */
if (TOKEN_symbol == ps.token1.type ||
TOKEN_literal == ps.token1.type ||
strEqu(ps.token1.pyload, "]") ||
strEqu(ps.token1.pyload, ")")) {
is_get_slice = PIKA_TRUE;
goto iter_continue;
}
/* ( <,> | <=> ) + <[> */
/* no goto here: token1 is still checked by the tests below */
is_get_list = PIKA_TRUE;
}
if (strEqu(ps.token1.pyload, "[") && ps.iter_index == 1) {
/* VOID + <[> : the statement starts with '[' */
is_get_list = PIKA_TRUE;
goto iter_continue;
}
/* '...' raises no flag at all */
if (strEqu(ps.token1.pyload, "...")) {
goto iter_continue;
}
/* a token starting with '.' that is not the first token is a chain */
if (strIsStartWith(ps.token1.pyload, ".")) {
if (ps.iter_index != 1) {
is_get_chain = PIKA_TRUE;
goto iter_continue;
}
}
if (strEqu(ps.token1.pyload, "{")) {
is_get_dict = PIKA_TRUE;
goto iter_continue;
}
if (ps.token1.type == TOKEN_operator) {
is_get_operator = PIKA_TRUE;
goto iter_continue;
}
/* <(> */
if (strEqu(ps.token1.pyload, "(")) {
is_get_method = PIKA_TRUE;
goto iter_continue;
}
if (ps.token1.type == TOKEN_literal) {
/* starts with a quote: string */
if (ps.token1.pyload[0] == '\'' || ps.token1.pyload[0] == '"') {
is_get_string = PIKA_TRUE;
goto iter_continue;
}
/* 'b' followed by a quote: bytes */
if (ps.token1.pyload[1] == '\'' || ps.token1.pyload[1] == '"') {
if (ps.token1.pyload[0] == 'b') {
is_get_bytes = PIKA_TRUE;
goto iter_continue;
}
}
/* every other literal counts as a number */
is_get_number = PIKA_TRUE;
goto iter_continue;
}
if (ps.token1.type == TOKEN_symbol) {
is_get_symbol = PIKA_TRUE;
goto iter_continue;
}
iter_continue:
Cursor_iterEnd(&ps);
}
/* pick the result, the order of the tests below is the priority */
if (is_get_import) {
stmtType = STMT_import;
goto exit;
}
if (is_get_operator) {
stmtType = STMT_operator;
goto exit;
}
if (is_get_chain) {
stmtType = STMT_chain;
goto exit;
}
if (is_get_slice) {
stmtType = STMT_slice;
goto exit;
}
if (is_get_list) {
stmtType = STMT_list;
goto exit;
}
if (is_get_dict) {
stmtType = STMT_dict;
goto exit;
}
if (is_get_method) {
stmtType = STMT_method;
goto exit;
}
if (is_get_string) {
stmtType = STMT_string;
goto exit;
}
if (is_get_bytes) {
stmtType = STMT_bytes;
goto exit;
}
if (is_get_number) {
stmtType = STMT_number;
goto exit;
}
if (is_get_symbol) {
stmtType = STMT_reference;
goto exit;
}
exit:
/* every path ends here, so the cursor and the scratch buffers are always
 * released */
Cursor_deinit(&ps);
strsDeinit(&buffs);
return stmtType;
}
/*
 * Render a token string in a readable form.
 *
 * Each token is popped from `tokens` and written as a tag naming its type
 * ({opt}, {dvd}, {sym} or {lit}) followed by its payload. Tokens of any
 * other type produce no output. The result is a copy owned by `outBuffs`.
 */
char* Lexer_printTokens(Args* outBuffs, char* tokens) {
    pika_assert(tokens);
    Args buffs = {0};
    char* printOut = strsCopy(&buffs, "");
    uint16_t token_size = Tokens_getSize(tokens);
    for (uint16_t n = 0; n < token_size; n++) {
        char* token = Parser_popToken(&buffs, tokens);
        /* byte 0 of a token is its type, the payload starts at byte 1 */
        char* tag = NULL;
        if (token[0] == TOKEN_operator) {
            tag = "{opt}";
        } else if (token[0] == TOKEN_devider) {
            tag = "{dvd}";
        } else if (token[0] == TOKEN_symbol) {
            tag = "{sym}";
        } else if (token[0] == TOKEN_literal) {
            tag = "{lit}";
        }
        if (NULL != tag) {
            printOut = strsAppend(&buffs, printOut, tag);
            printOut = strsAppend(&buffs, printOut, token + 1);
        }
    }
    /* hand the text over to the caller's buffers */
    printOut = strsCopy(outBuffs, printOut);
    strsDeinit(&buffs);
    return printOut;
}
/*
 * Tell whether `str` contains an assignment.
 *
 * Returns 1 when the lexed statement holds the operator token "=",
 * 0 otherwise.
 */
uint8_t Parser_checkIsDirect(char* str) {
    Args buffs = {0};
    char* tokens = Lexer_parseLine(&buffs, str);
    pika_assert(NULL != tokens);
    uint8_t has_assign = 0;
    if (Parser_isContainToken(tokens, TOKEN_operator, "=")) {
        has_assign = 1;
    }
    strsDeinit(&buffs);
    return has_assign;
}
/*
 * Append one token to the token string held by `tokens_arg`.
 *
 * A token is written as the separator byte 0x1F, one byte holding
 * `token_type`, and then the text passed as `operator`.
 * The passed `tokens_arg` is released; the returned Arg holds the extended
 * token string and replaces it.
 */
Arg* Lexer_setToken(Arg* tokens_arg,
enum TokenType token_type,
char*
operator) {
Args buffs = {0};
/* { separator, type, terminator } */
char token_type_buff[3] = {0};
token_type_buff[0] = 0x1F;
token_type_buff[1] = token_type;
char* tokens = arg_getStr(tokens_arg);
tokens = strsAppend(&buffs, tokens, token_type_buff);
tokens = strsAppend(&buffs, tokens, operator);
/* NOTE(review): the old Arg is deinitialized right after this call, which is
 * only safe if arg_setStr() returns a new Arg rather than `tokens_arg`
 * itself -- confirm against the arg_setStr() implementation */
Arg* new_tokens_arg = arg_setStr(tokens_arg, "", tokens);
arg_deinit(tokens_arg);
strsDeinit(&buffs);
return new_tokens_arg;
}
/*
 * Flush the pending symbol stmt[*symbol_start_index .. i) into the token
 * string.
 *
 * Nothing is emitted when no symbol is pending (start index is -1) or when
 * the pending range is empty. Otherwise the text becomes a TOKEN_literal if
 * it starts with a quote, with a digit, or with 'b' followed by a quote;
 * any other text becomes a TOKEN_symbol.
 *
 * `*symbol_start_index` is always reset to -1. Returns the (possibly
 * replaced) token Arg.
 */
Arg* Lexer_setSymbel(Arg* tokens_arg,
                     char* stmt,
                     int32_t i,
                     int32_t* symbol_start_index) {
    Args buffs = {0};
    if (-1 != *symbol_start_index && i != *symbol_start_index) {
        /* NOTE(review): no terminator is written after the copy; presumably
         * args_getBuff() hands out a zero-filled buffer with room for it --
         * confirm */
        char* text = args_getBuff(&buffs, i - *symbol_start_index);
        __platform_memcpy(text, stmt + *symbol_start_index,
                          i - *symbol_start_index);
        enum TokenType type = TOKEN_symbol;
        if ((text[0] == '\'') || (text[0] == '"')) {
            /* "" or '' */
            type = TOKEN_literal;
        } else if ((text[0] >= '0') && (text[0] <= '9')) {
            /* number */
            type = TOKEN_literal;
        } else if ((text[0] == 'b') &&
                   ((text[1] == '\'') || (text[1] == '"'))) {
            /* b"" or b'' */
            type = TOKEN_literal;
        }
        tokens_arg = Lexer_setToken(tokens_arg, type, text);
    }
    *symbol_start_index = -1;
    strsDeinit(&buffs);
    return tokens_arg;
}
/*
 * Lex one statement into a token string.
 *
 * The result is a sequence of tokens, each written by Lexer_setToken() as
 * the separator byte 0x1F, one type byte, and the content. The string is
 * copied into `outBuffs`.
 *
 * The statement is scanned one character at a time with a window of two
 * characters of look-behind (cn2, cn1) and six of look-ahead (c1..c6).
 * Characters that are neither string content, divider nor operator are
 * collected as a pending symbol starting at `symbol_start_index`, which is
 * flushed by Lexer_setSymbel() whenever a divider, operator, space or the
 * end of the statement is reached.
 */
char* Lexer_parseLine(Args* outBuffs, char* stmt) {
/* init */
Arg* tokens_arg = New_arg(NULL);
tokens_arg = arg_setStr(tokens_arg, "", "");
int32_t size = strGetSize(stmt);
/* NOTE(review): bracket_deepth is only counted, nothing in this function
 * reads it */
uint8_t bracket_deepth = 0;
uint8_t cn2 = 0;
uint8_t cn1 = 0;
uint8_t c0 = 0;
uint8_t c1 = 0;
uint8_t c2 = 0;
uint8_t c3 = 0;
uint8_t c4 = 0;
uint8_t c5 = 0;
uint8_t c6 = 0;
/* -1 means that no symbol is pending */
int32_t symbol_start_index = -1;
/* 0: outside of a string, 1: inside '...', 2: inside "..." */
int is_in_string = 0;
/* set when the pending symbol starts with a digit */
int is_number = 0;
char* tokens;
/* process */
for (int32_t i = 0; i < size; i++) {
/* update char: reload the window around position i, 0 outside the line */
cn2 = 0;
cn1 = 0;
c0 = stmt[i];
c1 = 0;
c2 = 0;
c3 = 0;
c4 = 0;
c5 = 0;
c6 = 0;
if (i - 2 >= 0) {
cn2 = stmt[i - 2];
}
if (i - 1 >= 0) {
cn1 = stmt[i - 1];
}
if (i + 1 < size) {
c1 = stmt[i + 1];
}
if (i + 2 < size) {
c2 = stmt[i + 2];
}
if (i + 3 < size) {
c3 = stmt[i + 3];
}
if (i + 4 < size) {
c4 = stmt[i + 4];
}
if (i + 5 < size) {
c5 = stmt[i + 5];
}
if (i + 6 < size) {
c6 = stmt[i + 6];
}
/* a new pending symbol starts at this character */
if (-1 == symbol_start_index) {
is_number = 0;
if ((c0 >= '0') && (c0 <= '9')) {
is_number = 1;
}
symbol_start_index = i;
}
/* solve string */
/* a quote counts unless it follows a backslash that is not itself
 * preceded by a backslash */
if (0 == is_in_string) {
if ('\'' == c0) {
if ('\\' != cn1 || ('\\' == cn1 && '\\' == cn2)) {
/* in ' */
is_in_string = 1;
continue;
}
}
if ('"' == c0) {
if ('\\' != cn1 || ('\\' == cn1 && '\\' == cn2)) {
/* in "" */
is_in_string = 2;
continue;
}
}
}
if (1 == is_in_string) {
if ('\'' == c0 && ('\\' != cn1 || ('\\' == cn1 && '\\' == cn2))) {
/* closing quote: emit the string, quotes included */
is_in_string = 0;
tokens_arg = Lexer_setSymbel(tokens_arg, stmt, i + 1,
&symbol_start_index);
}
continue;
}
if (2 == is_in_string) {
if ('"' == c0 && ('\\' != cn1 || ('\\' == cn1 && '\\' == cn2))) {
/* closing quote: emit the string, quotes included */
is_in_string = 0;
tokens_arg = Lexer_setSymbel(tokens_arg, stmt, i + 1,
&symbol_start_index);
}
continue;
}
/* match annotation: the rest of the line is a comment */
/* NOTE(review): leaving the loop here skips the end-of-line flush at the
 * bottom, so a symbol directly followed by '#' (no space in between) is
 * not emitted -- confirm that callers strip comments beforehand */
if ('#' == c0) {
break;
}
/* match divider */
if (('(' == c0) || (')' == c0) || (',' == c0) || ('[' == c0) ||
(']' == c0) || (':' == c0) || ('{' == c0) || ('}' == c0)) {
/* flush the pending symbol, then emit the divider itself */
tokens_arg =
Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index);
char content[2] = {0};
content[0] = c0;
tokens_arg = Lexer_setToken(tokens_arg, TOKEN_devider, content);
if (c0 == '(') {
bracket_deepth++;
}
if (c0 == ')') {
bracket_deepth--;
}
continue;
}
/* match operator */
if (('>' == c0) || ('<' == c0) || ('*' == c0) || ('/' == c0) ||
('+' == c0) || ('-' == c0) || ('!' == c0) || ('=' == c0) ||
('%' == c0) || ('&' == c0) || ('|' == c0) || ('^' == c0) ||
('~' == c0)) {
/* '-' right after 'e'/'E' inside a number stays part of the number */
if ('-' == c0 && is_number) {
if ((cn1 == 'e') || (cn1 == 'E')) {
continue;
}
}
if (('*' == c0) || ('/' == c0)) {
/*
**=, //=
*/
if ((c0 == c1) && ('=' == c2)) {
char content[4] = {0};
content[0] = c0;
content[1] = c1;
content[2] = '=';
tokens_arg = Lexer_setSymbel(tokens_arg, stmt, i,
&symbol_start_index);
tokens_arg =
Lexer_setToken(tokens_arg, TOKEN_operator, content);
/* skip the two extra characters of the operator */
i = i + 2;
continue;
}
}
/*
>>, <<, **, //
*/
if (('>' == c0) || ('<' == c0) || ('*' == c0) || ('/' == c0)) {
if (c0 == c1) {
char content[3] = {0};
content[0] = c0;
content[1] = c1;
tokens_arg = Lexer_setSymbel(tokens_arg, stmt, i,
&symbol_start_index);
tokens_arg =
Lexer_setToken(tokens_arg, TOKEN_operator, content);
i = i + 1;
continue;
}
}
/*
>=, <=, *=, /=, +=, -=, !=, ==, %=
*/
if (('>' == c0) || ('<' == c0) || ('*' == c0) || ('/' == c0) ||
('+' == c0) || ('-' == c0) || ('!' == c0) || ('=' == c0) ||
('%' == c0)) {
if ('=' == c1) {
char content[3] = {0};
content[0] = c0;
content[1] = c1;
tokens_arg = Lexer_setSymbel(tokens_arg, stmt, i,
&symbol_start_index);
tokens_arg =
Lexer_setToken(tokens_arg, TOKEN_operator, content);
i = i + 1;
continue;
}
}
/* single operator */
/*
every operator character that did not form a longer operator above
*/
char content[2] = {0};
content[0] = c0;
tokens_arg =
Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index);
tokens_arg = Lexer_setToken(tokens_arg, TOKEN_operator, content);
continue;
}
// keyword operators are only looked for at the start of a word: skip
// them when the previous character belongs to an identifier or is '.'
if ((cn1 >= 'a' && cn1 <= 'z') || (cn1 >= 'A' && cn1 <= 'Z') ||
(cn1 >= '0' && cn1 <= '9') || cn1 == '_' || cn1 == '.') {
goto after_match_string_operator;
}
/* each keyword must be followed by a space; it is emitted with a
 * space on both sides as its payload */
/* not */
if ('n' == c0) {
if (('o' == c1) && ('t' == c2) && (' ' == c3)) {
tokens_arg =
Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index);
tokens_arg =
Lexer_setToken(tokens_arg, TOKEN_operator, " not ");
i = i + 3;
continue;
}
}
/* and */
if ('a' == c0) {
if (('n' == c1) && ('d' == c2) && (' ' == c3)) {
tokens_arg =
Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index);
tokens_arg =
Lexer_setToken(tokens_arg, TOKEN_operator, " and ");
i = i + 3;
continue;
}
}
/* or */
if ('o' == c0) {
if (('r' == c1) && (' ' == c2)) {
tokens_arg =
Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index);
tokens_arg = Lexer_setToken(tokens_arg, TOKEN_operator, " or ");
i = i + 2;
continue;
}
}
/* is */
if ('i' == c0) {
if (('s' == c1) && (' ' == c2)) {
tokens_arg =
Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index);
tokens_arg = Lexer_setToken(tokens_arg, TOKEN_operator, " is ");
i = i + 2;
continue;
}
}
/* in */
if ('i' == c0) {
if (('n' == c1) && (' ' == c2)) {
tokens_arg =
Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index);
tokens_arg = Lexer_setToken(tokens_arg, TOKEN_operator, " in ");
i = i + 2;
continue;
}
}
/* as */
if ('a' == c0) {
if (('s' == c1) && (' ' == c2)) {
tokens_arg =
Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index);
tokens_arg = Lexer_setToken(tokens_arg, TOKEN_operator, " as ");
i = i + 2;
continue;
}
}
/* import */
if ('i' == c0) {
if (('m' == c1) && ('p' == c2) && ('o' == c3) && ('r' == c4) &&
('t' == c5) && (' ' == c6)) {
tokens_arg =
Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index);
tokens_arg =
Lexer_setToken(tokens_arg, TOKEN_operator, " import ");
i = i + 5;
continue;
}
}
after_match_string_operator:
/* skip spaces */
if (' ' == c0) {
/* no symbol collected yet: the space only moves the start forward */
if (i == symbol_start_index) {
symbol_start_index = -1;
} else {
/* a symbol is pending: the space ends it */
tokens_arg =
Lexer_setSymbel(tokens_arg, stmt, i, &symbol_start_index);
}
}
if (i == size - 1) {
/* last character: flush whatever symbol is still pending */
// if('\n' == c0){
// continue;
// }
tokens_arg =
Lexer_setSymbel(tokens_arg, stmt, size, &symbol_start_index);
}
}
/* output: copy the token string out before its Arg is released */
tokens = arg_getStr(tokens_arg);
tokens = strsCopy(outBuffs, tokens);
arg_deinit(tokens_arg);
return tokens;
}
/*
 * Pop the next token from a token string.
 *
 * Thin wrapper around strsPopToken() using the token separator 0x1F.
 */
char* Parser_popToken(Args* buffs_p, char* tokens) {
    char* token = strsPopToken(buffs_p, tokens, 0x1F);
    return token;
}
/* Read the type of a token: it is stored in the token's first byte. */
enum TokenType Token_getType(char* token) {
    return (enum TokenType)(*token);
}
/* Get the payload of a token: the text that follows the leading type byte. */
char* Token_getPyload(char* token) {
    return &token[1];
}
/*
 * Search a token string for a token with the given type and payload.
 *
 * Returns 1 as soon as a token matches both `token_type` and `pyload`,
 * 0 when none does. The tokens are popped from a private copy of `tokens`.
 */
uint8_t Parser_isContainToken(char* tokens,
                              enum TokenType token_type,
                              char* pyload) {
    Args buffs = {0};
    char* remaining = strsCopy(&buffs, tokens);
    uint8_t found = 0;
    uint16_t count = Tokens_getSize(tokens);
    for (int n = 0; n < count && !found; n++) {
        char* token = Parser_popToken(&buffs, remaining);
        if (token_type == Token_getType(token) &&
            strEqu(Token_getPyload(token), pyload)) {
            found = 1;
        }
    }
    strsDeinit(&buffs);
    return found;
}
/*
 * Operators known to Lexer_getOperator(), as fixed 9 byte strings.
 * Lexer_getOperator() walks this table from first to last and keeps the last
 * entry found in the statement, so later entries override earlier ones.
 * Keyword operators carry a space on both sides, matching the payloads
 * emitted by the lexer.
 */
static const char operators[][9] = {
"**", "~", "*", "/", "%", "//", "+", "-",
">>", "<<", "&", "^", "|", "<", "<=", ">",
">=", "!=", "==", " is ", " in ", "%=", "/=", "//=",
"-=", "+=", "*=", "**=", " not ", " and ", " or ", " import "};
/*
 * Pick the operator that a statement is to be split at.
 *
 * 1. Every entry of `operators` is looked up in the statement; each hit
 *    replaces the previous one, so the last matching table entry wins.
 * 2. If that is "-" and a "-" directly follows another operator token
 *    (as in "~-1"), the operator in front of it is chosen instead.
 * 3. If the result is "+" or "-", the last "+" or "-" token of the
 *    statement is chosen, i.e. the last operator of that level.
 *
 * Returns a copy owned by `outBuffs`, or NULL when the statement contains
 * none of the known operators. The scratch buffers are released on every
 * path, including the NULL one.
 */
char* Lexer_getOperator(Args* outBuffs, char* stmt) {
    Args buffs = {0};
    char* operator = NULL;
    char* tokens = Lexer_parseLine(&buffs, stmt);
    /* NOTE(review): this pass feeds the cursor the already lexed token
     * string, while the two passes below feed it `stmt`; the cursor lexes
     * whatever it is given -- confirm that this is intended */
    for (uint32_t i = 0; i < sizeof(operators) / sizeof(operators[0]); i++) {
        Cursor_forEachToken(ps, tokens) {
            Cursor_iterStart(&ps);
            if (strEqu(ps.token2.pyload, (char*)operators[i])) {
                operator = strsCopy(&buffs, (char*)operators[i]);
                Cursor_iterEnd(&ps);
                break;
            }
            Cursor_iterEnd(&ps);
        };
        Cursor_deinit(&ps);
    }
    /* solve the issue of "~-1": prefer the operator in front of the '-' */
    if (NULL != operator && strEqu(operator, "-")) {
        Cursor_forEachToken(ps, stmt) {
            Cursor_iterStart(&ps);
            if (strEqu(ps.token2.pyload, "-")) {
                if (ps.token1.type == TOKEN_operator) {
                    operator = strsCopy(&buffs, ps.token1.pyload);
                    Cursor_iterEnd(&ps);
                    break;
                }
            }
            Cursor_iterEnd(&ps);
        };
        Cursor_deinit(&ps);
    }
    /* match the last operator in equal level */
    if (NULL != operator &&
        ((strEqu(operator, "+")) || (strEqu(operator, "-")))) {
        Cursor_forEachToken(ps, stmt) {
            Cursor_iterStart(&ps);
            if (strEqu(ps.token1.pyload, "+")) {
                operator = strsCopy(&buffs, "+");
            }
            if (strEqu(ps.token1.pyload, "-")) {
                operator = strsCopy(&buffs, "-");
            }
            Cursor_iterEnd(&ps);
        }
        Cursor_deinit(&ps);
    }
    /* output: copy the result out before the scratch buffers go away */
    if (NULL != operator) {
        operator = strsCopy(outBuffs, operator);
    }
    strsDeinit(&buffs);
    return operator;
}
/* payload shared by all tokens of type TOKEN_strEnd */
const char void_str[] = "";
/*
 * Refresh `type` and `pyload` of a LexToken from its raw `token` text.
 *
 * A TOKEN_strEnd token gets the empty string as payload, every other token
 * gets the text behind its type byte.
 */
void LexToken_update(struct LexToken* lex_token) {
    lex_token->type = Token_getType(lex_token->token);
    if (lex_token->type != TOKEN_strEnd) {
        lex_token->pyload = Token_getPyload(lex_token->token);
        return;
    }
    lex_token->pyload = (char*)void_str;
}
/*
 * Begin one iteration step of the cursor.
 *
 * token1 becomes the token remembered from the previous step and token2 the
 * token freshly popped from the token string; token2 is then remembered for
 * the next step. Both live in `iter_buffs`, a buffer set created for this
 * step only (released by Cursor_iterEnd()).
 * The bracket depth is raised when token1 is "(", "[" or "{" and lowered
 * when it is ")", "]" or "}".
 */
void Cursor_iterStart(struct Cursor* ps) {
    ps->iter_index++;
    ps->iter_buffs = New_strBuff();
    /* shift the window: last token -> token1, next token -> token2 */
    ps->token1.token = strsCopy(ps->iter_buffs, arg_getStr(ps->last_token));
    ps->token2.token = Parser_popToken(ps->iter_buffs, ps->tokens);
    arg_deinit(ps->last_token);
    ps->last_token = arg_newStr(ps->token2.token);
    LexToken_update(&ps->token1);
    LexToken_update(&ps->token2);
    /* track the nesting of the current token */
    char* current = ps->token1.pyload;
    if (strEqu(current, "(") || strEqu(current, "[") ||
        strEqu(current, "{")) {
        ps->branket_deepth++;
    } else if (strEqu(current, ")") || strEqu(current, "]") ||
               strEqu(current, "}")) {
        ps->branket_deepth--;
    }
}
/* Reset a LexToken: no text, no payload, type TOKEN_strEnd. */
void LexToken_init(struct LexToken* lt) {
    lt->type = TOKEN_strEnd;
    lt->token = NULL;
    lt->pyload = NULL;
}
/*
 * Put a cursor into its start state.
 *
 * All counters are zeroed, both tokens are reset, the result is
 * PIKA_RES_OK and a fresh buffer set is created in `buffs_p`
 * (released by Cursor_deinit()).
 */
void Cursor_init(struct Cursor* ps) {
    LexToken_init(&ps->token1);
    LexToken_init(&ps->token2);
    ps->result = PIKA_RES_OK;
    ps->tokens = NULL;
    ps->last_token = NULL;
    ps->iter_buffs = NULL;
    ps->length = 0;
    ps->iter_index = 0;
    ps->branket_deepth = 0;
    ps->buffs_p = New_strBuff();
}
void Cursor_iterEnd(struct Cursor* ps) {
args_deinit(ps->iter_buffs);
}
void Cursor_deinit(struct Cursor* ps) {
if (NULL != ps->last_token) {
arg_deinit(ps->last_token);
}
args_deinit(ps->buffs_p);
}
/*
 * Lex `stmt` into the cursor.
 *
 * On success `tokens` holds the token string and `length` the number of
 * tokens. When `stmt` is NULL or the lexer returns NULL, `result` is set to
 * PIKA_RES_ERR_SYNTAX_ERROR and `length` is left as it was.
 */
void Cursor_parse(struct Cursor* ps, char* stmt) {
    if (NULL != stmt) {
        ps->tokens = Lexer_parseLine(ps->buffs_p, stmt);
        if (NULL != ps->tokens) {
            ps->length = Tokens_getSize(ps->tokens);
            return;
        }
    }
    ps->result = PIKA_RES_ERR_SYNTAX_ERROR;
}
/*
 * Prepare the cursor for its first iteration step.
 *
 * Does nothing when parsing failed. Otherwise the first popped token is
 * thrown away and the second one is remembered as `last_token`, so that it
 * shows up as token1 in the first step.
 */
void Cursor_beforeIter(struct Cursor* ps) {
    if (PIKA_RES_OK == ps->result) {
        /* clear first token */
        Parser_popToken(ps->buffs_p, ps->tokens);
        char* first = Parser_popToken(ps->buffs_p, ps->tokens);
        ps->last_token = arg_newStr(first);
    }
}
/*
 * Split the text inside a subscript into start, end and step.
 *
 * The tokens of `inner` are distributed over the three parts at every ":"
 * found at bracket depth 0. Defaults are then applied:
 *  - no colon:  end is "<start> + 1", step is "1"
 *  - one colon: step is "1", an empty start becomes "0", an empty end "-1"
 *  - two colons: the parts are used as collected
 * All three results are copies owned by `outBuffs`.
 */
static void Slice_getPars(Args* outBuffs,
                          char* inner,
                          char** pStart,
                          char** pEnd,
                          char** pStep) {
    Args buffs = {0};
    *pStart = "";
    *pEnd = "";
    *pStep = "";
    uint8_t colons = 0;
    Cursor_forEachToken(ps, inner) {
        Cursor_iterStart(&ps);
        if (strEqu(ps.token1.pyload, ":") && ps.branket_deepth == 0) {
            /* a top level colon moves on to the next part */
            colons++;
        } else {
            switch (colons) {
                case 0:
                    *pStart = strsAppend(&buffs, *pStart, ps.token1.pyload);
                    break;
                case 1:
                    *pEnd = strsAppend(&buffs, *pEnd, ps.token1.pyload);
                    break;
                case 2:
                    *pStep = strsAppend(&buffs, *pStep, ps.token1.pyload);
                    break;
                default:
                    /* tokens behind a third colon are ignored */
                    break;
            }
        }
        Cursor_iterEnd(&ps);
    }
    Cursor_deinit(&ps);
    if (1 == colons) {
        *pStep = "1";
        if (strEqu(*pStart, "")) {
            *pStart = "0";
        }
        if (strEqu(*pEnd, "")) {
            *pEnd = "-1";
        }
    } else if (0 == colons) {
        /* plain index: a slice of length one */
        *pEnd = strsAppend(&buffs, *pStart, " + 1");
        *pStep = "1";
    }
    /* hand the parts over to the caller before the scratch is released */
    *pStart = strsCopy(outBuffs, *pStart);
    *pEnd = strsCopy(outBuffs, *pEnd);
    *pStep = strsCopy(outBuffs, *pStep);
    strsDeinit(&buffs);
}
/*
 * Syntax sugar: turn a subscript on the left side of an assignment into a
 * __setitem__ call.
 *
 * For a left side of the form obj[key] the new right side becomes
 * "__setitem__(obj,key,<right>)" and the text behind `*left_p` is cut off at
 * its first '[', which leaves "obj" as the assignment target.
 *
 * Returns the new right side as a copy owned by `outBuffs`. When the left
 * side is NULL or has no '[' directly after a symbol or literal, a copy of
 * `right` is returned and the left side is not touched.
 * With PIKA_SYNTAX_SLICE_ENABLE off, `right` itself is returned.
 */
char* Suger_solveLeftSlice(Args* outBuffs, char* right, char** left_p) {
#if !PIKA_SYNTAX_SLICE_ENABLE
return right;
#endif
/* init objects */
Args buffs = {0};
Arg* right_arg = arg_newStr("");
char* left = *left_p;
uint8_t is_in_brancket = 0;
/* "inner" collects the text between '[' and ']' */
args_setStr(&buffs, "inner", "");
uint8_t matched = 0;
char* right_res = NULL;
/* exit when NULL */
if (NULL == left) {
/* NOTE(review): right_arg is passed to arg_setStr() after it has been
 * deinitialized; this is only safe if arg_setStr() does not use its first
 * argument -- confirm against the arg_setStr() implementation */
arg_deinit(right_arg);
right_arg = arg_setStr(right_arg, "", right);
goto exit;
}
/* exit when not match
(symbol|literal)'['
*/
Cursor_forEachToken(ps, left) {
Cursor_iterStart(&ps);
if (strEqu(ps.token2.pyload, "[")) {
if (TOKEN_symbol == ps.token1.type ||
TOKEN_literal == ps.token1.type) {
matched = 1;
Cursor_iterEnd(&ps);
break;
}
}
Cursor_iterEnd(&ps);
}
Cursor_deinit(&ps);
if (!matched) {
/* not contain '[', return origin */
/* NOTE(review): same deinit-then-arg_setStr pattern as above */
arg_deinit(right_arg);
right_arg = arg_setStr(right_arg, "", right);
goto exit;
}
/* matched [] */
Cursor_forEachTokenExistPs(ps, left) {
Cursor_iterStart(&ps);
/* found '[' */
if ((TOKEN_devider == ps.token2.type) &&
(strEqu(ps.token2.pyload, "["))) {
/* get 'obj' from obj[] */
args_setStr(&buffs, "obj", ps.token1.pyload);
is_in_brancket = 1;
/* found ']' */
} else if ((TOKEN_devider == ps.token2.type) &&
(strEqu(ps.token2.pyload, "]"))) {
is_in_brancket = 0;
/* token1 is the last token in front of ']': complete "inner" */
char* inner = args_getStr(&buffs, "inner");
Arg* inner_arg = arg_newStr(inner);
inner_arg = arg_strAppend(inner_arg, ps.token1.pyload);
args_setStr(&buffs, "inner", arg_getStr(inner_arg));
arg_deinit(inner_arg);
/* update inner pointer */
inner = args_getStr(&buffs, "inner");
char* start = NULL;
char* end = NULL;
char* step = NULL;
/* only `start` is used below, `end` and `step` are not */
Slice_getPars(&buffs, inner, &start, &end, &step);
/* obj = __setitem__(obj, key, val) */
right_arg = arg_strAppend(right_arg, "__setitem__(");
right_arg = arg_strAppend(right_arg, args_getStr(&buffs, "obj"));
right_arg = arg_strAppend(right_arg, ",");
right_arg = arg_strAppend(right_arg, start);
right_arg = arg_strAppend(right_arg, ",");
right_arg = arg_strAppend(right_arg, right);
right_arg = arg_strAppend(right_arg, ")");
/* clean the inner */
args_setStr(&buffs, "inner", "");
/* inside the brackets and token1 is not the '[' itself */
} else if (is_in_brancket && (!strEqu(ps.token1.pyload, "["))) {
char* inner = args_getStr(&buffs, "inner");
Arg* index_arg = arg_newStr(inner);
index_arg = arg_strAppend(index_arg, ps.token1.pyload);
args_setStr(&buffs, "inner", arg_getStr(index_arg));
arg_deinit(index_arg);
/* outside of the brackets and token1 is not a ']' */
} else if (!is_in_brancket && (!strEqu(ps.token1.pyload, "]"))) {
if (TOKEN_strEnd != ps.token1.type) {
right_arg = arg_strAppend(right_arg, ps.token1.pyload);
}
}
Cursor_iterEnd(&ps);
}
Cursor_deinit(&ps);
/* clean the left: terminate the caller's string at its first '[' */
for (size_t i = 0; i < strGetSize(left); i++) {
if (left[i] == '[') {
left[i] = '\0';
break;
}
}
exit:
/* clean and return */
right_res = strsCopy(outBuffs, arg_getStr(right_arg));
arg_deinit(right_arg);
strsDeinit(&buffs);
return right_res;
}
/*
 * Syntax sugar: turn a string format expression into a cformat() call.
 *
 * "fmt" % x        becomes  cformat("fmt",x)
 * "fmt" % (a, b)   becomes  cformat("fmt",a,b)
 *
 * `right` itself is returned unchanged when PIKA_SYNTAX_FORMAT_ENABLE is
 * off, when it has no '%' character, or when no "%" token sits at bracket
 * depth 0. Otherwise the rewritten statement is returned as a copy owned by
 * `outBuffs`.
 */
char* Suger_solveFormat(Args* outBuffs, char* right) {
#if !PIKA_SYNTAX_FORMAT_ENABLE
return right;
#endif
/* quick skip */
if (!strIsContain(right, '%')) {
return right;
}
/* look for a top level "%" token before doing any work */
PIKA_BOOL is_format = PIKA_FALSE;
Cursor_forEachToken(ps1, right) {
Cursor_iterStart(&ps1);
if (ps1.branket_deepth == 0 && strEqu(ps1.token1.pyload, "%")) {
is_format = PIKA_TRUE;
}
Cursor_iterEnd(&ps1);
}
Cursor_deinit(&ps1);
if (PIKA_FALSE == is_format) {
return right;
}
char* res = right;
/* str_buf: the rewritten statement, var_buf: the format arguments */
Arg* str_buf = arg_newStr("");
Arg* var_buf = arg_newStr("");
PIKA_BOOL is_in_format = PIKA_FALSE;
PIKA_BOOL is_tuple = PIKA_FALSE;
PIKA_BOOL is_out_vars = PIKA_FALSE;
Args buffs = {0};
char* fmt = NULL;
Cursor_forEachToken(ps, right) {
/* text to copy through unchanged in this step, "" for none */
char* item = "";
Cursor_iterStart(&ps);
if (PIKA_FALSE == is_in_format) {
/* anything but a quoted string literal followed by "%" is copied */
if (ps.token1.type != TOKEN_literal) {
item = ps.token1.pyload;
goto iter_continue;
}
if (ps.token1.pyload[0] != '\'' && ps.token1.pyload[0] != '"') {
item = ps.token1.pyload;
goto iter_continue;
}
if (!strEqu(ps.token2.pyload, "%")) {
item = ps.token1.pyload;
goto iter_continue;
}
/* found the format stmt */
is_in_format = PIKA_TRUE;
fmt = strsCopy(&buffs, ps.token1.pyload);
goto iter_continue;
}
if (PIKA_TRUE == is_in_format) {
/* check the format vars */
if (strEqu(ps.token1.pyload, "%")) {
/* is a tuple: its opening "(" is not collected */
if (strEqu(ps.token2.pyload, "(")) {
is_tuple = PIKA_TRUE;
} else {
var_buf = arg_strAppend(var_buf, ps.token2.pyload);
}
goto iter_continue;
}
/* the last token ends the format arguments */
if (ps.iter_index == ps.length) {
is_out_vars = PIKA_TRUE;
is_in_format = PIKA_FALSE;
} else {
/* push the vars inner the tuple */
var_buf = arg_strAppend(var_buf, ps.token2.pyload);
}
if (is_out_vars) {
if (is_tuple) {
/* no ")" is added here: the tuple's own closing ")" has been
 * collected into var_buf as a token2 above */
str_buf = arg_strAppend(str_buf, "cformat(");
str_buf = arg_strAppend(str_buf, fmt);
str_buf = arg_strAppend(str_buf, ",");
str_buf = arg_strAppend(str_buf, arg_getStr(var_buf));
} else {
str_buf = arg_strAppend(str_buf, "cformat(");
str_buf = arg_strAppend(str_buf, fmt);
str_buf = arg_strAppend(str_buf, ",");
str_buf = arg_strAppend(str_buf, arg_getStr(var_buf));
str_buf = arg_strAppend(str_buf, ")");
}
}
}
iter_continue:
/* outside of a format expression the token is passed through */
if (!is_in_format) {
str_buf = arg_strAppend(str_buf, item);
}
Cursor_iterEnd(&ps);
}
Cursor_deinit(&ps);
/* copy the result out before its Arg is released */
res = strsCopy(outBuffs, arg_getStr(str_buf));
arg_deinit(str_buf);
arg_deinit(var_buf);
strsDeinit(&buffs);
return res;
}
/*
 * Expand an augmented assignment (+=, -=, *=, /=).
 *
 * For "a += b" the left side becomes "a" and the right side "a+(b)"; the
 * other three operators work alike. Both results are stored through
 * `left_p` / `right_p` as copies owned by `outbuffs`.
 *
 * Returns 1 when the statement holds one of the four operators, otherwise 0
 * with `*left_p` and `*right_p` left as they were.
 */
uint8_t Parser_solveSelfOperator(Args* outbuffs,
char* stmt,
char** right_p,
char** left_p) {
char* left_new = NULL;
char* right_new = NULL;
Arg* left_arg = arg_newStr("");
Arg* right_arg = arg_newStr("");
Arg* right_arg_new = arg_newStr("");
uint8_t is_left_exist = 0;
Args buffs = {0};
/* the plain operator as a one character string, "" while none was found */
char _operator[2] = {0};
char* operator=(char*) _operator;
char* tokens = Lexer_parseLine(&buffs, stmt);
uint8_t is_right = 0;
/* the checks are not exclusive: a later hit replaces an earlier one */
if (Parser_isContainToken(tokens, TOKEN_operator, "+=")) {
operator[0] = '+';
}
if (Parser_isContainToken(tokens, TOKEN_operator, "-=")) {
operator[0] = '-';
}
if (Parser_isContainToken(tokens, TOKEN_operator, "*=")) {
operator[0] = '*';
}
if (Parser_isContainToken(tokens, TOKEN_operator, "/=")) {
operator[0] = '/';
}
/* not found self operator */
if (operator[0] == 0) {
goto exit;
}
/* found self operator */
is_left_exist = 1;
/* tokens in front of the operator form the left side, tokens behind it the
 * right side; the operator token itself is dropped */
Cursor_forEachToken(ps, stmt) {
Cursor_iterStart(&ps);
if ((strEqu(ps.token1.pyload, "*=")) ||
(strEqu(ps.token1.pyload, "/=")) ||
(strEqu(ps.token1.pyload, "+=")) ||
(strEqu(ps.token1.pyload, "-="))) {
is_right = 1;
goto iter_continue;
}
if (!is_right) {
left_arg = arg_strAppend(left_arg, ps.token1.pyload);
} else {
right_arg = arg_strAppend(right_arg, ps.token1.pyload);
}
iter_continue:
Cursor_iterEnd(&ps);
}
Cursor_deinit(&ps);
/* connect right: <left><operator>(<right>) */
right_arg_new = arg_strAppend(right_arg_new, arg_getStr(left_arg));
right_arg_new = arg_strAppend(right_arg_new, operator);
right_arg_new = arg_strAppend(right_arg_new, "(");
right_arg_new = arg_strAppend(right_arg_new, arg_getStr(right_arg));
right_arg_new = arg_strAppend(right_arg_new, ")");
/* collect left_new and right_new */
left_new = arg_getStr(left_arg);
right_new = arg_getStr(right_arg_new);
exit:
strsDeinit(&buffs);
/* left_new / right_new point into the Args released below, so they are
 * copied into outbuffs first */
if (NULL != right_new) {
*(right_p) = strsCopy(outbuffs, right_new);
;
}
if (NULL != left_new) {
*(left_p) = strsCopy(outbuffs, left_new);
}
arg_deinit(right_arg);
arg_deinit(left_arg);
arg_deinit(right_arg_new);
return is_left_exist;
}
/*
 * Store `node_content` in the AST node under the key `node_type`.
 * Returns the result of obj_setStr().
 */
PIKA_RES AST_setThisNode(AST* ast, char* node_type, char* node_content) {
    PIKA_RES res = obj_setStr(ast, node_type, node_content);
    return res;
}
AST* AST_parseStmt(AST* ast, char* stmt);
/*
 * Parse `node_content` as a child statement of `ast`.
 *
 * A new "stmt" object is pushed onto the node's queue and the statement is
 * parsed into it. Always returns PIKA_RES_OK.
 */
PIKA_RES AST_parseSubStmt(AST* ast, char* node_content) {
    queueObj_pushObj(ast, (char*)"stmt");
    AST* child = queueObj_getCurrentObj(ast);
    AST_parseStmt(child, node_content);
    return PIKA_RES_OK;
}
/*
 * Pop the first sub statement of `*stmt_p`.
 *
 * The statement is split at the first token equal to `delimiter` that is
 * met at bracket depth 0. The tokens in front of it are returned, the
 * tokens behind it are stored back into `*stmt_p`; the delimiter itself is
 * dropped. Without a delimiter everything is returned and `*stmt_p`
 * becomes "". Both strings are cached in `outbuffs`.
 */
char* Parser_popSubStmt(Args* outbuffs, char** stmt_p, char* delimiter) {
    Arg* head_arg = arg_newStr("");
    Arg* rest_arg = arg_newStr("");
    char* stmt = *stmt_p;
    PIKA_BOOL is_get_substmt = PIKA_FALSE;
    Args buffs = {0};
    Cursor_forEachToken(ps, stmt) {
        Cursor_iterStart(&ps);
        if (is_get_substmt) {
            /* behind the delimiter: belongs to the remaining statement */
            rest_arg = arg_strAppend(rest_arg, ps.token1.pyload);
        } else if (ps.branket_deepth > 0 ||
                   !strEqu(ps.token1.pyload, delimiter)) {
            /* nested, or not the delimiter: belongs to the sub statement */
            head_arg = arg_strAppend(head_arg, ps.token1.pyload);
        } else {
            /* top level delimiter: switch sides */
            is_get_substmt = PIKA_TRUE;
        }
        Cursor_iterEnd(&ps);
    }
    Cursor_deinit(&ps);
    strsDeinit(&buffs);
    char* substmt = strsCacheArg(outbuffs, head_arg);
    char* newstmt = strsCacheArg(outbuffs, rest_arg);
    *stmt_p = newstmt;
    return substmt;
}
/*
 * Pop the last sub statement of `*stmt_p`.
 *
 * The statement is split at the last token that starts with `delimiter`.
 * For the delimiter "[" a token counts up to bracket depth 1 (the depth has
 * already been raised for the "[" token itself); for any other delimiter it
 * only counts at depth 0.
 *
 * The tokens in front of the split are stored back into `*stmt_p`; the
 * tokens from the split on, delimiter included, are returned. Without a
 * match everything is returned and `*stmt_p` becomes "". Both strings are
 * cached in `outbuffs`.
 */
char* Parser_popLastSubStmt(Args* outbuffs, char** stmt_p, char* delimiter) {
    /* 16 bit like the token count returned by Tokens_getSize(); an 8 bit
     * index would wrap on statements with more than 255 tokens */
    uint16_t last_stmt_i = 0;
    char* stmt = *stmt_p;
    /* deepest nesting at which a delimiter token is still accepted */
    const int max_depth = strEqu(delimiter, "[") ? 1 : 0;
    Cursor_forEachToken(ps, stmt) {
        Cursor_iterStart(&ps);
        if (strIsStartWith(ps.token1.pyload, delimiter) &&
            ps.branket_deepth <= max_depth) {
            /* found delimiter, a later one replaces it */
            last_stmt_i = ps.iter_index;
        }
        Cursor_iterEnd(&ps);
    }
    Cursor_deinit(&ps);
    Arg* mainStmt = arg_newStr("");
    Arg* lastStmt = arg_newStr("");
    {
        /* own scope: the macro declares a second cursor named ps */
        Cursor_forEachToken(ps, stmt) {
            Cursor_iterStart(&ps);
            if (ps.iter_index < last_stmt_i) {
                mainStmt = arg_strAppend(mainStmt, ps.token1.pyload);
            } else {
                lastStmt = arg_strAppend(lastStmt, ps.token1.pyload);
            }
            Cursor_iterEnd(&ps);
        }
        Cursor_deinit(&ps);
    }
    *stmt_p = strsCacheArg(outbuffs, mainStmt);
    return strsCacheArg(outbuffs, lastStmt);
}
/*
 * Parse a list literal into the AST node.
 *
 * The node is marked as "list" and every comma separated element between
 * '[' and ']' is parsed as a child statement. Does nothing when
 * PIKA_BUILTIN_STRUCT_ENABLE is off.
 */
static void _AST_parse_list(AST* ast, Args* buffs, char* stmt) {
#if !PIKA_BUILTIN_STRUCT_ENABLE
    return;
#endif
    AST_setThisNode(ast, (char*)"list", "list");
    char* items = strsCut(buffs, stmt, '[', ']');
    /* a trailing comma makes the last element end like all the others */
    items = strsAppend(buffs, items, ",");
    do {
        char* item = Parser_popSubStmt(buffs, &items, ",");
        AST_parseSubStmt(ast, item);
    } while (!strEqu(items, ""));
}
/*
 * Parse a dict literal "{k: v, ...}" into `ast`.
 * Marks the node as "dict"; for every ','-separated entry between '{' and
 * '}' the part before ':' and the part after it are parsed, in that order,
 * as sub statements. Does nothing when PIKA_BUILTIN_STRUCT_ENABLE is off.
 */
static void _AST_parse_dict(AST* ast, Args* buffs, char* stmt) {
#if !PIKA_BUILTIN_STRUCT_ENABLE
    return;
#endif
    AST_setThisNode(ast, (char*)"dict", "dict");
    char* entries = strsCut(buffs, stmt, '{', '}');
    /* terminate the last entry with ',' so every entry can be popped */
    entries = strsAppend(buffs, entries, ",");
    do {
        char* entry = Parser_popSubStmt(buffs, &entries, ",");
        /* popping the key leaves the value in `entry` */
        char* key = Parser_popSubStmt(buffs, &entry, ":");
        AST_parseSubStmt(ast, key);
        AST_parseSubStmt(ast, entry);
    } while (!strEqu(entries, ""));
}
/*
 * Parse a slice/index statement "obj[a:b...]" into `ast`.
 * Marks the node as "slice", parses the part before the last "[" as a sub
 * statement and then each ':'-separated field inside the brackets.
 * An empty first field is parsed as "0" and an empty second field as "-1".
 * Does nothing when PIKA_SYNTAX_SLICE_ENABLE is off.
 */
static void _AST_parse_slice(AST* ast, Args* buffs, char* stmt) {
#if !PIKA_SYNTAX_SLICE_ENABLE
    return;
#endif
    AST_setThisNode(ast, (char*)"slice", "slice");
    stmt = strsCopy(buffs, stmt);
    char* bracket_part = Parser_popLastSubStmt(buffs, &stmt, "[");
    /* what is left in stmt is the sliced object */
    AST_parseSubStmt(ast, stmt);
    char* fields = strsCut(buffs, bracket_part, '[', ']');
    pika_assert(fields != NULL);
    /* terminate the last field with ':' so every field can be popped */
    fields = strsAppend(buffs, fields, ":");
    int field_i = 0;
    do {
        char* field = Parser_popSubStmt(buffs, &fields, ":");
        if (field_i == 0 && strEqu(field, "")) {
            /* default start */
            field = "0";
        } else if (field_i == 1 && strEqu(field, "")) {
            /* default end */
            field = "-1";
        }
        AST_parseSubStmt(ast, field);
        field_i++;
    } while (!strEqu("", fields));
}
/*
 * Parse one statement into `ast`.
 *
 * Steps:
 *  1. split "left = right" when the statement is a direct assignment, or
 *     let Parser_solveSelfOperator() handle the "+=", "-=", ... forms;
 *  2. apply the left-slice and format sugar to the right part;
 *  3. store the "left" node when an assignment target exists;
 *  4. classify the right part with Lexer_matchStmtType() and fill the node
 *     that belongs to the statement type (operator, list, dict, chain,
 *     slice, method, ref, import, string, bytes or num).
 *
 * Returns `ast` on success. On a syntax error `ast` is released with
 * AST_deinit() and NULL is returned, so the caller must not use it again.
 */
AST* AST_parseStmt(AST* ast, char* stmt) {
    Args buffs = {0};
    char* assignment = strsGetFirstToken(&buffs, stmt, '(');
    char* method = NULL;
    char* ref = NULL;
    char* str = NULL;
    char* num = NULL;
    char* left = NULL;
    char* right = NULL;
    char* import = NULL;
    PIKA_RES result = PIKA_RES_OK;
    right = stmt;
    /* solve check direct */
    uint8_t isLeftExist = 0;
    if (Parser_checkIsDirect(assignment)) {
        isLeftExist = 1;
        left = strsCopy(&buffs, "");
        right = strsCopy(&buffs, "");
        uint8_t is_meet_equ = 0;
        Cursor_forEachToken(ps, stmt) {
            Cursor_iterStart(&ps);
            if (strEqu(ps.token1.pyload, "=") &&
                ps.token1.type == TOKEN_operator) {
                /* the '=' itself belongs to neither side */
                is_meet_equ = 1;
                Cursor_iterEnd(&ps);
                continue;
            }
            if (0 == is_meet_equ) {
                left = strsAppend(&buffs, left, ps.token1.pyload);
            }
            if (1 == is_meet_equ) {
                right = strsAppend(&buffs, right, ps.token1.pyload);
            }
            Cursor_iterEnd(&ps);
        }
        Cursor_deinit(&ps);
    }
    /* solve the += -= /= *= stmt */
    if (!isLeftExist) {
        isLeftExist = Parser_solveSelfOperator(&buffs, stmt, &right, &left);
    }
    /* solve the [] stmt */
    right = Suger_solveLeftSlice(&buffs, right, &left);
    right = Suger_solveFormat(&buffs, right);
    /* set left */
    if (isLeftExist) {
        AST_setThisNode(ast, (char*)"left", left);
    }
    /* match statement type */
    enum StmtType stmtType = Lexer_matchStmtType(right);
    /* solve operator stmt */
    if (STMT_operator == stmtType) {
        char* rightWithoutSubStmt = strsDeleteBetween(&buffs, right, '(', ')');
        char* operator= Lexer_getOperator(&buffs, rightWithoutSubStmt);
        if (NULL == operator) {
            result = PIKA_RES_ERR_SYNTAX_ERROR;
            goto exit;
        }
        AST_setThisNode(ast, (char*)"operator", operator);
        char* rightBuff = strsCopy(&buffs, right);
        /* the part before the operator is popped, the rest stays in
         * rightBuff */
        char* subStmt1 =
            strsPopTokenWithSkip_byStr(&buffs, rightBuff, operator, '(', ')');
        char* subStmt2 = rightBuff;
        AST_parseSubStmt(ast, subStmt1);
        AST_parseSubStmt(ast, subStmt2);
        goto exit;
    }
    /* solve list stmt */
    if (STMT_list == stmtType) {
        _AST_parse_list(ast, &buffs, right);
        goto exit;
    }
    /* solve dict stmt */
    if (STMT_dict == stmtType) {
        _AST_parse_dict(ast, &buffs, right);
        goto exit;
    }
    /* solve method chain */
    if (STMT_chain == stmtType) {
        char* chainStmt = strsCopy(&buffs, right);
        char* lastStmt = Parser_popLastSubStmt(&buffs, &chainStmt, ".");
        AST_parseSubStmt(ast, chainStmt);
        if (NULL == AST_parseStmt(ast, lastStmt)) {
            /* the recursive call has already released ast on its error
             * path: report the error without touching ast again */
            strsDeinit(&buffs);
            return NULL;
        }
        goto exit;
    }
    if (STMT_slice == stmtType) {
        /* solve slice stmt */
        _AST_parse_slice(ast, &buffs, right);
        goto exit;
    }
    /* solve method stmt */
    if (STMT_method == stmtType) {
        method = strsGetFirstToken(&buffs, right, '(');
        AST_setThisNode(ast, (char*)"method", method);
        char* subStmts = strsCut(&buffs, right, '(', ')');
        pika_assert(NULL != subStmts);
        /* add ',' at the end */
        subStmts = strsAppend(&buffs, subStmts, ",");
        while (1) {
            char* substmt = Parser_popSubStmt(&buffs, &subStmts, ",");
            AST_parseSubStmt(ast, substmt);
            if (strEqu("", subStmts)) {
                break;
            }
        }
        goto exit;
    }
    /* solve reference stmt */
    if (STMT_reference == stmtType) {
        ref = right;
        AST_setThisNode(ast, (char*)"ref", ref);
        goto exit;
    }
    /* solve import stmt */
    if (STMT_import == stmtType) {
        import = strsGetLastToken(&buffs, right, ' ');
        AST_setThisNode(ast, (char*)"import", import);
        goto exit;
    }
    /* solve str stmt */
    if (STMT_string == stmtType) {
        str = strsCopy(&buffs, right);
        /* remove the first char */
        str = str + 1;
        /* remove the last char */
        str[strGetSize(str) - 1] = '\0';
        /* replace the escaped quotes */
        if (strIsContain(str, '\\')) {
            str = strsReplace(&buffs, str, "\\\"", "\"");
            str = strsReplace(&buffs, str, "\\'", "'");
        }
        AST_setThisNode(ast, (char*)"string", str);
        goto exit;
    }
    /* solve bytes stmt */
    if (STMT_bytes == stmtType) {
        /* skip the first char, then drop every quote char */
        str = right + 1;
        str = strsDeleteChar(&buffs, str, '\'');
        str = strsDeleteChar(&buffs, str, '\"');
        AST_setThisNode(ast, (char*)"bytes", str);
        goto exit;
    }
    /* solve number stmt */
    if (STMT_number == stmtType) {
        num = right;
        AST_setThisNode(ast, (char*)"num", num);
        goto exit;
    }
exit:
    strsDeinit(&buffs);
    if (result != PIKA_RES_OK) {
        AST_deinit(ast);
        return NULL;
    }
    return ast;
}
/*
 * Get the block depth of a python line from its leading spaces.
 * Returns (number of leading spaces) / 4, 0 for a line that holds nothing
 * but spaces, or -1 when the indent is not a multiple of 4.
 */
static int32_t Parser_getPyLineBlockDeepth(char* line) {
    uint32_t size = strGetSize(line);
    uint32_t indent = 0;
    /* count the leading spaces */
    while (indent < size && line[indent] == ' ') {
        indent++;
    }
    if (indent == size) {
        /* empty line, or spaces only */
        return 0;
    }
    if (0 != indent % 4) {
        /* space num is not 4N, error */
        return -1;
    }
    return indent / 4;
}
/*
 * Cut a trailing '#' comment off `line` (in place).
 *
 * A '#' only starts a comment when it is outside of '...' and "..." quotes;
 * quote state is toggled on every quote char, escapes are not interpreted.
 *
 * Returns:
 *  - `line` itself when there is no comment, or when something other than
 *    spaces is left in front of the comment;
 *  - the literal "@annontation" when only spaces (or nothing) are left.
 *    The spelling is a sentinel compared by callers and must not change.
 *
 * The line is scanned once up to its terminating NUL instead of measuring
 * its length on every loop iteration.
 */
char* Parser_removeAnnotation(char* line) {
    uint8_t is_in_single_quotes = 0;
    uint8_t is_in_double_quotes = 0;
    char* p = line;
    /* look for the first '#' outside of quotes */
    for (; *p != '\0'; p++) {
        if ('\'' == *p) {
            is_in_single_quotes = !is_in_single_quotes;
            continue;
        }
        if ('"' == *p) {
            is_in_double_quotes = !is_in_double_quotes;
            continue;
        }
        if (is_in_single_quotes || is_in_double_quotes) {
            continue;
        }
        if ('#' == *p) {
            break;
        }
    }
    /* no annotation, exit */
    if ('#' != *p) {
        return line;
    }
    /* end the line at the '#' */
    *p = '\0';
    /* check empty line */
    for (p = line; *p != '\0'; p++) {
        if (' ' != *p) {
            return line;
        }
    }
    /* is an empty line */
    return (char*)"@annontation";
}
/* control keywords: AST_parseLine() stores each one as its own node name,
 * with an empty statement */
const char control_keywords[][9] = {"break", "continue"};
/* block keywords followed by a condition: AST_parseLine() stores each one as
 * the "block" node and parses the text between the keyword and ':' */
const char normal_keywords[][7] = {"while", "if", "elif"};
/*
 * Parse one (already preprocessed) python line into a new AST.
 *
 * line        : the line, indented with 4 spaces per block level
 * block_stack : stack of the currently open block types, may be NULL.
 *               When given, the blocks closed by this line are popped into
 *               the "exitBlock" queue of the AST and a block opened by this
 *               line is pushed.
 *
 * The keyword at the start of the line selects what is stored in the AST
 * ("block", "return", "raise", "assert", "global", "del", ...) and which part
 * of the line is handed to AST_parseStmt() as the statement.
 *
 * Returns the new AST, or NULL when `line` is NULL, the indent is not a
 * multiple of 4, or the line can not be parsed.
 */
AST* AST_parseLine(char* line, Stack* block_stack) {
    /* line is not exist */
    if (line == NULL) {
        return NULL;
    }
    /* init data */
    AST* ast = New_queueObj();
    Args buffs = {0};
    int8_t block_deepth_now, block_deepth_last = -1;
    char *line_start, *stmt;
    /* get block deepth */
    block_deepth_now = Parser_getPyLineBlockDeepth(line);
    /* set block deepth */
    if (block_deepth_now == -1) {
        /* get block_deepth error */
        __platform_printf(
            "IndentationError: unexpected indent, only support 4 spaces\r\n");
        obj_deinit(ast);
        ast = NULL;
        goto exit;
    }
    obj_setInt(ast, "blockDeepth", block_deepth_now);
    /* check if exit block */
    if (NULL != block_stack) {
        block_deepth_last = stack_getTop(block_stack);
        /* exit each block */
        for (int i = 0; i < block_deepth_last - block_deepth_now; i++) {
            QueueObj* exit_block_queue = obj_getObj(ast, "exitBlock");
            /* create an exit_block queue */
            if (NULL == exit_block_queue) {
                obj_newObj(ast, "exitBlock", "", New_TinyObj);
                exit_block_queue = obj_getObj(ast, "exitBlock");
                queueObj_init(exit_block_queue);
            }
            char buff[10] = {0};
            char* block_type = stack_popStr(block_stack, buff);
            /* push exit block type to exit_block queue */
            queueObj_pushStr(exit_block_queue, block_type);
        }
    }
    line_start = line + block_deepth_now * 4;
    stmt = line_start;
    /* "while" "if" "elif" */
    for (uint32_t i = 0;
         i < sizeof(normal_keywords) / sizeof(normal_keywords[0]); i++) {
        char* keyword = (char*)normal_keywords[i];
        uint8_t keyword_len = strGetSize(keyword);
        if (strIsStartWith(line_start, keyword) &&
            (line_start[keyword_len] == ' ')) {
            /* the condition is the text between the keyword and ':' */
            stmt = strsCut(&buffs, line_start, ' ', ':');
            AST_setThisNode(ast, "block", keyword);
            if (NULL != block_stack) {
                stack_pushStr(block_stack, keyword);
            }
            goto block_matched;
        }
    }
    /* control keyword */
    /* "break", "continue" */
    /* the count is derived from the row size of the table, so it stays
     * right when the table or its row width changes */
    for (uint32_t i = 0;
         i < sizeof(control_keywords) / sizeof(control_keywords[0]); i++) {
        char* keyward = (char*)control_keywords[i];
        uint8_t keyward_size = strGetSize(keyward);
        if ((strIsStartWith(line_start, keyward)) &&
            ((line_start[keyward_size] == ' ') ||
             (line_start[keyward_size] == 0))) {
            AST_setThisNode(ast, keyward, "");
            stmt = "";
            goto block_matched;
        }
    }
    /* for */
    if (strIsStartWith(line_start, "for ")) {
        Args* list_buffs = New_strBuff();
        char* line_buff = strsCopy(list_buffs, line_start + 4);
        if (strCountSign(line_buff, ':') < 1) {
            /* no ':' in the "for" line */
            args_deinit(list_buffs);
            obj_deinit(ast);
            ast = NULL;
            goto exit;
        }
        char* arg_in = strsPopToken(list_buffs, line_buff, ' ');
        AST_setThisNode(ast, "arg_in", arg_in);
        /* drop the next token (the "in" of "for x in ...") */
        strsPopToken(list_buffs, line_buff, ' ');
        char* list_in = strsPopToken(list_buffs, line_buff, ':');
        /* iterate over iter(<list_in>) */
        list_in = strsAppend(list_buffs, "iter(", list_in);
        list_in = strsAppend(list_buffs, list_in, ")");
        list_in = strsCopy(&buffs, list_in);
        args_deinit(list_buffs);
        AST_setThisNode(ast, "block", "for");
        AST_setThisNode(ast, "list_in", list_in);
        if (NULL != block_stack) {
            stack_pushStr(block_stack, "for");
        }
        stmt = list_in;
        goto block_matched;
    }
    /* else */
    if (strIsStartWith(line_start, "else")) {
        if ((line_start[4] == ' ') || (line_start[4] == ':')) {
            stmt = "";
            AST_setThisNode(ast, "block", "else");
            if (NULL != block_stack) {
                stack_pushStr(block_stack, "else");
            }
        }
        /* otherwise the whole line is parsed as a normal statement */
        goto block_matched;
    }
#if PIKA_SYNTAX_EXCEPTION_ENABLE
    /* try */
    if (strIsStartWith(line_start, "try")) {
        if ((line_start[3] == ' ') || (line_start[3] == ':')) {
            stmt = "";
            AST_setThisNode(ast, "block", "try");
            if (NULL != block_stack) {
                stack_pushStr(block_stack, "try");
            }
        }
        goto block_matched;
    }
    /* except */
    if (strIsStartWith(line_start, "except")) {
        if ((line_start[6] == ' ') || (line_start[6] == ':')) {
            stmt = "";
            AST_setThisNode(ast, "block", "except");
            if (NULL != block_stack) {
                stack_pushStr(block_stack, "except");
            }
        }
        goto block_matched;
    }
#endif
    /* bare return */
    if (strEqu(line_start, "return")) {
        AST_setThisNode(ast, "return", "");
        stmt = "";
        goto block_matched;
    }
    /* return <stmt> */
    if (strIsStartWith(line_start, "return ")) {
        char* lineBuff = strsCopy(&buffs, line_start);
        /* pop the "return" token, the rest is the returned statement */
        strsPopToken(&buffs, lineBuff, ' ');
        stmt = lineBuff;
        AST_setThisNode(ast, "return", "");
        goto block_matched;
    }
#if PIKA_SYNTAX_EXCEPTION_ENABLE
    /* bare raise */
    if (strEqu(line_start, "raise")) {
        AST_setThisNode(ast, "raise", "");
        stmt = "RuntimeError";
        goto block_matched;
    }
    /* raise <stmt> */
    if (strIsStartWith(line_start, "raise ")) {
        AST_setThisNode(ast, "raise", "");
        char* lineBuff = strsCopy(&buffs, line_start);
        strsPopToken(&buffs, lineBuff, ' ');
        stmt = lineBuff;
        if (strEqu("", stmt)) {
            stmt = "RuntimeError";
        }
        goto block_matched;
    }
    /* assert */
    if (strIsStartWith(line_start, "assert ")) {
        stmt = "";
        AST_setThisNode(ast, "assert", "");
        char* lineBuff = strsCopy(&buffs, line_start + 7);
        /* assert expr [, msg] */
        while (1) {
            char* subStmt = Parser_popSubStmt(&buffs, &lineBuff, ",");
            AST_parseSubStmt(ast, subStmt);
            if (strEqu(lineBuff, "")) {
                break;
            }
        }
        goto block_matched;
    }
#endif
    /* global */
    if (strIsStartWith(line_start, "global ")) {
        stmt = "";
        char* global_list = line_start + 7;
        global_list = strsGetCleanCmd(&buffs, global_list);
        AST_setThisNode(ast, "global", global_list);
        goto block_matched;
    }
    /* del */
    if (strIsStartWith(line_start, "del ")) {
        stmt = "";
        char* del_dir = line_start + sizeof("del ") - 1;
        del_dir = strsGetCleanCmd(&buffs, del_dir);
        AST_setThisNode(ast, "del", del_dir);
        goto block_matched;
    }
    /* def */
    if (strIsStartWith(line_start, (char*)"def ")) {
        stmt = "";
        char* declear = strsCut(&buffs, line_start, ' ', ':');
        if (NULL == declear) {
            obj_deinit(ast);
            ast = NULL;
            goto exit;
        }
        declear = strsGetCleanCmd(&buffs, declear);
        AST_setThisNode(ast, "block", "def");
        AST_setThisNode(ast, "declear", declear);
        if (NULL != block_stack) {
            stack_pushStr(block_stack, "def");
        }
        goto block_matched;
    }
    /* class */
    if (strIsStartWith(line_start, (char*)"class ")) {
        stmt = "";
        char* declear = strsCut(&buffs, line_start, ' ', ':');
        if (NULL == declear) {
            obj_deinit(ast);
            ast = NULL;
            goto exit;
        }
        declear = strsGetCleanCmd(&buffs, declear);
        AST_setThisNode(ast, "block", "class");
        AST_setThisNode(ast, "declear", declear);
        if (NULL != block_stack) {
            stack_pushStr(block_stack, "class");
        }
        goto block_matched;
    }
block_matched:
    /* stmt is NULL when strsCut() found no condition for while/if/elif */
    if (NULL == stmt) {
        AST_deinit(ast);
        ast = NULL;
        goto exit;
    }
    stmt = strsGetCleanCmd(&buffs, stmt);
    /* AST_parseStmt() releases ast and returns NULL on error */
    ast = AST_parseStmt(ast, stmt);
    goto exit;
exit:
    strsDeinit(&buffs);
    return ast;
}
/*
 * Expand the "import <origin> as <alias>" sugar.
 *
 * Lines that do not start with "import " and plain "import <origin>" lines
 * are returned unchanged. An aliased import is rewritten to the two lines
 *     import <origin>
 *     <alias> = <origin>
 * which are stored in buffs_p. When PIKA_SYNTAX_IMPORT_EX_ENABLE is off the
 * line is always returned unchanged.
 */
static char* Suger_import(Args* buffs_p, char* line) {
#if !PIKA_SYNTAX_IMPORT_EX_ENABLE
    return line;
#endif
    Args buffs = {0};
    char* line_out = line;
    char* alias = NULL;
    char* origin = NULL;
    char* stmt = NULL;
    if (!strIsStartWith(line, "import ")) {
        line_out = line;
        goto exit;
    }
    /* only skip the keyword once it is known to be there, so no pointer
     * beyond the end of a shorter line is ever formed */
    stmt = line + sizeof("import ") - 1;
    Cursor_forEachToken(ps, stmt) {
        Cursor_iterStart(&ps);
        /* default set the 'origin' as the first token */
        if (ps.iter_index == 1) {
            origin = strsCopy(&buffs, ps.token1.pyload);
        }
        /* the token in front of " as " is the origin ... */
        if (strEqu(ps.token2.pyload, " as ")) {
            origin = strsCopy(&buffs, ps.token1.pyload);
        }
        /* ... and the token behind it is the alias */
        if (strEqu(ps.token1.pyload, " as ")) {
            alias = strsCopy(&buffs, ps.token2.pyload);
        }
        Cursor_iterEnd(&ps);
    }
    Cursor_deinit(&ps);
    /* only import, not 'as' */
    if (NULL == alias) {
        line_out = line;
        goto exit;
    }
    /* 'import' and 'as' */
    line_out = strsFormat(&buffs, PIKA_LINE_BUFF_SIZE, "import %s\n%s = %s",
                          origin, alias, origin);
    line_out = strsCopy(buffs_p, line_out);
    goto exit;
exit:
    strsDeinit(&buffs);
    return line_out;
}
/*
 * Expand the "from <module> import <class> [as <alias>]" sugar.
 *
 * Lines that do not start with "from " are returned unchanged. Otherwise the
 * line is rewritten to the two lines
 *     import <module>
 *     <alias> = <module>.<class>
 * (alias defaults to the class name) which are stored in buffs_p.
 * An empty string is returned when the module or the class is missing, and
 * for the module "PikaObj". When PIKA_SYNTAX_IMPORT_EX_ENABLE is off the
 * line is always returned unchanged.
 */
static char* Suger_from(Args* buffs_p, char* line) {
#if !PIKA_SYNTAX_IMPORT_EX_ENABLE
    return line;
#endif
    Args buffs = {0};
    char* line_out = line;
    char* class = NULL;
    char* module = NULL;
    char* alias = NULL;
    char* stmt = NULL;
    if (!strIsStartWith(line, "from ")) {
        line_out = line;
        goto exit;
    }
    /* only skip the keyword once it is known to be there, so no pointer
     * beyond the end of a shorter line is ever formed */
    stmt = line + sizeof("from ") - 1;
    Cursor_forEachToken(ps, stmt) {
        Cursor_iterStart(&ps);
        /* the token in front of " import " is the module ... */
        if (strEqu(ps.token2.pyload, " import ")) {
            module = strsCopy(&buffs, ps.token1.pyload);
        }
        /* ... and the token behind it is the class */
        if (strEqu(ps.token1.pyload, " import ")) {
            class = strsCopy(&buffs, ps.token2.pyload);
        }
        if (strEqu(ps.token1.pyload, " as ")) {
            alias = strsCopy(&buffs, ps.token2.pyload);
        }
        Cursor_iterEnd(&ps);
    }
    Cursor_deinit(&ps);
    if (NULL == module) {
        line_out = strsCopy(buffs_p, "");
        goto exit;
    }
    if (NULL == class) {
        line_out = strsCopy(buffs_p, "");
        goto exit;
    }
    if (NULL == alias) {
        alias = class;
    }
    /* skip PikaObj */
    if (strEqu(module, "PikaObj")) {
        line_out = strsCopy(buffs_p, "");
        goto exit;
    }
    line_out = strsFormat(&buffs, PIKA_LINE_BUFF_SIZE, "import %s\n%s = %s.%s",
                          module, alias, module, class);
    line_out = strsCopy(buffs_p, line_out);
exit:
    strsDeinit(&buffs);
    return line_out;
}
/*
 * Prepare one source line for parsing: cut the '#' comment, reject lines the
 * lexer reports as erroneous, drop every '\r' and expand the import sugar.
 * Returns the processed line, or NULL when Lexer_isError() reports an error.
 */
static char* Parser_linePreProcess(Args* buffs_p, char* line) {
    char* processed = Parser_removeAnnotation(line);
    /* check syntax error */
    if (Lexer_isError(processed)) {
        return NULL;
    }
    /* process EOL */
    processed = strsDeleteChar(buffs_p, processed, '\r');
    /* "import .. as .." first, then "from .. import .." */
    processed = Suger_import(buffs_p, processed);
    return Suger_from(buffs_p, processed);
}
/*
 * Translate one source line to Pika ASM.
 *
 * buffs_p    : buffer that receives the intermediate and result strings
 * line       : the source line
 * blockStack : stack of open block types, forwarded to AST_parseLine()
 *
 * Preprocessing may expand the line to several '\n'-separated lines; each of
 * them is parsed and its ASM is concatenated.
 *
 * Returns the ASM, "" for a line that only holds a comment, or NULL when
 * preprocessing or any of the expanded lines fails. A failed expanded line
 * ends the translation at once, so its error can neither be overwritten by
 * a later line nor be handed to strsAppend() as a NULL string.
 */
char* Parser_LineToAsm(Args* buffs_p, char* line, Stack* blockStack) {
    char* ASM = NULL;
    int line_num = 0;
    /* pre process */
    line = Parser_linePreProcess(buffs_p, line);
    if (NULL == line) {
        /* preprocess error */
        return NULL;
    }
    if (strEqu("@annontation", line)) {
        return (char*)"";
    }
    /*
        solve more lines
        preprocess may generate more lines
    */
    line_num = strCountSign(line, '\n') + 1;
    for (int i = 0; i < line_num; i++) {
        char* single_line = strsPopToken(buffs_p, line, '\n');
        /* parse tokens to AST */
        AST* ast = AST_parseLine(single_line, blockStack);
        /* gen ASM from AST */
        char* single_asm = AST_toPikaASM(ast, buffs_p);
        if (NULL == single_asm) {
            /* this line failed: report the error for the whole line */
            if (NULL != ast) {
                AST_deinit(ast);
            }
            return NULL;
        }
        if (NULL == ASM) {
            ASM = single_asm;
        } else {
            ASM = strsAppend(buffs_p, ASM, single_asm);
        }
        if (NULL != ast) {
            AST_deinit(ast);
        }
    }
    return ASM;
}
/*
 * Check whether `line` holds nothing but spaces.
 * Returns 1 for an empty or all-space line, 0 otherwise.
 * The line is scanned once up to its terminating NUL instead of measuring
 * its length on every loop iteration.
 */
static int Parser_isVoidLine(char* line) {
    for (const char* p = line; *p != '\0'; p++) {
        if (*p != ' ') {
            return 0;
        }
    }
    return 1;
}
/*
 * Check whether `line` holds a multi-line comment mark.
 * A mark is ''' or """ that is at the start of the line or behind a space,
 * and is followed by a space or the end of the line.
 * Returns 1 when such a mark is found, 0 otherwise.
 * The line is scanned once up to its terminating NUL instead of measuring
 * its length on every loop iteration.
 */
static uint8_t Parser_checkIsMultiComment(char* line) {
    for (uint32_t i = 0; line[i] != '\0'; i++) {
        char quote = line[i];
        /* not match ' or " */
        if ((quote != '\'') && (quote != '"')) {
            continue;
        }
        /* not match ''' or """
         * (the short circuit keeps every read inside the string: a later
         * char is only read when the one before it was not the NUL) */
        if ((line[i + 1] != quote) || (line[i + 2] != quote)) {
            continue;
        }
        /* check char before the ''' or """ */
        if ((0 != i) && (line[i - 1] != ' ')) {
            continue;
        }
        /* check char after the ''' or """ */
        if ((line[i + 3] != ' ') && (line[i + 3] != 0)) {
            continue;
        }
        /* matched */
        return 1;
    }
    /* not matched */
    return 0;
}
/*
 * Translate multi-line python source to Pika ASM or to byte code.
 *
 * outBuffs       : when not NULL the whole ASM text is copied into it and
 *                  returned
 * bytecode_frame : when not NULL (and outBuffs is NULL) the ASM of every
 *                  line is appended to it with byteCodeFrame_appendFromAsm()
 * py_lines       : '\n'-separated source
 *
 * Lines ending with '\' and lines with unclosed brackets are joined with the
 * following line, tabs are replaced, blank lines and ''' / """ comment
 * sections are skipped. One extra empty line is parsed after the last
 * source line.
 *
 * Returns NULL on a parse error; otherwise the ASM text (outBuffs given) or
 * the non-NULL dummy value (char*)1 (outBuffs is NULL).
 */
static char* _Parser_linesToBytesOrAsm(Args* outBuffs,
                                       ByteCodeFrame* bytecode_frame,
                                       char* py_lines) {
    Stack block_stack;
    stack_init(&block_stack);
    Arg* asm_buff = arg_newStr("");
    uint32_t lines_offset = 0;
    uint16_t lines_num = strCountSign(py_lines, '\n') + 1;
    uint16_t lines_index = 0;
    uint8_t is_in_multi_comment = 0;
    Arg* line_connection_arg = arg_newStr("");
    uint8_t is_line_connection = 0;
    char* out_ASM = NULL;
    char* single_ASM = NULL;
    uint32_t line_size = 0;
    /* parse each line */
    while (1) {
        lines_index++;
        Args buffs = {0};
        char* line_origin = NULL;
        char* line = NULL;
        uint32_t line_len = 0;
        /* add void line to the end */
        if (lines_index >= lines_num + 1) {
            line = "";
            goto parse_line;
        }
        /* get single line by pop multiline */
        line_origin = strsGetFirstToken(&buffs, py_lines + lines_offset, '\n');
        line = strsCopy(&buffs, line_origin);
        /* line connection */
        if (is_line_connection) {
            is_line_connection = 0;
            line_connection_arg = arg_strAppend(line_connection_arg, line);
            line = strsCopy(&buffs, arg_getStr(line_connection_arg));
            /* refresh the line_connection_arg */
            arg_deinit(line_connection_arg);
            line_connection_arg = arg_newStr("");
        }
        /* check connection */
        /* an empty line has no last char: never index it with size - 1 */
        line_len = strGetSize(line);
        if (line_len > 0 && '\\' == line[line_len - 1]) {
            /* remove the '\\' */
            line[line_len - 1] = '\0';
            is_line_connection = 1;
            line_connection_arg = arg_strAppend(line_connection_arg, line);
            goto next_line;
        }
        /* walk all tokens only to get the final bracket depth of the line */
        Cursor_forEachToken(c, line) {
            Cursor_iterStart(&c);
            Cursor_iterEnd(&c);
        }
        Cursor_deinit(&c);
        /* NOTE(review): c.branket_deepth is read after Cursor_deinit(&c);
         * this relies on the deinit leaving the field untouched */
        /* auto connection */
        if (lines_index < lines_num) {
            if (c.branket_deepth > 0) {
                line_connection_arg = arg_strAppend(line_connection_arg, line);
                is_line_connection = 1;
                goto next_line;
            }
        }
        /* bracket match failed */
        if (c.branket_deepth != 0) {
            single_ASM = NULL;
            goto parse_after;
        }
        /* support Tab */
        line = strsReplace(&buffs, line, "\t", "    ");
        /* remove \r */
        line = strsReplace(&buffs, line, "\r", "");
        /* filter for not end \n */
        if (Parser_isVoidLine(line)) {
            goto next_line;
        }
        /* filter for multiline comment ''' or """ */
        if (Parser_checkIsMultiComment(line)) {
            /* toggles between 0 and 0xFF */
            is_in_multi_comment = ~is_in_multi_comment;
            goto next_line;
        }
        /* skip multiline comment */
        if (is_in_multi_comment) {
            goto next_line;
        }
    parse_line:
        /* parse single Line to Asm */
        single_ASM = Parser_LineToAsm(&buffs, line, &block_stack);
    parse_after:
        if (NULL == single_ASM) {
            out_ASM = NULL;
            strsDeinit(&buffs);
            goto exit;
        }
        if (NULL == bytecode_frame) {
            /* store ASM */
            asm_buff = arg_strAppend(asm_buff, single_ASM);
        } else if (NULL == outBuffs) {
            /* store ByteCode */
            byteCodeFrame_appendFromAsm(bytecode_frame, single_ASM);
        }
    next_line:
        if (lines_index < lines_num) {
            /* step over the line and its '\n' */
            line_size = strGetSize(line_origin);
            lines_offset = lines_offset + line_size + 1;
        }
        strsDeinit(&buffs);
        /* exit when finished */
        if (lines_index >= lines_num + 1) {
            break;
        }
    }
    if (NULL != outBuffs) {
        /* load stored ASM */
        out_ASM = strsCopy(outBuffs, arg_getStr(asm_buff));
    } else {
        /* nothing to return: any non-NULL value means success */
        out_ASM = (char*)1;
    }
    goto exit;
exit:
    if (NULL != asm_buff) {
        arg_deinit(asm_buff);
    }
    if (NULL != line_connection_arg) {
        arg_deinit(line_connection_arg);
    }
    stack_deinit(&block_stack);
    return out_ASM;
}
/*
 * Compile python source lines into the byte code frame `bf`.
 * Returns the result of _Parser_linesToBytesOrAsm() called without an
 * output buffer, which is NULL when parsing fails.
 */
char* Parser_linesToBytes(ByteCodeFrame* bf, char* py_lines) {
    char* status = _Parser_linesToBytesOrAsm(NULL, bf, py_lines);
    return status;
}
/*
 * Fill `bytecode_frame` from python source lines.
 * Returns 0 on success and 1 when Parser_linesToBytes() reports an error.
 */
int bytecodeFrame_fromLines(ByteCodeFrame* bytecode_frame, char* multi_line) {
    char* status = Parser_linesToBytes(bytecode_frame, multi_line);
    /* NULL means error */
    return (NULL == status) ? 1 : 0;
}
/*
 * Translate python source lines to Pika ASM text stored in outBuffs.
 * Returns the result of _Parser_linesToBytesOrAsm() called without a byte
 * code frame, which is NULL when parsing fails.
 */
char* Parser_linesToAsm(Args* outBuffs, char* multi_line) {
    char* asm_text = _Parser_linesToBytesOrAsm(outBuffs, NULL, multi_line);
    return asm_text;
}
/*
 * Load the python file `filename` and translate it to Pika ASM.
 * Returns the ASM copied into outBuffs, or NULL when the file can not be
 * loaded or the source can not be parsed.
 */
char* Parser_fileToAsm(Args* outBuffs, char* filename) {
    Args buffs = {0};
    Arg* file_arg = arg_loadFile(NULL, filename);
    pika_assert(NULL != file_arg);
    if (NULL == file_arg) {
        return NULL;
    }
    char* lines = (char*)arg_getBytes(file_arg);
    /* replace the "\r\n" to "\n" */
    lines = strsReplace(&buffs, lines, "\r\n", "\n");
    /* clear the void line */
    lines = strsReplace(&buffs, lines, "\n\n", "\n");
    /* add '\n' at the end */
    lines = strsAppend(&buffs, lines, "\n\n");
    char* res = Parser_linesToAsm(&buffs, lines);
    arg_deinit(file_arg);
    /* res is NULL on a parse error: there is nothing to copy then */
    if (NULL != res) {
        res = strsCopy(outBuffs, res);
    }
    strsDeinit(&buffs);
    return res;
}
/*
 * Append the ASM of `subAst` to `pikaAsm`.
 *
 * The queued children of subAst are popped and emitted first (recursively,
 * one invoke depth deeper), then one line "<deepth> <INS> [<value>]\n" is
 * emitted for every known node found in subAst itself.
 *
 * The invoke depth is read from, and written back to, the "deepth" entry of
 * `ast`; on return that entry holds the entry value minus one.
 *
 * Returns the extended ASM, copied into outBuffs.
 */
char* AST_appandPikaASM(AST* ast, AST* subAst, Args* outBuffs, char* pikaAsm) {
    Args buffs = {0};
    int deepth = obj_getInt(ast, "deepth");
    /* append each queue item */
    for (QueueObj* child = queueObj_popObj(subAst); NULL != child;
         child = queueObj_popObj(subAst)) {
        /* every child is emitted one level deeper than this node */
        obj_setInt(ast, "deepth", deepth + 1);
        pikaAsm = AST_appandPikaASM(ast, child, &buffs, pikaAsm);
    }
    /* Byte code generate rules */
    const GenRule rules_subAst[] = {
        {.ins = "RUN", .type = VAL_DYNAMIC, .ast = "method"},
        {.ins = "OPT", .type = VAL_DYNAMIC, .ast = "operator"},
        {.ins = "BYT", .type = VAL_DYNAMIC, .ast = "bytes"},
        {.ins = "NUM", .type = VAL_DYNAMIC, .ast = "num"},
        {.ins = "IMP", .type = VAL_DYNAMIC, .ast = "import"},
        {.ins = "REF", .type = VAL_DYNAMIC, .ast = "ref"},
        {.ins = "STR", .type = VAL_DYNAMIC, .ast = "string"},
        {.ins = "SLC", .type = VAL_NONEVAL, .ast = "slice"},
        {.ins = "DCT", .type = VAL_NONEVAL, .ast = "dict"},
        {.ins = "LST", .type = VAL_NONEVAL, .ast = "list"},
        {.ins = "OUT", .type = VAL_DYNAMIC, .ast = "left"}};
    const size_t rule_num = sizeof(rules_subAst) / sizeof(GenRule);
    char* head = args_getBuff(&buffs, PIKA_SPRINTF_BUFF_SIZE);
    /* append the syntax item */
    for (size_t rule_i = 0; rule_i < rule_num; rule_i++) {
        GenRule rule = rules_subAst[rule_i];
        char* node_val = obj_getStr(subAst, rule.ast);
        if (NULL == node_val) {
            /* this node is not set in subAst */
            continue;
        }
        /* e.g. "0 RUN print \n" */
        __platform_sprintf(head, "%d %s ", deepth, rule.ins);
        Arg* asm_line = arg_newStr(head);
        if (rule.type == VAL_DYNAMIC) {
            asm_line = arg_strAppend(asm_line, node_val);
        }
        asm_line = arg_strAppend(asm_line, "\n");
        pikaAsm = strsAppend(&buffs, pikaAsm, arg_getStr(asm_line));
        arg_deinit(asm_line);
    }
    obj_setInt(ast, "deepth", deepth - 1);
    /* hand the result over to the caller's buffer */
    pikaAsm = strsCopy(outBuffs, pikaAsm);
    strsDeinit(&buffs);
    return pikaAsm;
}
/*
 * Append the block depth line "B<n>\n" to `pikaAsm`, where <n> is the
 * "blockDeepth" entry of `ast` plus `deepthOffset`.
 * Returns the extended ASM, stored in buffs_p.
 */
char* ASM_addBlockDeepth(AST* ast,
                         Args* buffs_p,
                         char* pikaAsm,
                         uint8_t deepthOffset) {
    char num_buff[11];
    char* asm_out = strsAppend(buffs_p, pikaAsm, (char*)"B");
    char* deepth_str =
        fast_itoa(num_buff, obj_getInt(ast, "blockDeepth") + deepthOffset);
    asm_out = strsAppend(buffs_p, asm_out, deepth_str);
    asm_out = strsAppend(buffs_p, asm_out, (char*)"\n");
    return asm_out;
}
/*
 * Emit the ASM for a top level rule.
 *
 * The statement held in `ast` is emitted first, followed by the line
 * "<deepth> <rule.ins> <value>\n". The value is the `rule.ast` entry of
 * `ast` for VAL_DYNAMIC rules, `rule.val` for VAL_STATIC_ rules and empty
 * otherwise.
 *
 * Returns the extended ASM, stored in buffs.
 */
char* GenRule_toAsm(GenRule rule,
                    Args* buffs,
                    AST* ast,
                    char* pikaAsm,
                    int deepth) {
    char* head = args_getBuff(buffs, PIKA_SPRINTF_BUFF_SIZE);
    /* parse stmt ast */
    pikaAsm = AST_appandPikaASM(ast, ast, buffs, pikaAsm);
    /* e.g. "0 CTN \n" */
    __platform_sprintf(head, "%d %s ", deepth, rule.ins);
    Arg* asm_line = arg_newStr(head);
    if (rule.type == VAL_DYNAMIC) {
        /* the value comes from the AST */
        asm_line = arg_strAppend(asm_line, obj_getStr(ast, rule.ast));
    } else if (rule.type == VAL_STATIC_) {
        /* the value is fixed by the rule */
        asm_line = arg_strAppend(asm_line, rule.val);
    }
    asm_line = arg_strAppend(asm_line, "\n");
    pikaAsm = strsAppend(buffs, pikaAsm, arg_getStr(asm_line));
    arg_deinit(asm_line);
    return pikaAsm;
}
/*
 * Generate the Pika ASM of one parsed line.
 *
 * The output is built in this order:
 *  1. the code that closes every block listed in the "exitBlock" queue;
 *  2. the block depth line "B<n>" of this line;
 *  3. the code of the line itself: special handling for the "for", "elif",
 *     "def" and "class" blocks, then the rules_block table, then the
 *     rules_topAst table, and finally the plain statement when nothing
 *     matched.
 *
 * Returns the ASM copied into outBuffs, or NULL when ast is NULL.
 */
char* AST_toPikaASM(AST* ast, Args* outBuffs) {
Args buffs = {0};
char* pikaAsm = strsCopy(&buffs, "");
QueueObj* exitBlock;
/* only read behind the `exit` label when pikaAsm is not NULL; it is set
 * before every jump that keeps pikaAsm non-NULL */
uint8_t is_block_matched;
if (NULL == ast) {
pikaAsm = NULL;
goto exit;
}
exitBlock = obj_getObj(ast, "exitBlock");
/* exiting from block */
if (exitBlock != NULL) {
while (1) {
/* computed from the "top" and "bottom" entries of the queue before the
 * pop; used below as the offset added to the block depth */
uint8_t block_type_num = obj_getInt(exitBlock, "top") -
obj_getInt(exitBlock, "bottom") - 1;
char* block_type = queueObj_popStr(exitBlock);
if (NULL == block_type) {
break;
}
/* goto the while start when exit while block */
if (strEqu(block_type, "while")) {
pikaAsm =
ASM_addBlockDeepth(ast, outBuffs, pikaAsm, block_type_num);
pikaAsm = strsAppend(outBuffs, pikaAsm, (char*)"0 JMP -1\n");
}
#if PIKA_SYNTAX_EXCEPTION_ENABLE
/* emit NTR, GER and "JEZ 2" when exit try block */
if (strEqu(block_type, "try")) {
pikaAsm =
ASM_addBlockDeepth(ast, outBuffs, pikaAsm, block_type_num);
pikaAsm = strsAppend(outBuffs, pikaAsm, (char*)"0 NTR \n");
pikaAsm = strsAppend(outBuffs, pikaAsm, (char*)"0 GER \n");
pikaAsm = strsAppend(outBuffs, pikaAsm, (char*)"0 JEZ 2\n");
}
/* emit "SER 0" when exit except block */
if (strEqu(block_type, "except")) {
pikaAsm = strsAppend(outBuffs, pikaAsm, (char*)"0 SER 0\n");
}
#endif
/* goto the for start when exit for block */
if (strEqu(block_type, "for")) {
pikaAsm =
ASM_addBlockDeepth(ast, outBuffs, pikaAsm, block_type_num);
pikaAsm = strsAppend(outBuffs, pikaAsm, (char*)"0 JMP -1\n");
/* garbage collect for the list */
pikaAsm =
ASM_addBlockDeepth(ast, outBuffs, pikaAsm, block_type_num);
/* name of the iterator variable: "_l" + one char for the block depth
 * (a decimal digit only while the depth is below 10) */
char _l_x[] = "_lx";
char block_deepth_char =
obj_getInt(ast, "blockDeepth") + block_type_num + '0';
_l_x[sizeof(_l_x) - 2] = block_deepth_char;
pikaAsm = strsAppend(outBuffs, pikaAsm, (char*)"0 DEL ");
pikaAsm = strsAppend(outBuffs, pikaAsm, (char*)_l_x);
pikaAsm = strsAppend(outBuffs, pikaAsm, (char*)"\n");
}
/* return when exit method */
if (strEqu(block_type, "def")) {
pikaAsm = ASM_addBlockDeepth(ast, outBuffs, pikaAsm,
block_type_num + 1);
pikaAsm = strsAppend(outBuffs, pikaAsm, (char*)"0 RET \n");
}
/* return when exit class */
if (strEqu(block_type, "class")) {
pikaAsm = ASM_addBlockDeepth(ast, outBuffs, pikaAsm,
block_type_num + 1);
pikaAsm =
strsAppend(outBuffs, pikaAsm, (char*)"0 RAS $origin\n");
pikaAsm = ASM_addBlockDeepth(ast, outBuffs, pikaAsm, 1);
pikaAsm = strsAppend(outBuffs, pikaAsm, (char*)"0 NEW self\n");
pikaAsm = strsAppend(outBuffs, pikaAsm, (char*)"0 RET \n");
}
}
}
/* add block deepth */
/* example: B0 */
pikaAsm = ASM_addBlockDeepth(ast, outBuffs, pikaAsm, 0);
/* "deepth" is invoke deepth, not the blockDeepth */
obj_setInt(ast, "deepth", 0);
/* match block */
is_block_matched = 0;
if (strEqu(obj_getStr(ast, "block"), "for")) {
/* for "for" iter */
char* arg_in = obj_getStr(ast, "arg_in");
Arg* newAsm_arg = arg_newStr("");
/* iterator variable name, built like in the exit-block code above */
char _l_x[] = "_lx";
char block_deepth_char = '0';
block_deepth_char += obj_getInt(ast, "blockDeepth");
_l_x[sizeof(_l_x) - 2] = block_deepth_char;
/* init iter */
/* get the iter(_l<x>) */
pikaAsm = AST_appandPikaASM(ast, ast, &buffs, pikaAsm);
newAsm_arg = arg_strAppend(newAsm_arg, "0 OUT ");
newAsm_arg = arg_strAppend(newAsm_arg, _l_x);
newAsm_arg = arg_strAppend(newAsm_arg, "\n");
pikaAsm = strsAppend(&buffs, pikaAsm, arg_getStr(newAsm_arg));
arg_deinit(newAsm_arg);
newAsm_arg = arg_newStr("");
/* get next */
/* run next(_l<x>) */
/* check item is exist */
pikaAsm = ASM_addBlockDeepth(ast, outBuffs, pikaAsm, 0);
newAsm_arg = arg_strAppend(newAsm_arg, "0 RUN ");
newAsm_arg = arg_strAppend(newAsm_arg, _l_x);
newAsm_arg = arg_strAppend(newAsm_arg,
".__next__\n"
"0 OUT ");
newAsm_arg = arg_strAppend(newAsm_arg, arg_in);
newAsm_arg = arg_strAppend(newAsm_arg,
"\n"
"0 EST ");
newAsm_arg = arg_strAppend(newAsm_arg, arg_in);
newAsm_arg = arg_strAppend(newAsm_arg, "\n0 JEZ 2\n");
pikaAsm = strsAppend(&buffs, pikaAsm, arg_getStr(newAsm_arg));
arg_deinit(newAsm_arg);
is_block_matched = 1;
goto exit;
}
if (strEqu(obj_getStr(ast, "block"), "elif")) {
/* skip if __else is 0 */
pikaAsm = strsAppend(&buffs, pikaAsm, "0 NEL 1\n");
/* parse stmt ast */
pikaAsm = AST_appandPikaASM(ast, ast, &buffs, pikaAsm);
/* skip if stmt is 0 */
pikaAsm = strsAppend(&buffs, pikaAsm, "0 JEZ 1\n");
is_block_matched = 1;
goto exit;
}
if (strEqu(obj_getStr(ast, "block"), "def")) {
/* "0 DEF <declear>" followed by "0 JMP 1" */
pikaAsm = strsAppend(&buffs, pikaAsm, "0 DEF ");
pikaAsm = strsAppend(&buffs, pikaAsm, obj_getStr(ast, "declear"));
pikaAsm = strsAppend(&buffs, pikaAsm,
"\n"
"0 JMP 1\n");
is_block_matched = 1;
goto exit;
}
if (strEqu(obj_getStr(ast, "block"), "class")) {
char* declear = obj_getStr(ast, "declear");
char* thisClass = NULL;
char* superClass = NULL;
/* "Name(Super)" holds the super class between the brackets */
if (strIsContain(declear, '(')) {
thisClass = strsGetFirstToken(&buffs, declear, '(');
superClass = strsCut(&buffs, declear, '(', ')');
} else {
thisClass = declear;
superClass = "";
}
if (strEqu("", superClass)) {
/* default superClass */
superClass = "TinyObj";
}
/* NOTE(review): this check assigns the value it has just matched, so it
 * changes nothing */
if (strEqu("TinyObj", superClass)) {
/* default superClass */
superClass = "TinyObj";
}
pikaAsm = strsAppend(&buffs, pikaAsm, "0 CLS ");
pikaAsm = strsAppend(&buffs, pikaAsm,
strsAppend(&buffs, thisClass,
"()\n"
"0 JMP 1\n"));
/* "B<n>\n" for the block one level deeper than the class line; the depth
 * is added to the char '0' */
char block_deepth_str[] = "B0\n";
/* goto deeper block */
block_deepth_str[1] += obj_getInt(ast, "blockDeepth") + 1;
pikaAsm = strsAppend(&buffs, pikaAsm, block_deepth_str);
pikaAsm = strsAppend(&buffs, pikaAsm, "0 RUN ");
pikaAsm = strsAppend(&buffs, pikaAsm, superClass);
pikaAsm = strsAppend(&buffs, pikaAsm, "\n");
pikaAsm = strsAppend(&buffs, pikaAsm, "0 OUT self\n");
pikaAsm = strsAppend(&buffs, pikaAsm, block_deepth_str);
pikaAsm = strsAppend(&buffs, pikaAsm, "0 RAS self\n");
is_block_matched = 1;
goto exit;
}
/* generate code for block ast */
/* matched against the value of the "block" entry */
const GenRule rules_block[] = {
{.ins = "TRY", .type = VAL_NONEVAL, .ast = "try"},
{.ins = "EXP", .type = VAL_NONEVAL, .ast = "except"},
{.ins = "NEL", .type = VAL_STATIC_, .ast = "else", .val = "1"},
{.ins = "JEZ", .type = VAL_STATIC_, .ast = "if", .val = "1"},
{.ins = "JEZ", .type = VAL_STATIC_, .ast = "while", .val = "2"},
};
for (size_t i = 0; i < sizeof(rules_block) / sizeof(GenRule); i++) {
GenRule rule = rules_block[i];
if (strEqu(obj_getStr(ast, "block"), rule.ast)) {
pikaAsm = GenRule_toAsm(rule, &buffs, ast, pikaAsm, 0);
is_block_matched = 1;
goto exit;
}
}
/* matched against the entries that exist in the ast */
const GenRule rules_topAst[] = {
{.ins = "CTN", .type = VAL_NONEVAL, .ast = "continue"},
{.ins = "BRK", .type = VAL_NONEVAL, .ast = "break"},
{.ins = "DEL", .type = VAL_DYNAMIC, .ast = "del"},
{.ins = "GLB", .type = VAL_DYNAMIC, .ast = "global"},
{.ins = "RIS", .type = VAL_DYNAMIC, .ast = "raise"},
{.ins = "ASS", .type = VAL_NONEVAL, .ast = "assert"},
{.ins = "RET", .type = VAL_NONEVAL, .ast = "return"}};
/* generate code for top level ast */
for (size_t i = 0; i < sizeof(rules_topAst) / sizeof(rules_topAst[0]);
i++) {
GenRule item = rules_topAst[i];
if (obj_isArgExist(ast, item.ast)) {
pikaAsm = GenRule_toAsm(item, &buffs, ast, pikaAsm, 0);
is_block_matched = 1;
goto exit;
}
}
exit:
/* pikaAsm is only NULL here when ast was NULL */
if (NULL == pikaAsm) {
strsDeinit(&buffs);
return NULL;
}
if (!is_block_matched) {
/* parse stmt ast */
pikaAsm = AST_appandPikaASM(ast, ast, &buffs, pikaAsm);
}
/* output pikaAsm */
pikaAsm = strsCopy(outBuffs, pikaAsm);
strsDeinit(&buffs);
return pikaAsm;
}
/*
 * Release an AST.
 * Returns the result of obj_deinit().
 */
int32_t AST_deinit(AST* ast) {
    int32_t res = obj_deinit(ast);
    return res;
}
/*
 * Assemble Pika ASM text and append it to the byte code frame `self`.
 *
 * Every '\n'-terminated line is either
 *  - a block depth flag "B<n>", which sets the block depth of the following
 *    instructions and marks the next instruction as the start of a new
 *    line, or
 *  - an instruction "<invoke deepth> <INS> <data>", which is appended to the
 *    instruct array; <data> is stored once in the const pool.
 *
 * Over-long <invoke deepth> / <INS> fields are truncated to the size of
 * their buffers, a missing <data> field is handled as an empty one and
 * empty lines are tolerated.
 *
 * Returns self.
 */
ByteCodeFrame* byteCodeFrame_appendFromAsm(ByteCodeFrame* self, char* pikaAsm) {
    Asmer asmer = {
        .asm_code = pikaAsm,
        .block_deepth_now = 0,
        .is_new_line = 0,
        .line_pointer = pikaAsm,
    };
    uint16_t const_pool_offset;
    uint16_t exist_offset;
    int invoke_deepth_int = 0;
    /* the text is not changed while looping: count its lines only once */
    const int line_num = strCountSign(pikaAsm, '\n');
    for (int line_i = 0; line_i < line_num; line_i++) {
        Args buffs = {0};
        char* line = strsGetLine(&buffs, asmer.line_pointer);
        char* data = NULL;
        char ins_str[4] = "";
        char invoke_deepth[3] = "";
        uint8_t space_num = 0;
        uint8_t invoke_deepth_i = 0;
        uint8_t ins_str_i = 0;
        /* keep a private, writable copy of the line */
        Arg* line_buff = arg_newStr(line);
        strsDeinit(&buffs);
        line = arg_getStr(line_buff);
        InstructUnit ins_unit = {0};
        uint32_t line_len = strGetSize(line);
        /* remove '\r' (an empty line has no last char) */
        if (line_len > 0 && line[line_len - 1] == '\r') {
            line_len--;
            line[line_len] = 0;
        }
        /* process block deepth flag */
        if ('B' == line[0]) {
            asmer.block_deepth_now = fast_atoi(line + 1);
            asmer.is_new_line = 1;
            goto next_line;
        }
        /* process each ins */
        /* get constPool offset */
        const_pool_offset = 0;
        /* split "<invoke deepth> <INS> <data>" at the first two spaces */
        for (uint32_t i = 0; i < line_len; i++) {
            if (line[i] == ' ') {
                space_num++;
                if (space_num == 2) {
                    /* everything behind the second space is the data */
                    data = line + i + 1;
                    break;
                }
                continue;
            }
            if (space_num == 0) {
                /* keep room for the terminating NUL */
                if (invoke_deepth_i < sizeof(invoke_deepth) - 1) {
                    invoke_deepth[invoke_deepth_i++] = line[i];
                }
                continue;
            }
            /* space_num == 1 */
            if (ins_str_i < sizeof(ins_str) - 1) {
                ins_str[ins_str_i++] = line[i];
            }
        }
        if (NULL == data) {
            /* the line has no data field */
            data = (char*)"";
        }
        exist_offset = constPool_getOffsetByData(&(self->const_pool), data);
        /* get const offset */
        if (strEqu(data, "")) {
            /* not need const value */
            const_pool_offset = 0;
        } else if (65535 == exist_offset) {
            /* push new const value */
            const_pool_offset = constPool_getLastOffset(&(self->const_pool));
            /* load const to const pool buff */
            constPool_append(&(self->const_pool), data);
        } else {
            /* use exist const value */
            const_pool_offset = exist_offset;
        }
        invoke_deepth_int = fast_atoi(invoke_deepth);
        /* load Asm to byte code unit */
        instructUnit_setBlockDeepth(&ins_unit, asmer.block_deepth_now);
        instructUnit_setInvokeDeepth(&ins_unit, invoke_deepth_int);
        instructUnit_setConstPoolIndex(&ins_unit, const_pool_offset);
        instructUnit_setInstruct(&ins_unit, pikaVM_getInstructFromAsm(ins_str));
        if (asmer.is_new_line) {
            instructUnit_setIsNewLine(&ins_unit, 1);
            asmer.is_new_line = 0;
        }
        /* append instructUnit to instructArray */
        instructArray_append(&(self->instruct_array), &ins_unit);
    next_line:
        /* point to next line */
        asmer.line_pointer += strGetLineSize(asmer.line_pointer) + 1;
        arg_deinit(line_buff);
    }
    return self;
}
/*
 * Fill a temporary byte code frame from `lines` with
 * bytecodeFrame_fromLines() and dump it through __platform_printf():
 * first the frame itself (byteCodeFrame_print), then `lines` wrapped in a
 * PIKA_PYTHON(...) block between clang-format off/on markers, then the frame
 * as an array (byteCodeFrame_printAsArray).
 *
 * The frame is deinitialized before returning. Always returns NULL.
 */
char* Parser_linesToArray(char* lines) {
    ByteCodeFrame frame;
    ByteCodeFrame* const bf = &frame;

    /* build the frame */
    byteCodeFrame_init(bf);
    bytecodeFrame_fromLines(bf, lines);

    /* dump the frame */
    byteCodeFrame_print(bf);

    /* echo the input lines inside a PIKA_PYTHON() wrapper */
    __platform_printf("\n\n/* clang-format off */\n");
    __platform_printf("PIKA_PYTHON(\n");
    __platform_printf("%s\n", lines);
    __platform_printf(")\n");
    __platform_printf("/* clang-format on */\n");

    /* dump the frame as an array */
    byteCodeFrame_printAsArray(bf);

    /* release the frame, then emit the trailing blank lines */
    byteCodeFrame_deinit(bf);
    __platform_printf("\n\n");
    return NULL;
}