Mirror of https://github.com/elua/elua.git (synced 2025-01-08 20:56:17 +08:00)
Commit 26361ea4cd
Added another "romfs" parameter to SConstruct. It can take one of 3 values: - "verbatim" (default): copy all the files from the eLua romfs/ directory to the binary image (exactly what happened until now) - "compress": use LuaSrcDiet (http://luaforge.net/projects/luasrcdiet, now included in the eLua distribution) to "compress" the source code by using different tricks (shorter identifiers, removing comments and EOLS, and so on). The output is still a compilable Lua file (although in most case it looks completely different from the original) which is copied in the eLua binary image instead of the original Lua file. The compression obtained by using this method seems to be very good in practice, thus it will save flash when needed. - "compile": use the eLua cross compiler (that must be built first by running 'scons -f cross-lua.py') to precompile the Lua files to bytecode, and write the bytecode to the image instead of the source file. This way, one can save both time (the scripts don't need to be compiled anymore) and RAM (for the same reason, especially since the Lua parser uses the stack a lot, which can lead to very nasty and hard to diagnose stack overflow bugs). It will be even more useful in the future, when (hopefully) more and more Lua bytecode data structures will be available directly from Flash (without having to copy them RAM as it happens now). It might also decrease the romfs memory footprint, but then again, it might not; it pretty much depends on the Lua programs included in the romfs.
3726 lines · 130 KiB · Lua · Executable File
#!/usr/bin/env lua
|
|
|
|
package.preload['llex'] = (function (...)
|
|
--[[--------------------------------------------------------------------
|
|
|
|
llex.lua: Lua 5.1 lexical analyzer in Lua
|
|
This file is part of LuaSrcDiet, based on Yueliang material.
|
|
|
|
Copyright (c) 2008 Kein-Hong Man <khman@users.sf.net>
|
|
The COPYRIGHT file describes the conditions
|
|
under which this software may be distributed.
|
|
|
|
See the ChangeLog for more information.
|
|
|
|
----------------------------------------------------------------------]]
|
|
|
|
--[[--------------------------------------------------------------------
|
|
-- NOTES:
|
|
-- * This is a version of the native 5.1.x lexer from Yueliang 0.4.0,
|
|
-- with significant modifications to handle LuaSrcDiet's needs:
|
|
-- (1) llex.error is an optional error function handler
|
|
-- (2) seminfo for strings includes their delimiters and no
|
|
-- translation operations are performed on them
|
|
-- * shbang handling has been added to support executable scripts
|
|
-- * NO localized decimal point replacement magic
|
|
-- * NO limit to number of lines
|
|
-- * NO support for compatible long strings (LUA_COMPAT_LSTR)
|
|
-- * Please read technotes.txt for more technical details.
|
|
----------------------------------------------------------------------]]
|
|
|
|
local base = _G
|
|
local string = string
|
|
module "llex"
|
|
|
|
local find = string.find
|
|
local match = string.match
|
|
local sub = string.sub
|
|
|
|
----------------------------------------------------------------------
|
|
-- initialize keyword list, variables
|
|
----------------------------------------------------------------------
|
|
|
|
local kw = {}
|
|
for v in string.gmatch([[
|
|
and break do else elseif end false for function if in
|
|
local nil not or repeat return then true until while]], "%S+") do
|
|
kw[v] = true
|
|
end
|
|
|
|
-- NOTE: see init() for module variables (externally visible):
|
|
-- tok, seminfo, tokln
|
|
|
|
local z, -- source stream
|
|
sourceid, -- name of source
|
|
I, -- position of lexer
|
|
buff, -- buffer for strings
|
|
ln -- line number
|
|
|
|
----------------------------------------------------------------------
|
|
-- add information to token listing
|
|
----------------------------------------------------------------------
|
|
|
|
local function addtoken(token, info)
|
|
local i = #tok + 1
|
|
tok[i] = token
|
|
seminfo[i] = info
|
|
tokln[i] = ln
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- handles line number incrementation and end-of-line characters
|
|
----------------------------------------------------------------------
|
|
|
|
local function inclinenumber(i, is_tok)
|
|
local sub = sub
|
|
local old = sub(z, i, i)
|
|
i = i + 1 -- skip '\n' or '\r'
|
|
local c = sub(z, i, i)
|
|
if (c == "\n" or c == "\r") and (c ~= old) then
|
|
i = i + 1 -- skip '\n\r' or '\r\n'
|
|
old = old..c
|
|
end
|
|
if is_tok then addtoken("TK_EOL", old) end
|
|
ln = ln + 1
|
|
I = i
|
|
return i
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- initialize lexer for given source _z and source name _sourceid
|
|
----------------------------------------------------------------------
|
|
|
|
function init(_z, _sourceid)
|
|
z = _z -- source
|
|
sourceid = _sourceid -- name of source
|
|
I = 1 -- lexer's position in source
|
|
ln = 1 -- line number
|
|
tok = {} -- lexed token list*
|
|
seminfo = {} -- lexed semantic information list*
|
|
tokln = {} -- line numbers for messages*
|
|
-- (*) externally visible thru' module
|
|
--------------------------------------------------------------------
|
|
-- initial processing (shbang handling)
|
|
--------------------------------------------------------------------
|
|
local p, _, q, r = find(z, "^(#[^\r\n]*)(\r?\n?)")
|
|
if p then -- skip first line
|
|
I = I + #q
|
|
addtoken("TK_COMMENT", q)
|
|
if #r > 0 then inclinenumber(I, true) end
|
|
end
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- returns a chunk name or id, no truncation for long names
|
|
----------------------------------------------------------------------
|
|
|
|
function chunkid()
|
|
if sourceid and match(sourceid, "^[=@]") then
|
|
return sub(sourceid, 2) -- remove first char
|
|
end
|
|
return "[string]"
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- formats error message and throws error
|
|
-- * a simplified version, does not report what token was responsible
|
|
----------------------------------------------------------------------
|
|
|
|
function errorline(s, line)
|
|
local e = error or base.error
|
|
e(string.format("%s:%d: %s", chunkid(), line or ln, s))
|
|
end
|
|
local errorline = errorline
|
|
|
|
------------------------------------------------------------------------
|
|
-- count separators ("=") in a long string delimiter
|
|
------------------------------------------------------------------------
|
|
|
|
local function skip_sep(i)
|
|
local sub = sub
|
|
local s = sub(z, i, i)
|
|
i = i + 1
|
|
local count = #match(z, "=*", i) -- note, take the length
|
|
i = i + count
|
|
I = i
|
|
return (sub(z, i, i) == s) and count or (-count) - 1
|
|
end
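
----------------------------------------------------------------------
-- worked example (editorial note, not part of the original source):
-- for z = "[==[hello]==]" with the lexer at the first '[', skip_sep(1)
-- counts two '=' and finds a matching '[', so it returns 2 (the level
-- of the long bracket); "[foo]" gives -1, which llex() turns into a
-- plain '[' token, while "[=foo" gives -2, which is reported as an
-- invalid long string delimiter.
----------------------------------------------------------------------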
|
|
|
|
----------------------------------------------------------------------
|
|
-- reads a long string or long comment
|
|
----------------------------------------------------------------------
|
|
|
|
local function read_long_string(is_str, sep)
|
|
local i = I + 1 -- skip 2nd '['
|
|
local sub = sub
|
|
local c = sub(z, i, i)
|
|
if c == "\r" or c == "\n" then -- string starts with a newline?
|
|
i = inclinenumber(i) -- skip it
|
|
end
|
|
local j = i
|
|
while true do
|
|
local p, q, r = find(z, "([\r\n%]])", i) -- (long range)
|
|
if not p then
|
|
errorline(is_str and "unfinished long string" or
|
|
"unfinished long comment")
|
|
end
|
|
i = p
|
|
if r == "]" then -- delimiter test
|
|
if skip_sep(i) == sep then
|
|
buff = sub(z, buff, I)
|
|
I = I + 1 -- skip 2nd ']'
|
|
return buff
|
|
end
|
|
i = I
|
|
else -- newline
|
|
buff = buff.."\n"
|
|
i = inclinenumber(i)
|
|
end
|
|
end--while
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- reads a string
|
|
----------------------------------------------------------------------
|
|
|
|
local function read_string(del)
|
|
local i = I
|
|
local find = find
|
|
local sub = sub
|
|
while true do
|
|
local p, q, r = find(z, "([\n\r\\\"\'])", i) -- (long range)
|
|
if p then
|
|
if r == "\n" or r == "\r" then
|
|
errorline("unfinished string")
|
|
end
|
|
i = p
|
|
if r == "\\" then -- handle escapes
|
|
i = i + 1
|
|
r = sub(z, i, i)
|
|
if r == "" then break end -- (EOZ error)
|
|
p = find("abfnrtv\n\r", r, 1, true)
|
|
------------------------------------------------------
|
|
if p then -- special escapes
|
|
if p > 7 then
|
|
i = inclinenumber(i)
|
|
else
|
|
i = i + 1
|
|
end
|
|
------------------------------------------------------
|
|
elseif find(r, "%D") then -- other non-digits
|
|
i = i + 1
|
|
------------------------------------------------------
|
|
else -- \xxx sequence
|
|
local p, q, s = find(z, "^(%d%d?%d?)", i)
|
|
i = q + 1
|
|
if s + 1 > 256 then -- UCHAR_MAX
|
|
errorline("escape sequence too large")
|
|
end
|
|
------------------------------------------------------
|
|
end--if p
|
|
else
|
|
i = i + 1
|
|
if r == del then -- ending delimiter
|
|
I = i
|
|
return sub(z, buff, i - 1) -- return string
|
|
end
|
|
end--if r
|
|
else
|
|
break -- (error)
|
|
end--if p
|
|
end--while
|
|
errorline("unfinished string")
|
|
end
|
|
|
|
------------------------------------------------------------------------
|
|
-- main lexer function
|
|
------------------------------------------------------------------------
|
|
|
|
function llex()
|
|
local find = find
|
|
local match = match
|
|
while true do--outer
|
|
local i = I
|
|
-- inner loop allows break to be used to nicely section tests
|
|
while true do--inner
|
|
----------------------------------------------------------------
|
|
local p, _, r = find(z, "^([_%a][_%w]*)", i)
|
|
if p then
|
|
I = i + #r
|
|
if kw[r] then
|
|
addtoken("TK_KEYWORD", r) -- reserved word (keyword)
|
|
else
|
|
addtoken("TK_NAME", r) -- identifier
|
|
end
|
|
break -- (continue)
|
|
end
|
|
----------------------------------------------------------------
|
|
local p, _, r = find(z, "^(%.?)%d", i)
|
|
if p then -- numeral
|
|
if r == "." then i = i + 1 end
|
|
local _, q, r = find(z, "^%d*[%.%d]*([eE]?)", i)
|
|
i = q + 1
|
|
if #r == 1 then -- optional exponent
|
|
if match(z, "^[%+%-]", i) then -- optional sign
|
|
i = i + 1
|
|
end
|
|
end
|
|
local _, q = find(z, "^[_%w]*", i)
|
|
I = q + 1
|
|
local v = sub(z, p, q) -- string equivalent
|
|
if not base.tonumber(v) then -- handles hex test also
|
|
errorline("malformed number")
|
|
end
|
|
addtoken("TK_NUMBER", v)
|
|
break -- (continue)
|
|
end
|
|
----------------------------------------------------------------
|
|
local p, q, r, t = find(z, "^((%s)[ \t\v\f]*)", i)
|
|
if p then
|
|
if t == "\n" or t == "\r" then -- newline
|
|
inclinenumber(i, true)
|
|
else
|
|
I = q + 1 -- whitespace
|
|
addtoken("TK_SPACE", r)
|
|
end
|
|
break -- (continue)
|
|
end
|
|
----------------------------------------------------------------
|
|
local r = match(z, "^%p", i)
|
|
if r then
|
|
buff = i
|
|
local p = find("-[\"\'.=<>~", r, 1, true)
|
|
if p then
|
|
-- two-level if block for punctuation/symbols
|
|
--------------------------------------------------------
|
|
if p <= 2 then
|
|
if p == 1 then -- minus
|
|
local c = match(z, "^%-%-(%[?)", i)
|
|
if c then
|
|
i = i + 2
|
|
local sep = -1
|
|
if c == "[" then
|
|
sep = skip_sep(i)
|
|
end
|
|
if sep >= 0 then -- long comment
|
|
addtoken("TK_LCOMMENT", read_long_string(false, sep))
|
|
else -- short comment
|
|
I = find(z, "[\n\r]", i) or (#z + 1)
|
|
addtoken("TK_COMMENT", sub(z, buff, I - 1))
|
|
end
|
|
break -- (continue)
|
|
end
|
|
-- (fall through for "-")
|
|
else -- [ or long string
|
|
local sep = skip_sep(i)
|
|
if sep >= 0 then
|
|
addtoken("TK_LSTRING", read_long_string(true, sep))
|
|
elseif sep == -1 then
|
|
addtoken("TK_OP", "[")
|
|
else
|
|
errorline("invalid long string delimiter")
|
|
end
|
|
break -- (continue)
|
|
end
|
|
--------------------------------------------------------
|
|
elseif p <= 5 then
|
|
if p < 5 then -- strings
|
|
I = i + 1
|
|
addtoken("TK_STRING", read_string(r))
|
|
break -- (continue)
|
|
end
|
|
r = match(z, "^%.%.?%.?", i) -- .|..|... dots
|
|
-- (fall through)
|
|
--------------------------------------------------------
|
|
else -- relational
|
|
r = match(z, "^%p=?", i)
|
|
-- (fall through)
|
|
end
|
|
end
|
|
I = i + #r
|
|
addtoken("TK_OP", r) -- for other symbols, fall through
|
|
break -- (continue)
|
|
end
|
|
----------------------------------------------------------------
|
|
local r = sub(z, i, i)
|
|
if r ~= "" then
|
|
I = i + 1
|
|
addtoken("TK_OP", r) -- other single-char tokens
|
|
break
|
|
end
|
|
addtoken("TK_EOS", "") -- end of stream,
|
|
return -- exit here
|
|
----------------------------------------------------------------
|
|
end--while inner
|
|
end--while outer
|
|
end
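
----------------------------------------------------------------------
-- usage sketch (editorial illustration, not part of the original
-- source): the module keeps its results in the tok/seminfo/tokln
-- lists created by init().
--
--   local llex = require "llex"
--   llex.init("local x = 1 -- hi\n", "@example.lua")
--   llex.llex()
--   for i = 1, #llex.tok do
--     print(llex.tok[i], llex.seminfo[i], llex.tokln[i])
--   end
--
-- for the source line above this lists TK_KEYWORD "local", TK_SPACE,
-- TK_NAME "x", ..., TK_COMMENT "-- hi", TK_EOL "\n" and finally
-- TK_EOS "".
----------------------------------------------------------------------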
|
|
|
|
return base.getfenv()
|
|
end)
|
|
package.preload['lparser'] = (function (...)
|
|
--[[--------------------------------------------------------------------
|
|
|
|
lparser.lua: Lua 5.1 parser in Lua
|
|
This file is part of LuaSrcDiet, based on Yueliang material.
|
|
|
|
Copyright (c) 2008 Kein-Hong Man <khman@users.sf.net>
|
|
The COPYRIGHT file describes the conditions
|
|
under which this software may be distributed.
|
|
|
|
See the ChangeLog for more information.
|
|
|
|
----------------------------------------------------------------------]]
|
|
|
|
--[[--------------------------------------------------------------------
|
|
-- NOTES:
|
|
-- * This is a version of the native 5.1.x parser from Yueliang 0.4.0,
|
|
-- with significant modifications to handle LuaSrcDiet's needs:
|
|
-- (1) needs pre-built token tables instead of a module.method
|
|
-- (2) lparser.error is an optional error handler (from llex)
|
|
-- (3) not full parsing, currently fakes raw/unlexed constants
|
|
-- (4) parser() returns globalinfo, localinfo tables
|
|
-- * Please read technotes.txt for more technical details.
|
|
-- * NO support for 'arg' vararg functions (LUA_COMPAT_VARARG)
|
|
-- * A lot of the parser is unused, but might later be useful for
|
|
-- full-on parsing and analysis for a few measly bytes saved.
|
|
----------------------------------------------------------------------]]
|
|
|
|
local base = _G
|
|
local string = string
|
|
module "lparser"
|
|
local _G = base.getfenv()
|
|
|
|
--[[--------------------------------------------------------------------
|
|
-- variable and data structure initialization
|
|
----------------------------------------------------------------------]]
|
|
|
|
----------------------------------------------------------------------
|
|
-- initialization: main variables
|
|
----------------------------------------------------------------------
|
|
|
|
local toklist, -- grammar-only token tables (token table,
|
|
seminfolist, -- semantic information table, line number
|
|
toklnlist, -- table, cross-reference table)
|
|
xreflist,
|
|
tpos, -- token position
|
|
|
|
line, -- start line # for error messages
|
|
lastln, -- last line # for ambiguous syntax chk
|
|
tok, seminfo, ln, xref, -- token, semantic info, line
|
|
nameref, -- proper position of <name> token
|
|
fs, -- current function state
|
|
top_fs, -- top-level function state
|
|
|
|
globalinfo, -- global variable information table
|
|
globallookup, -- global variable name lookup table
|
|
localinfo, -- local variable information table
|
|
ilocalinfo, -- inactive locals (prior to activation)
|
|
ilocalrefs -- corresponding references to activate
|
|
|
|
-- forward references for local functions
|
|
local explist1, expr, block, exp1, body, chunk
|
|
|
|
----------------------------------------------------------------------
|
|
-- initialization: data structures
|
|
----------------------------------------------------------------------
|
|
|
|
local gmatch = string.gmatch
|
|
|
|
local block_follow = {} -- lookahead check in chunk(), returnstat()
|
|
for v in gmatch("else elseif end until <eof>", "%S+") do
|
|
block_follow[v] = true
|
|
end
|
|
|
|
local stat_call = {} -- lookup for calls in stat()
|
|
for v in gmatch("if while do for repeat function local return break", "%S+") do
|
|
stat_call[v] = v.."_stat"
|
|
end
|
|
|
|
local binopr_left = {} -- binary operators, left priority
|
|
local binopr_right = {} -- binary operators, right priority
|
|
for op, lt, rt in gmatch([[
|
|
{+ 6 6}{- 6 6}{* 7 7}{/ 7 7}{% 7 7}
|
|
{^ 10 9}{.. 5 4}
|
|
{~= 3 3}{== 3 3}
|
|
{< 3 3}{<= 3 3}{> 3 3}{>= 3 3}
|
|
{and 2 2}{or 1 1}
|
|
]], "{(%S+)%s(%d+)%s(%d+)}") do
|
|
binopr_left[op] = lt + 0
|
|
binopr_right[op] = rt + 0
|
|
end
|
|
|
|
local unopr = { ["not"] = true, ["-"] = true,
|
|
["#"] = true, } -- unary operators
|
|
local UNARY_PRIORITY = 8 -- priority for unary operators
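
----------------------------------------------------------------------
-- worked example (editorial note, not part of the original source):
-- subexpr() only consumes a trailing binop whose *left* priority is
-- higher than its current limit, and recurses using the operator's
-- *right* priority.  With the table above, "a + b * c ^ d" therefore
-- groups as a + (b * (c ^ d)): '*' (left 7) beats the limit 6 set by
-- '+', and '^' is right-associative because its right priority (9) is
-- lower than its left priority (10).
----------------------------------------------------------------------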
|
|
|
|
--[[--------------------------------------------------------------------
|
|
-- support functions
|
|
----------------------------------------------------------------------]]
|
|
|
|
----------------------------------------------------------------------
|
|
-- formats error message and throws error (duplicated from llex)
|
|
-- * a simplified version, does not report what token was responsible
|
|
----------------------------------------------------------------------
|
|
|
|
local function errorline(s, line)
|
|
local e = error or base.error
|
|
e(string.format("(source):%d: %s", line or ln, s))
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- handles incoming token, semantic information pairs
|
|
-- * NOTE: 'nextt' is named 'next' originally
|
|
----------------------------------------------------------------------
|
|
|
|
-- reads in next token
|
|
local function nextt()
|
|
lastln = toklnlist[tpos]
|
|
tok, seminfo, ln, xref
|
|
= toklist[tpos], seminfolist[tpos], toklnlist[tpos], xreflist[tpos]
|
|
tpos = tpos + 1
|
|
end
|
|
|
|
-- peek at next token (single lookahead for table constructor)
|
|
local function lookahead()
|
|
return toklist[tpos]
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- throws a syntax error; also used when an expected token is not there
|
|
----------------------------------------------------------------------
|
|
|
|
local function syntaxerror(msg)
|
|
local tok = tok
|
|
if tok ~= "<number>" and tok ~= "<string>" then
|
|
if tok == "<name>" then tok = seminfo end
|
|
tok = "'"..tok.."'"
|
|
end
|
|
errorline(msg.." near "..tok)
|
|
end
|
|
|
|
local function error_expected(token)
|
|
syntaxerror("'"..token.."' expected")
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- tests for a token, returns outcome
|
|
-- * return value changed to boolean
|
|
----------------------------------------------------------------------
|
|
|
|
local function testnext(c)
|
|
if tok == c then nextt(); return true end
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- check for existence of a token, throws error if not found
|
|
----------------------------------------------------------------------
|
|
|
|
local function check(c)
|
|
if tok ~= c then error_expected(c) end
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- verify existence of a token, then skip it
|
|
----------------------------------------------------------------------
|
|
|
|
local function checknext(c)
|
|
check(c); nextt()
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- throws error if condition not matched
|
|
----------------------------------------------------------------------
|
|
|
|
local function check_condition(c, msg)
|
|
if not c then syntaxerror(msg) end
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- verifies token conditions are met or else throw error
|
|
----------------------------------------------------------------------
|
|
|
|
local function check_match(what, who, where)
|
|
if not testnext(what) then
|
|
if where == ln then
|
|
error_expected(what)
|
|
else
|
|
syntaxerror("'"..what.."' expected (to close '"..who.."' at line "..where..")")
|
|
end
|
|
end
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- expect that token is a name, return the name
|
|
----------------------------------------------------------------------
|
|
|
|
local function str_checkname()
|
|
check("<name>")
|
|
local ts = seminfo
|
|
nameref = xref
|
|
nextt()
|
|
return ts
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- adds given string s in string pool, sets e as VK
|
|
----------------------------------------------------------------------
|
|
|
|
local function codestring(e, s)
|
|
e.k = "VK"
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- consume a name token, adds it to string pool
|
|
----------------------------------------------------------------------
|
|
|
|
local function checkname(e)
|
|
codestring(e, str_checkname())
|
|
end
|
|
|
|
--[[--------------------------------------------------------------------
|
|
-- variable (global|local|upvalue) handling
|
|
-- * to track locals and globals, we can extend Yueliang's minimal
|
|
-- variable management code with little trouble
|
|
-- * entry point is singlevar() for variable lookups
|
|
-- * lookup tables (bl.locallist) are maintained awkwardly in the basic
|
|
-- block data structures, PLUS the function data structure (this is
|
|
-- an inelegant hack, since bl is nil for the top level of a function)
|
|
----------------------------------------------------------------------]]
|
|
|
|
----------------------------------------------------------------------
|
|
-- register a local variable, create local variable object, set in
|
|
-- to-activate variable list
|
|
-- * used in new_localvarliteral(), parlist(), fornum(), forlist(),
|
|
-- localfunc(), localstat()
|
|
----------------------------------------------------------------------
|
|
|
|
local function new_localvar(name, special)
|
|
local bl = fs.bl
|
|
local locallist
|
|
-- locate locallist in current block object or function root object
|
|
if bl then
|
|
locallist = bl.locallist
|
|
else
|
|
locallist = fs.locallist
|
|
end
|
|
-- build local variable information object and set localinfo
|
|
local id = #localinfo + 1
|
|
localinfo[id] = { -- new local variable object
|
|
name = name, -- local variable name
|
|
xref = { nameref }, -- xref, first value is declaration
|
|
decl = nameref, -- location of declaration, = xref[1]
|
|
}
|
|
if special then -- "self" must not be changed
|
|
localinfo[id].isself = true
|
|
end
|
|
-- this can override a local with the same name in the same scope
|
|
-- but first, keep it inactive until it gets activated
|
|
local i = #ilocalinfo + 1
|
|
ilocalinfo[i] = id
|
|
ilocalrefs[i] = locallist
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- actually activate the variables so that they are visible
|
|
-- * remember Lua semantics, e.g. RHS is evaluated first, then LHS
|
|
-- * used in parlist(), forbody(), localfunc(), localstat(), body()
|
|
----------------------------------------------------------------------
|
|
|
|
local function adjustlocalvars(nvars)
|
|
local sz = #ilocalinfo
|
|
-- i goes from left to right, in order of local allocation, because
|
|
-- of something like: local a,a,a = 1,2,3 which gives a = 3
|
|
while nvars > 0 do
|
|
nvars = nvars - 1
|
|
local i = sz - nvars
|
|
local id = ilocalinfo[i] -- local's id
|
|
local obj = localinfo[id]
|
|
local name = obj.name -- name of local
|
|
obj.act = xref -- set activation location
|
|
ilocalinfo[i] = nil
|
|
local locallist = ilocalrefs[i] -- ref to lookup table to update
|
|
ilocalrefs[i] = nil
|
|
local existing = locallist[name] -- if existing, remove old first!
|
|
if existing then -- do not overlap, set special
|
|
obj = localinfo[existing] -- form of rem, as -id
|
|
obj.rem = -id
|
|
end
|
|
locallist[name] = id -- activate, now visible to Lua
|
|
end
|
|
end
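
----------------------------------------------------------------------
-- editorial note (not part of the original source): the two-step
-- register/activate scheme mirrors Lua's own scoping rules, e.g. in
--
--   local print = print
--
-- localstat() first records the new 'print' via new_localvar() (still
-- inactive), then parses the right-hand side, whose 'print' is xref'd
-- as the old (here: global) variable, and only then calls
-- adjustlocalvars(1) to make the new local visible in locallist.
----------------------------------------------------------------------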
|
|
|
|
----------------------------------------------------------------------
|
|
-- remove (deactivate) variables in current scope (before scope exits)
|
|
-- * zap entire locallist tables since we are not allocating registers
|
|
-- * used in leaveblock(), close_func()
|
|
----------------------------------------------------------------------
|
|
|
|
local function removevars()
|
|
local bl = fs.bl
|
|
local locallist
|
|
-- locate locallist in current block object or function root object
|
|
if bl then
|
|
locallist = bl.locallist
|
|
else
|
|
locallist = fs.locallist
|
|
end
|
|
-- enumerate the local list at current scope and deactivate 'em
|
|
for name, id in base.pairs(locallist) do
|
|
local obj = localinfo[id]
|
|
obj.rem = xref -- set deactivation location
|
|
end
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- creates a new local variable given a name
|
|
-- * skips internal locals (those starting with '('), so internal
|
|
-- locals never need a corresponding adjustlocalvars() call
|
|
-- * special is true for "self" which must not be optimized
|
|
-- * used in fornum(), forlist(), parlist(), body()
|
|
----------------------------------------------------------------------
|
|
|
|
local function new_localvarliteral(name, special)
|
|
if string.sub(name, 1, 1) == "(" then -- can skip internal locals
|
|
return
|
|
end
|
|
new_localvar(name, special)
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- search the local variable namespace of the given fs for a match
|
|
-- * returns localinfo index
|
|
-- * used only in singlevaraux()
|
|
----------------------------------------------------------------------
|
|
|
|
local function searchvar(fs, n)
|
|
local bl = fs.bl
|
|
local locallist
|
|
if bl then
|
|
locallist = bl.locallist
|
|
while locallist do
|
|
if locallist[n] then return locallist[n] end -- found
|
|
bl = bl.prev
|
|
locallist = bl and bl.locallist
|
|
end
|
|
end
|
|
locallist = fs.locallist
|
|
return locallist[n] or -1 -- found or not found (-1)
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- handle locals, globals and upvalues and related processing
|
|
-- * search mechanism is recursive, calls itself to search parents
|
|
-- * used only in singlevar()
|
|
----------------------------------------------------------------------
|
|
|
|
local function singlevaraux(fs, n, var)
|
|
if fs == nil then -- no more levels?
|
|
var.k = "VGLOBAL" -- default is global variable
|
|
return "VGLOBAL"
|
|
else
|
|
local v = searchvar(fs, n) -- look up at current level
|
|
if v >= 0 then
|
|
var.k = "VLOCAL"
|
|
var.id = v
|
|
-- codegen may need to deal with upvalue here
|
|
return "VLOCAL"
|
|
else -- not found at current level; try upper one
|
|
if singlevaraux(fs.prev, n, var) == "VGLOBAL" then
|
|
return "VGLOBAL"
|
|
end
|
|
-- else was LOCAL or UPVAL, handle here
|
|
var.k = "VUPVAL" -- upvalue in this level
|
|
return "VUPVAL"
|
|
end--if v
|
|
end--if fs
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- consume a name token, creates a variable (global|local|upvalue)
|
|
-- * used in prefixexp(), funcname()
|
|
----------------------------------------------------------------------
|
|
|
|
local function singlevar(v)
|
|
local name = str_checkname()
|
|
singlevaraux(fs, name, v)
|
|
------------------------------------------------------------------
|
|
-- variable tracking
|
|
------------------------------------------------------------------
|
|
if v.k == "VGLOBAL" then
|
|
-- if global being accessed, keep track of it by creating an object
|
|
local id = globallookup[name]
|
|
if not id then
|
|
id = #globalinfo + 1
|
|
globalinfo[id] = { -- new global variable object
|
|
name = name, -- global variable name
|
|
xref = { nameref }, -- xref, first value is declaration
|
|
}
|
|
globallookup[name] = id -- remember it
|
|
else
|
|
local obj = globalinfo[id].xref
|
|
obj[#obj + 1] = nameref -- add xref
|
|
end
|
|
else
|
|
-- local/upvalue is being accessed, keep track of it
|
|
local id = v.id
|
|
local obj = localinfo[id].xref
|
|
obj[#obj + 1] = nameref -- add xref
|
|
end
|
|
end
|
|
|
|
--[[--------------------------------------------------------------------
|
|
-- state management functions with open/close pairs
|
|
----------------------------------------------------------------------]]
|
|
|
|
----------------------------------------------------------------------
|
|
-- enters a code unit, initializes elements
|
|
----------------------------------------------------------------------
|
|
|
|
local function enterblock(isbreakable)
|
|
local bl = {} -- per-block state
|
|
bl.isbreakable = isbreakable
|
|
bl.prev = fs.bl
|
|
bl.locallist = {}
|
|
fs.bl = bl
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- leaves a code unit, closes any upvalues
|
|
----------------------------------------------------------------------
|
|
|
|
local function leaveblock()
|
|
local bl = fs.bl
|
|
removevars()
|
|
fs.bl = bl.prev
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- opening of a function
|
|
-- * top_fs is only for anchoring the top fs, so that parser() can
|
|
-- return it to the caller function along with useful output
|
|
-- * used in parser() and body()
|
|
----------------------------------------------------------------------
|
|
|
|
local function open_func()
|
|
local new_fs -- per-function state
|
|
if not fs then -- top_fs is created early
|
|
new_fs = top_fs
|
|
else
|
|
new_fs = {}
|
|
end
|
|
new_fs.prev = fs -- linked list of function states
|
|
new_fs.bl = nil
|
|
new_fs.locallist = {}
|
|
fs = new_fs
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- closing of a function
|
|
-- * used in parser() and body()
|
|
----------------------------------------------------------------------
|
|
|
|
local function close_func()
|
|
removevars()
|
|
fs = fs.prev
|
|
end
|
|
|
|
--[[--------------------------------------------------------------------
|
|
-- other parsing functions
|
|
-- * for table constructor, parameter list, argument list
|
|
----------------------------------------------------------------------]]
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse a function name suffix, for function call specifications
|
|
-- * used in primaryexp(), funcname()
|
|
----------------------------------------------------------------------
|
|
|
|
local function field(v)
|
|
-- field -> ['.' | ':'] NAME
|
|
local key = {}
|
|
nextt() -- skip the dot or colon
|
|
checkname(key)
|
|
v.k = "VINDEXED"
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse a table indexing suffix, for constructors, expressions
|
|
-- * used in recfield(), primaryexp()
|
|
----------------------------------------------------------------------
|
|
|
|
local function yindex(v)
|
|
-- index -> '[' expr ']'
|
|
nextt() -- skip the '['
|
|
expr(v)
|
|
checknext("]")
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse a table record (hash) field
|
|
-- * used in constructor()
|
|
----------------------------------------------------------------------
|
|
|
|
local function recfield(cc)
|
|
-- recfield -> (NAME | '['exp1']') = exp1
|
|
local key, val = {}, {}
|
|
if tok == "<name>" then
|
|
checkname(key)
|
|
else-- tok == '['
|
|
yindex(key)
|
|
end
|
|
checknext("=")
|
|
expr(val)
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- emit a set list instruction if enough elements (LFIELDS_PER_FLUSH)
|
|
-- * note: retained in this skeleton because it modifies cc.v.k
|
|
-- * used in constructor()
|
|
----------------------------------------------------------------------
|
|
|
|
local function closelistfield(cc)
|
|
if cc.v.k == "VVOID" then return end -- there is no list item
|
|
cc.v.k = "VVOID"
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse a table list (array) field
|
|
-- * used in constructor()
|
|
----------------------------------------------------------------------
|
|
|
|
local function listfield(cc)
|
|
expr(cc.v)
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse a table constructor
|
|
-- * used in funcargs(), simpleexp()
|
|
----------------------------------------------------------------------
|
|
|
|
local function constructor(t)
|
|
-- constructor -> '{' [ field { fieldsep field } [ fieldsep ] ] '}'
|
|
-- field -> recfield | listfield
|
|
-- fieldsep -> ',' | ';'
|
|
local line = ln
|
|
local cc = {}
|
|
cc.v = {}
|
|
cc.t = t
|
|
t.k = "VRELOCABLE"
|
|
cc.v.k = "VVOID"
|
|
checknext("{")
|
|
repeat
|
|
if tok == "}" then break end
|
|
-- closelistfield(cc) here
|
|
local c = tok
|
|
if c == "<name>" then -- may be listfields or recfields
|
|
if lookahead() ~= "=" then -- look ahead: expression?
|
|
listfield(cc)
|
|
else
|
|
recfield(cc)
|
|
end
|
|
elseif c == "[" then -- constructor_item -> recfield
|
|
recfield(cc)
|
|
else -- constructor_part -> listfield
|
|
listfield(cc)
|
|
end
|
|
until not testnext(",") and not testnext(";")
|
|
check_match("}", "{", line)
|
|
-- lastlistfield(cc) here
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse the arguments (parameters) of a function declaration
|
|
-- * used in body()
|
|
----------------------------------------------------------------------
|
|
|
|
local function parlist()
|
|
-- parlist -> [ param { ',' param } ]
|
|
local nparams = 0
|
|
if tok ~= ")" then -- is 'parlist' not empty?
|
|
repeat
|
|
local c = tok
|
|
if c == "<name>" then -- param -> NAME
|
|
new_localvar(str_checkname())
|
|
nparams = nparams + 1
|
|
elseif c == "..." then
|
|
nextt()
|
|
fs.is_vararg = true
|
|
else
|
|
syntaxerror("<name> or '...' expected")
|
|
end
|
|
until fs.is_vararg or not testnext(",")
|
|
end--if
|
|
adjustlocalvars(nparams)
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse the parameters of a function call
|
|
-- * contrast with parlist(), used in function declarations
|
|
-- * used in primaryexp()
|
|
----------------------------------------------------------------------
|
|
|
|
local function funcargs(f)
|
|
local args = {}
|
|
local line = ln
|
|
local c = tok
|
|
if c == "(" then -- funcargs -> '(' [ explist1 ] ')'
|
|
if line ~= lastln then
|
|
syntaxerror("ambiguous syntax (function call x new statement)")
|
|
end
|
|
nextt()
|
|
if tok == ")" then -- arg list is empty?
|
|
args.k = "VVOID"
|
|
else
|
|
explist1(args)
|
|
end
|
|
check_match(")", "(", line)
|
|
elseif c == "{" then -- funcargs -> constructor
|
|
constructor(args)
|
|
elseif c == "<string>" then -- funcargs -> STRING
|
|
codestring(args, seminfo)
|
|
nextt() -- must use 'seminfo' before 'next'
|
|
else
|
|
syntaxerror("function arguments expected")
|
|
return
|
|
end--if c
|
|
f.k = "VCALL"
|
|
end
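
----------------------------------------------------------------------
-- editorial note (not part of the original source): the line check
-- above mirrors Lua 5.1's guard against the classic ambiguity
--
--   local a = f
--   (g):method()
--
-- where a '(' that starts a new line could either continue the call
-- expression or begin a new statement; Lua 5.1 rejects this form with
-- "ambiguous syntax (function call x new statement)".
----------------------------------------------------------------------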
|
|
|
|
--[[--------------------------------------------------------------------
|
|
-- mostly expression functions
|
|
----------------------------------------------------------------------]]
|
|
|
|
----------------------------------------------------------------------
|
|
-- parses an expression in parentheses or a single variable
|
|
-- * used in primaryexp()
|
|
----------------------------------------------------------------------
|
|
|
|
local function prefixexp(v)
|
|
-- prefixexp -> NAME | '(' expr ')'
|
|
local c = tok
|
|
if c == "(" then
|
|
local line = ln
|
|
nextt()
|
|
expr(v)
|
|
check_match(")", "(", line)
|
|
elseif c == "<name>" then
|
|
singlevar(v)
|
|
else
|
|
syntaxerror("unexpected symbol")
|
|
end--if c
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parses a prefixexp (an expression in parentheses or a single
|
|
-- variable) or a function call specification
|
|
-- * used in simpleexp(), assignment(), expr_stat()
|
|
----------------------------------------------------------------------
|
|
|
|
local function primaryexp(v)
|
|
-- primaryexp ->
|
|
-- prefixexp { '.' NAME | '[' exp ']' | ':' NAME funcargs | funcargs }
|
|
prefixexp(v)
|
|
while true do
|
|
local c = tok
|
|
if c == "." then -- field
|
|
field(v)
|
|
elseif c == "[" then -- '[' exp1 ']'
|
|
local key = {}
|
|
yindex(key)
|
|
elseif c == ":" then -- ':' NAME funcargs
|
|
local key = {}
|
|
nextt()
|
|
checkname(key)
|
|
funcargs(v)
|
|
elseif c == "(" or c == "<string>" or c == "{" then -- funcargs
|
|
funcargs(v)
|
|
else
|
|
return
|
|
end--if c
|
|
end--while
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parses general expression types, constants handled here
|
|
-- * used in subexpr()
|
|
----------------------------------------------------------------------
|
|
|
|
local function simpleexp(v)
|
|
-- simpleexp -> NUMBER | STRING | NIL | TRUE | FALSE | ... |
|
|
-- constructor | FUNCTION body | primaryexp
|
|
local c = tok
|
|
if c == "<number>" then
|
|
v.k = "VKNUM"
|
|
elseif c == "<string>" then
|
|
codestring(v, seminfo)
|
|
elseif c == "nil" then
|
|
v.k = "VNIL"
|
|
elseif c == "true" then
|
|
v.k = "VTRUE"
|
|
elseif c == "false" then
|
|
v.k = "VFALSE"
|
|
elseif c == "..." then -- vararg
|
|
check_condition(fs.is_vararg == true,
|
|
"cannot use '...' outside a vararg function");
|
|
v.k = "VVARARG"
|
|
elseif c == "{" then -- constructor
|
|
constructor(v)
|
|
return
|
|
elseif c == "function" then
|
|
nextt()
|
|
body(v, false, ln)
|
|
return
|
|
else
|
|
primaryexp(v)
|
|
return
|
|
end--if c
|
|
nextt()
|
|
end
|
|
|
|
------------------------------------------------------------------------
|
|
-- Parse subexpressions. Includes handling of unary operators and binary
|
|
-- operators. A subexpr is given the rhs priority level of the operator
|
|
-- immediately left of it, if any (limit is 0 if none), and if a binop
|
|
-- is found, limit is compared with the lhs priority level of the binop
|
|
-- in order to determine which executes first.
|
|
-- * recursively called
|
|
-- * used in expr()
|
|
------------------------------------------------------------------------
|
|
|
|
local function subexpr(v, limit)
|
|
-- subexpr -> (simpleexp | unop subexpr) { binop subexpr }
|
|
-- * where 'binop' is any binary operator with a priority
|
|
-- higher than 'limit'
|
|
local op = tok
|
|
local uop = unopr[op]
|
|
if uop then
|
|
nextt()
|
|
subexpr(v, UNARY_PRIORITY)
|
|
else
|
|
simpleexp(v)
|
|
end
|
|
-- expand while operators have priorities higher than 'limit'
|
|
op = tok
|
|
local binop = binopr_left[op]
|
|
while binop and binop > limit do
|
|
local v2 = {}
|
|
nextt()
|
|
-- read sub-expression with higher priority
|
|
local nextop = subexpr(v2, binopr_right[op])
|
|
op = nextop
|
|
binop = binopr_left[op]
|
|
end
|
|
return op -- return first untreated operator
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- Expression parsing starts here. Function subexpr is entered with the
|
|
-- left operator (which is non-existent) priority of 0, which is lower
|
|
-- than all actual operators. Expr information is returned in param v.
|
|
-- * used in cond(), explist1(), index(), recfield(), listfield(),
|
|
-- prefixexp(), while_stat(), exp1()
|
|
----------------------------------------------------------------------
|
|
|
|
-- this is a forward-referenced local
|
|
function expr(v)
|
|
-- expr -> subexpr
|
|
subexpr(v, 0)
|
|
end
|
|
|
|
--[[--------------------------------------------------------------------
|
|
-- third level parsing functions
|
|
----------------------------------------------------------------------]]
|
|
|
|
------------------------------------------------------------------------
|
|
-- parse a variable assignment sequence
|
|
-- * recursively called
|
|
-- * used in expr_stat()
|
|
------------------------------------------------------------------------
|
|
|
|
local function assignment(v)
|
|
local e = {}
|
|
local c = v.v.k
|
|
check_condition(c == "VLOCAL" or c == "VUPVAL" or c == "VGLOBAL"
|
|
or c == "VINDEXED", "syntax error")
|
|
if testnext(",") then -- assignment -> ',' primaryexp assignment
|
|
local nv = {} -- expdesc
|
|
nv.v = {}
|
|
primaryexp(nv.v)
|
|
-- lparser.c deals with some register usage conflict here
|
|
assignment(nv)
|
|
else -- assignment -> '=' explist1
|
|
checknext("=")
|
|
explist1(e)
|
|
return -- avoid default
|
|
end
|
|
e.k = "VNONRELOC"
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse a for loop body for both versions of the for loop
|
|
-- * used in fornum(), forlist()
|
|
----------------------------------------------------------------------
|
|
|
|
local function forbody(nvars, isnum)
|
|
-- forbody -> DO block
|
|
checknext("do")
|
|
enterblock(false) -- scope for declared variables
|
|
adjustlocalvars(nvars)
|
|
block()
|
|
leaveblock() -- end of scope for declared variables
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse a numerical for loop, calls forbody()
|
|
-- * used in for_stat()
|
|
----------------------------------------------------------------------
|
|
|
|
local function fornum(varname)
|
|
-- fornum -> NAME = exp1, exp1 [, exp1] DO body
|
|
local line = line
|
|
new_localvarliteral("(for index)")
|
|
new_localvarliteral("(for limit)")
|
|
new_localvarliteral("(for step)")
|
|
new_localvar(varname)
|
|
checknext("=")
|
|
exp1() -- initial value
|
|
checknext(",")
|
|
exp1() -- limit
|
|
if testnext(",") then
|
|
exp1() -- optional step
|
|
else
|
|
-- default step = 1
|
|
end
|
|
forbody(1, true)
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse a generic for loop, calls forbody()
|
|
-- * used in for_stat()
|
|
----------------------------------------------------------------------
|
|
|
|
local function forlist(indexname)
|
|
-- forlist -> NAME {, NAME} IN explist1 DO body
|
|
local e = {}
|
|
-- create control variables
|
|
new_localvarliteral("(for generator)")
|
|
new_localvarliteral("(for state)")
|
|
new_localvarliteral("(for control)")
|
|
-- create declared variables
|
|
new_localvar(indexname)
|
|
local nvars = 1
|
|
while testnext(",") do
|
|
new_localvar(str_checkname())
|
|
nvars = nvars + 1
|
|
end
|
|
checknext("in")
|
|
local line = line
|
|
explist1(e)
|
|
forbody(nvars, false)
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse a function name specification
|
|
-- * used in func_stat()
|
|
----------------------------------------------------------------------
|
|
|
|
local function funcname(v)
|
|
-- funcname -> NAME {field} [':' NAME]
|
|
local needself = false
|
|
singlevar(v)
|
|
while tok == "." do
|
|
field(v)
|
|
end
|
|
if tok == ":" then
|
|
needself = true
|
|
field(v)
|
|
end
|
|
return needself
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse the single expressions needed in numerical for loops
|
|
-- * used in fornum()
|
|
----------------------------------------------------------------------
|
|
|
|
-- this is a forward-referenced local
|
|
function exp1()
|
|
-- exp1 -> expr
|
|
local e = {}
|
|
expr(e)
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse condition in a repeat statement or an if control structure
|
|
-- * used in repeat_stat(), test_then_block()
|
|
----------------------------------------------------------------------
|
|
|
|
local function cond()
|
|
-- cond -> expr
|
|
local v = {}
|
|
expr(v) -- read condition
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse part of an if control structure, including the condition
|
|
-- * used in if_stat()
|
|
----------------------------------------------------------------------
|
|
|
|
local function test_then_block()
|
|
-- test_then_block -> [IF | ELSEIF] cond THEN block
|
|
nextt() -- skip IF or ELSEIF
|
|
cond()
|
|
checknext("then")
|
|
block() -- 'then' part
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse a local function statement
|
|
-- * used in local_stat()
|
|
----------------------------------------------------------------------
|
|
|
|
local function localfunc()
|
|
-- localfunc -> NAME body
|
|
local v, b = {}
|
|
new_localvar(str_checkname())
|
|
v.k = "VLOCAL"
|
|
adjustlocalvars(1)
|
|
body(b, false, ln)
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse a local variable declaration statement
|
|
-- * used in local_stat()
|
|
----------------------------------------------------------------------
|
|
|
|
local function localstat()
|
|
-- localstat -> NAME {',' NAME} ['=' explist1]
|
|
local nvars = 0
|
|
local e = {}
|
|
repeat
|
|
new_localvar(str_checkname())
|
|
nvars = nvars + 1
|
|
until not testnext(",")
|
|
if testnext("=") then
|
|
explist1(e)
|
|
else
|
|
e.k = "VVOID"
|
|
end
|
|
adjustlocalvars(nvars)
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse a list of comma-separated expressions
|
|
-- * used in return_stat(), localstat(), funcargs(), assignment(),
|
|
-- forlist()
|
|
----------------------------------------------------------------------
|
|
|
|
-- this is a forward-referenced local
|
|
function explist1(e)
|
|
-- explist1 -> expr { ',' expr }
|
|
expr(e)
|
|
while testnext(",") do
|
|
expr(e)
|
|
end
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse function declaration body
|
|
-- * used in simpleexp(), localfunc(), func_stat()
|
|
----------------------------------------------------------------------
|
|
|
|
-- this is a forward-referenced local
|
|
function body(e, needself, line)
|
|
-- body -> '(' parlist ')' chunk END
|
|
open_func()
|
|
checknext("(")
|
|
if needself then
|
|
new_localvarliteral("self", true)
|
|
adjustlocalvars(1)
|
|
end
|
|
parlist()
|
|
checknext(")")
|
|
chunk()
|
|
check_match("end", "function", line)
|
|
close_func()
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse a code block or unit
|
|
-- * used in do_stat(), while_stat(), forbody(), test_then_block(),
|
|
-- if_stat()
|
|
----------------------------------------------------------------------
|
|
|
|
-- this is a forward-referenced local
|
|
function block()
|
|
-- block -> chunk
|
|
enterblock(false)
|
|
chunk()
|
|
leaveblock()
|
|
end
|
|
|
|
--[[--------------------------------------------------------------------
|
|
-- second level parsing functions, all with '_stat' suffix
|
|
-- * since they are called via a table lookup, they cannot be local
|
|
-- functions (a lookup table of local functions might be smaller...)
|
|
-- * stat() -> *_stat()
|
|
----------------------------------------------------------------------]]
|
|
|
|
----------------------------------------------------------------------
|
|
-- initial parsing for a for loop, calls fornum() or forlist()
|
|
-- * removed 'line' parameter (used to set debug information only)
|
|
-- * used in stat()
|
|
----------------------------------------------------------------------
|
|
|
|
function for_stat()
|
|
-- stat -> for_stat -> FOR (fornum | forlist) END
|
|
local line = line
|
|
enterblock(true) -- scope for loop and control variables
|
|
nextt() -- skip 'for'
|
|
local varname = str_checkname() -- first variable name
|
|
local c = tok
|
|
if c == "=" then
|
|
fornum(varname)
|
|
elseif c == "," or c == "in" then
|
|
forlist(varname)
|
|
else
|
|
syntaxerror("'=' or 'in' expected")
|
|
end
|
|
check_match("end", "for", line)
|
|
leaveblock() -- loop scope (`break' jumps to this point)
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse a while-do control structure, body processed by block()
|
|
-- * used in stat()
|
|
----------------------------------------------------------------------
|
|
|
|
function while_stat()
|
|
-- stat -> while_stat -> WHILE cond DO block END
|
|
local line = line
|
|
nextt() -- skip WHILE
|
|
cond() -- parse condition
|
|
enterblock(true)
|
|
checknext("do")
|
|
block()
|
|
check_match("end", "while", line)
|
|
leaveblock()
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse a repeat-until control structure, body parsed by chunk()
|
|
-- * originally, repeatstat() calls breakstat() too if there is an
|
|
-- upvalue in the scope block; nothing is actually lexed, it is
|
|
-- actually the common code in breakstat() for closing of upvalues
|
|
-- * used in stat()
|
|
----------------------------------------------------------------------
|
|
|
|
function repeat_stat()
|
|
-- stat -> repeat_stat -> REPEAT block UNTIL cond
|
|
local line = line
|
|
enterblock(true) -- loop block
|
|
enterblock(false) -- scope block
|
|
nextt() -- skip REPEAT
|
|
chunk()
|
|
check_match("until", "repeat", line)
|
|
cond()
|
|
-- close upvalues at scope level below
|
|
leaveblock() -- finish scope
|
|
leaveblock() -- finish loop
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse an if control structure
|
|
-- * used in stat()
|
|
----------------------------------------------------------------------
|
|
|
|
function if_stat()
|
|
-- stat -> if_stat -> IF cond THEN block
|
|
-- {ELSEIF cond THEN block} [ELSE block] END
|
|
local line = line
|
|
local v = {}
|
|
test_then_block() -- IF cond THEN block
|
|
while tok == "elseif" do
|
|
test_then_block() -- ELSEIF cond THEN block
|
|
end
|
|
if tok == "else" then
|
|
nextt() -- skip ELSE
|
|
block() -- 'else' part
|
|
end
|
|
check_match("end", "if", line)
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse a return statement
|
|
-- * used in stat()
|
|
----------------------------------------------------------------------
|
|
|
|
function return_stat()
|
|
-- stat -> return_stat -> RETURN explist
|
|
local e = {}
|
|
nextt() -- skip RETURN
|
|
local c = tok
|
|
if block_follow[c] or c == ";" then
|
|
-- return no values
|
|
else
|
|
explist1(e) -- optional return values
|
|
end
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse a break statement
|
|
-- * used in stat()
|
|
----------------------------------------------------------------------
|
|
|
|
function break_stat()
|
|
-- stat -> break_stat -> BREAK
|
|
local bl = fs.bl
|
|
nextt() -- skip BREAK
|
|
while bl and not bl.isbreakable do -- find a breakable block
|
|
bl = bl.prev
|
|
end
|
|
if not bl then
|
|
syntaxerror("no loop to break")
|
|
end
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse a function call with no returns or an assignment statement
|
|
-- * the struct with .prev is used for name searching in lparser.c,
|
|
-- so it is retained for now; present in assignment() also
|
|
-- * used in stat()
|
|
----------------------------------------------------------------------
|
|
|
|
function expr_stat()
|
|
-- stat -> expr_stat -> func | assignment
|
|
local v = {}
|
|
v.v = {}
|
|
primaryexp(v.v)
|
|
if v.v.k == "VCALL" then -- stat -> func
|
|
-- call statement uses no results
|
|
else -- stat -> assignment
|
|
v.prev = nil
|
|
assignment(v)
|
|
end
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse a function statement
|
|
-- * used in stat()
|
|
----------------------------------------------------------------------
|
|
|
|
function function_stat()
|
|
-- stat -> function_stat -> FUNCTION funcname body
|
|
local line = line
|
|
local v, b = {}, {}
|
|
nextt() -- skip FUNCTION
|
|
local needself = funcname(v)
|
|
body(b, needself, line)
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse a simple block enclosed by a DO..END pair
|
|
-- * used in stat()
|
|
----------------------------------------------------------------------
|
|
|
|
function do_stat()
|
|
-- stat -> do_stat -> DO block END
|
|
local line = line
|
|
nextt() -- skip DO
|
|
block()
|
|
check_match("end", "do", line)
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse a statement starting with LOCAL
|
|
-- * used in stat()
|
|
----------------------------------------------------------------------
|
|
|
|
function local_stat()
|
|
-- stat -> local_stat -> LOCAL FUNCTION localfunc
|
|
-- -> LOCAL localstat
|
|
nextt() -- skip LOCAL
|
|
if testnext("function") then -- local function?
|
|
localfunc()
|
|
else
|
|
localstat()
|
|
end
|
|
end
|
|
|
|
--[[--------------------------------------------------------------------
|
|
-- main functions, top level parsing functions
|
|
-- * accessible functions are: init(lexer), parser()
|
|
-- * [entry] -> parser() -> chunk() -> stat()
|
|
----------------------------------------------------------------------]]
|
|
|
|
----------------------------------------------------------------------
|
|
-- initial parsing for statements, calls '_stat' suffixed functions
|
|
-- * used in chunk()
|
|
----------------------------------------------------------------------
|
|
|
|
local function stat()
|
|
-- stat -> if_stat while_stat do_stat for_stat repeat_stat
|
|
-- function_stat local_stat return_stat break_stat
|
|
-- expr_stat
|
|
line = ln -- may be needed for error messages
|
|
local c = tok
|
|
local fn = stat_call[c]
|
|
-- handles: if while do for repeat function local return break
|
|
if fn then
|
|
_G[fn]()
|
|
-- return or break must be last statement
|
|
if c == "return" or c == "break" then return true end
|
|
else
|
|
expr_stat()
|
|
end
|
|
return false
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- parse a chunk, which consists of a bunch of statements
|
|
-- * used in parser(), body(), block(), repeat_stat()
|
|
----------------------------------------------------------------------
|
|
|
|
-- this is a forward-referenced local
|
|
function chunk()
|
|
-- chunk -> { stat [';'] }
|
|
local islast = false
|
|
while not islast and not block_follow[tok] do
|
|
islast = stat()
|
|
testnext(";")
|
|
end
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- performs parsing, returns parsed data structure
|
|
----------------------------------------------------------------------
|
|
|
|
function parser()
|
|
open_func()
|
|
fs.is_vararg = true -- main func. is always vararg
|
|
nextt() -- read first token
|
|
chunk()
|
|
check("<eof>")
|
|
close_func()
|
|
return globalinfo, localinfo
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- initialization function
|
|
----------------------------------------------------------------------
|
|
|
|
function init(tokorig, seminfoorig, toklnorig)
|
|
tpos = 1 -- token position
|
|
top_fs = {} -- reset top level function state
|
|
------------------------------------------------------------------
|
|
-- set up grammar-only token tables; impedance-matching...
|
|
-- note that constants returned by the lexer are source-level, so
|
|
-- for now, fake(!) constant tokens (TK_NUMBER|TK_STRING|TK_LSTRING)
|
|
------------------------------------------------------------------
|
|
local j = 1
|
|
toklist, seminfolist, toklnlist, xreflist = {}, {}, {}, {}
|
|
for i = 1, #tokorig do
|
|
local tok = tokorig[i]
|
|
local yep = true
|
|
if tok == "TK_KEYWORD" or tok == "TK_OP" then
|
|
tok = seminfoorig[i]
|
|
elseif tok == "TK_NAME" then
|
|
tok = "<name>"
|
|
seminfolist[j] = seminfoorig[i]
|
|
elseif tok == "TK_NUMBER" then
|
|
tok = "<number>"
|
|
seminfolist[j] = 0 -- fake!
|
|
elseif tok == "TK_STRING" or tok == "TK_LSTRING" then
|
|
tok = "<string>"
|
|
seminfolist[j] = "" -- fake!
|
|
elseif tok == "TK_EOS" then
|
|
tok = "<eof>"
|
|
else
|
|
-- non-grammar tokens; ignore them
|
|
yep = false
|
|
end
|
|
if yep then -- set rest of the information
|
|
toklist[j] = tok
|
|
toklnlist[j] = toklnorig[i]
|
|
xreflist[j] = i
|
|
j = j + 1
|
|
end
|
|
end--for
|
|
------------------------------------------------------------------
|
|
-- initialize data structures for variable tracking
|
|
------------------------------------------------------------------
|
|
globalinfo, globallookup, localinfo = {}, {}, {}
|
|
ilocalinfo, ilocalrefs = {}, {}
|
|
end
|
|
|
|
return _G
|
|
end)
|
|
package.preload['optlex'] = (function (...)
|
|
--[[--------------------------------------------------------------------
|
|
|
|
optlex.lua: does lexer-based optimizations
|
|
This file is part of LuaSrcDiet.
|
|
|
|
Copyright (c) 2008 Kein-Hong Man <khman@users.sf.net>
|
|
The COPYRIGHT file describes the conditions
|
|
under which this software may be distributed.
|
|
|
|
See the ChangeLog for more information.
|
|
|
|
----------------------------------------------------------------------]]
|
|
|
|
--[[--------------------------------------------------------------------
|
|
-- NOTES:
|
|
-- * For more lexer-based optimization ideas, see the TODO items or
|
|
-- look at technotes.txt.
|
|
-- * TODO: general string delimiter conversion optimizer
|
|
-- * TODO: (numbers) warn if overly significant digit
|
|
----------------------------------------------------------------------]]
|
|
|
|
local base = _G
|
|
local string = string
|
|
module "optlex"
|
|
local match = string.match
|
|
local sub = string.sub
|
|
local find = string.find
|
|
local rep = string.rep
|
|
local print
|
|
|
|
------------------------------------------------------------------------
|
|
-- variables and data structures
|
|
------------------------------------------------------------------------
|
|
|
|
-- error function, can override by setting own function into module
|
|
error = base.error
|
|
|
|
warn = {} -- table for warning flags
|
|
|
|
local stoks, sinfos, stoklns -- source lists
|
|
|
|
local is_realtoken = { -- significant (grammar) tokens
|
|
TK_KEYWORD = true,
|
|
TK_NAME = true,
|
|
TK_NUMBER = true,
|
|
TK_STRING = true,
|
|
TK_LSTRING = true,
|
|
TK_OP = true,
|
|
TK_EOS = true,
|
|
}
|
|
local is_faketoken = { -- whitespace (non-grammar) tokens
|
|
TK_COMMENT = true,
|
|
TK_LCOMMENT = true,
|
|
TK_EOL = true,
|
|
TK_SPACE = true,
|
|
}
|
|
|
|
local opt_details -- for extra information
|
|
|
|
------------------------------------------------------------------------
|
|
-- true if current token is at the start of a line
|
|
-- * skips over deleted tokens via recursion
|
|
------------------------------------------------------------------------
|
|
|
|
local function atlinestart(i)
|
|
local tok = stoks[i - 1]
|
|
if i <= 1 or tok == "TK_EOL" then
|
|
return true
|
|
elseif tok == "" then
|
|
return atlinestart(i - 1)
|
|
end
|
|
return false
|
|
end
|
|
|
|
------------------------------------------------------------------------
|
|
-- true if current token is at the end of a line
|
|
-- * skips over deleted tokens via recursion
|
|
------------------------------------------------------------------------
|
|
|
|
local function atlineend(i)
|
|
local tok = stoks[i + 1]
|
|
if i >= #stoks or tok == "TK_EOL" or tok == "TK_EOS" then
|
|
return true
|
|
elseif tok == "" then
|
|
return atlineend(i + 1)
|
|
end
|
|
return false
|
|
end
|
|
|
|
------------------------------------------------------------------------
|
|
-- counts comment EOLs inside a long comment
|
|
-- * in order to keep line numbering, EOLs need to be reinserted
|
|
------------------------------------------------------------------------
|
|
|
|
local function commenteols(lcomment)
|
|
local sep = #match(lcomment, "^%-%-%[=*%[")
|
|
local z = sub(lcomment, sep + 1, -(sep - 1)) -- remove delims
|
|
local i, c = 1, 0
|
|
while true do
|
|
local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i)
|
|
if not p then break end -- if no matches, done
|
|
i = p + 1
|
|
c = c + 1
|
|
if #s > 0 and r ~= s then -- skip CRLF or LFCR
|
|
i = i + 1
|
|
end
|
|
end
|
|
return c
|
|
end
|
|
|
|
------------------------------------------------------------------------
|
|
-- compares two tokens (i, j) and returns the whitespace required
|
|
-- * important! see technotes.txt for more information
|
|
-- * only two grammar/real tokens are being considered
|
|
-- * if "", no separation is needed
|
|
-- * if " ", then at least one whitespace (or EOL) is required
|
|
------------------------------------------------------------------------
|
|
|
|
local function checkpair(i, j)
|
|
local match = match
|
|
local t1, t2 = stoks[i], stoks[j]
|
|
--------------------------------------------------------------------
|
|
if t1 == "TK_STRING" or t1 == "TK_LSTRING" or
|
|
t2 == "TK_STRING" or t2 == "TK_LSTRING" then
|
|
return ""
|
|
--------------------------------------------------------------------
|
|
elseif t1 == "TK_OP" or t2 == "TK_OP" then
|
|
if (t1 == "TK_OP" and (t2 == "TK_KEYWORD" or t2 == "TK_NAME")) or
|
|
(t2 == "TK_OP" and (t1 == "TK_KEYWORD" or t1 == "TK_NAME")) then
|
|
return ""
|
|
end
|
|
if t1 == "TK_OP" and t2 == "TK_OP" then
|
|
-- for TK_OP/TK_OP pairs, see notes in technotes.txt
|
|
local op, op2 = sinfos[i], sinfos[j]
|
|
if (match(op, "^%.%.?$") and match(op2, "^%.")) or
|
|
(match(op, "^[~=<>]$") and op2 == "=") or
|
|
(op == "[" and (op2 == "[" or op2 == "=")) then
|
|
return " "
|
|
end
|
|
return ""
|
|
end
|
|
-- "TK_OP" + "TK_NUMBER" case
|
|
local op = sinfos[i]
|
|
if t2 == "TK_OP" then op = sinfos[j] end
|
|
if match(op, "^%.%.?%.?$") then
|
|
return " "
|
|
end
|
|
return ""
|
|
--------------------------------------------------------------------
|
|
else-- "TK_KEYWORD" | "TK_NAME" | "TK_NUMBER" then
|
|
return " "
|
|
--------------------------------------------------------------------
|
|
end
|
|
end
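-- A few illustrative pairs for checkpair() (comment only, derived from the
-- rules above; the quoted items stand for the tokens' source text):
--   "end" "return" -> " "  two keywords/names can never abut
--   "="   "x"      -> ""   an operator and a name can abut safely
--   ".."  ".5"     -> " "  would otherwise merge into "...5"
--   "<"   "="      -> " "  would otherwise merge into "<="
--   "x"   "'s'"    -> ""   strings carry their own delimiters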
|
|
|
|
------------------------------------------------------------------------
|
|
-- repack tokens, removing deletions caused by optimization process
|
|
------------------------------------------------------------------------
|
|
|
|
local function repack_tokens()
|
|
local dtoks, dinfos, dtoklns = {}, {}, {}
|
|
local j = 1
|
|
for i = 1, #stoks do
|
|
local tok = stoks[i]
|
|
if tok ~= "" then
|
|
dtoks[j], dinfos[j], dtoklns[j] = tok, sinfos[i], stoklns[i]
|
|
j = j + 1
|
|
end
|
|
end
|
|
stoks, sinfos, stoklns = dtoks, dinfos, dtoklns
|
|
end
|
|
|
|
------------------------------------------------------------------------
|
|
-- number optimization
|
|
-- * optimization using string formatting functions is one way of doing
|
|
-- this, but here, we consider all cases and handle them separately
|
|
-- (possibly an idiotic approach...)
|
|
-- * scientific notation being generated is not in canonical form, this
|
|
-- may or may not be a bad thing, feedback welcome
|
|
-- * note: intermediate portions need to fit into a normal number range
|
|
-- * optimizations can be divided based on number patterns:
|
|
-- * hexadecimal:
|
|
-- (1) no need to remove leading zeros, just skip to (2)
|
|
-- (2) convert to integer if size equal or smaller
|
|
-- * change if equal size -> lose the 'x' to reduce entropy
|
|
-- (3) number is then processed as an integer
|
|
-- (4) note: does not make 0[xX] consistent
|
|
-- * integer:
|
|
-- (1) note: includes anything with trailing ".", ".0", ...
|
|
-- (2) remove useless fractional part, if present, e.g. 123.000
|
|
-- (3) remove leading zeros, e.g. 000123
|
|
-- (4) switch to scientific if shorter, e.g. 123000 -> 123e3
|
|
-- * with fraction:
|
|
-- (1) split into digits dot digits
|
|
-- (2) if no integer portion, take as zero (can omit later)
|
|
-- (3) handle degenerate .000 case, after which the fractional part
|
|
-- must be non-zero (if zero, it's matched as an integer)
|
|
-- (4) remove trailing zeros for fractional portion
|
|
-- (5) p.q where p > 0 and q > 0 cannot be shortened any more
|
|
-- (6) otherwise p == 0 and the form is .q, e.g. .000123
|
|
-- (7) if scientific shorter, convert, e.g. .000123 -> 123e-6
|
|
-- * scientific:
|
|
-- (1) split into (digits dot digits) [eE] ([+-] digits)
|
|
-- (2) if significand has ".", shift it out so it becomes an integer
|
|
-- (3) if significand is zero, just use zero
|
|
-- (4) remove leading zeros for significand
|
|
-- (5) shift out trailing zeros for significand
|
|
-- (6) examine exponent and determine which format is best:
|
|
-- integer, with fraction, scientific
|
|
------------------------------------------------------------------------
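-- Worked examples of the rules above (illustrative comment only; each can
-- be traced through do_number() below):
--   0x0010     -> 16       hex converted when the integer form is shorter
--   000123.000 -> 123      leading zeros and useless fraction dropped
--   1230000    -> 123e4    scientific form wins for long runs of zeros
--   .000123    -> 123e-6   likewise for small fractions
--   1.5e3      -> 1500     small positive exponent expanded back out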
|
|
|
|
local function do_number(i)
|
|
local before = sinfos[i] -- 'before'
|
|
local z = before -- working representation
|
|
local y -- 'after', if better
|
|
--------------------------------------------------------------------
|
|
if match(z, "^0[xX]") then -- hexadecimal number
|
|
local v = base.tostring(base.tonumber(z))
|
|
if #v <= #z then
|
|
z = v -- change to integer, AND continue
|
|
else
|
|
return -- no change; stick to hex
|
|
end
|
|
end
|
|
--------------------------------------------------------------------
|
|
if match(z, "^%d+%.?0*$") then -- integer or has useless frac
|
|
z = match(z, "^(%d+)%.?0*$") -- int portion only
|
|
if z + 0 > 0 then
|
|
z = match(z, "^0*([1-9]%d*)$") -- remove leading zeros
|
|
local v = #match(z, "0*$")
|
|
local nv = base.tostring(v)
|
|
if v > #nv + 1 then -- scientific is shorter
|
|
z = sub(z, 1, #z - v).."e"..nv
|
|
end
|
|
y = z
|
|
else
|
|
y = "0" -- basic zero
|
|
end
|
|
--------------------------------------------------------------------
|
|
elseif not match(z, "[eE]") then -- number with fraction part
|
|
local p, q = match(z, "^(%d*)%.(%d+)$") -- split
|
|
if p == "" then p = 0 end -- int part zero
|
|
if q + 0 == 0 and p == 0 then
|
|
y = "0" -- degenerate .000 case
|
|
else
|
|
-- now, q > 0 holds and p is a number
|
|
local v = #match(q, "0*$") -- remove trailing zeros
|
|
if v > 0 then
|
|
q = sub(q, 1, #q - v)
|
|
end
|
|
-- if p > 0, nothing else we can do to simplify p.q case
|
|
if p + 0 > 0 then
|
|
y = p.."."..q
|
|
else
|
|
y = "."..q -- tentative, e.g. .000123
|
|
local v = #match(q, "^0*") -- # leading zeros
|
|
local w = #q - v -- # significant digits
|
|
local nv = base.tostring(#q)
|
|
-- e.g. compare 123e-6 versus .000123
|
|
if w + 2 + #nv < 1 + #q then
|
|
y = sub(q, -w).."e-"..nv
|
|
end
|
|
end
|
|
end
|
|
--------------------------------------------------------------------
|
|
else -- scientific number
|
|
local sig, ex = match(z, "^([^eE]+)[eE]([%+%-]?%d+)$")
|
|
ex = base.tonumber(ex)
|
|
-- if got ".", shift out fractional portion of significand
|
|
local p, q = match(sig, "^(%d*)%.(%d*)$")
|
|
if p then
|
|
ex = ex - #q
|
|
sig = p..q
|
|
end
|
|
if sig + 0 == 0 then
|
|
y = "0" -- basic zero
|
|
else
|
|
local v = #match(sig, "^0*") -- remove leading zeros
|
|
sig = sub(sig, v + 1)
|
|
v = #match(sig, "0*$") -- shift out trailing zeros
|
|
if v > 0 then
|
|
sig = sub(sig, 1, #sig - v)
|
|
ex = ex + v
|
|
end
|
|
-- examine exponent and determine which format is best
|
|
local nex = base.tostring(ex)
|
|
if ex == 0 then -- it's just an integer
|
|
y = sig
|
|
elseif ex > 0 and (ex <= 1 + #nex) then -- expand to a plain integer
|
|
y = sig..rep("0", ex)
|
|
elseif ex < 0 and (ex >= -#sig) then -- fraction, e.g. .123
|
|
v = #sig + ex
|
|
y = sub(sig, 1, v).."."..sub(sig, v + 1)
|
|
elseif ex < 0 and (#nex >= -ex - #sig) then
|
|
-- e.g. compare 1234e-5 versus .01234
|
|
-- gives: #sig + 1 + #nex >= 1 + (-ex - #sig) + #sig
|
|
-- -> #nex >= -ex - #sig
|
|
v = -ex - #sig
|
|
y = "."..rep("0", v)..sig
|
|
else -- non-canonical scientific representation
|
|
y = sig.."e"..ex
|
|
end
|
|
end--if sig
|
|
end
|
|
--------------------------------------------------------------------
|
|
if y and y ~= sinfos[i] then
|
|
if opt_details then
|
|
print("<number> (line "..stoklns[i]..") "..sinfos[i].." -> "..y)
|
|
opt_details = opt_details + 1
|
|
end
|
|
sinfos[i] = y
|
|
end
|
|
end
|
|
|
|
------------------------------------------------------------------------
|
|
-- string optimization
|
|
-- * note: works on well-formed strings only!
|
|
-- * optimizations on characters can be summarized as follows:
|
|
-- \a\b\f\n\r\t\v -- no change
|
|
-- \\ -- no change
|
|
-- \"\' -- depends on delim, other can remove \
|
|
-- \[\] -- remove \
|
|
-- \<char> -- general escape, remove \
|
|
-- \<eol> -- normalize the EOL only
|
|
-- \ddd -- if \a\b\f\n\r\t\v, change to latter
|
|
-- if other < ascii 32, keep ddd but zap leading zeros
|
|
-- if >= ascii 32, translate it into the literal, then also
|
|
-- do escapes for \\,\",\' cases
|
|
-- <other> -- no change
|
|
-- * switch delimiters if string becomes shorter
|
|
------------------------------------------------------------------------
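-- Illustrative transformations (comment only, following the rules above):
--   "\100\101\102"     -> "def"    printable \ddd escapes become literals
--   "\10"              -> "\n"     control codes get their short escapes
--   '\q'               -> 'q'      unknown escape, backslash dropped
--   "say \"hi\" twice" -> 'say "hi" twice'   delimiters switched (shorter)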
|
|
|
|
local function do_string(I)
|
|
local info = sinfos[I]
|
|
local delim = sub(info, 1, 1) -- delimiter used
|
|
local ndelim = (delim == "'") and '"' or "'" -- opposite " <-> '
|
|
local z = sub(info, 2, -2) -- actual string
|
|
local i = 1
|
|
local c_delim, c_ndelim = 0, 0 -- "/' counts
|
|
--------------------------------------------------------------------
|
|
while i <= #z do
|
|
local c = sub(z, i, i)
|
|
----------------------------------------------------------------
|
|
if c == "\\" then -- escaped stuff
|
|
local j = i + 1
|
|
local d = sub(z, j, j)
|
|
local p = find("abfnrtv\\\n\r\"\'0123456789", d, 1, true)
|
|
------------------------------------------------------------
|
|
if not p then -- \<char> -- remove \
|
|
z = sub(z, 1, i - 1)..sub(z, j)
|
|
i = i + 1
|
|
------------------------------------------------------------
|
|
elseif p <= 8 then -- \a\b\f\n\r\t\v\\
|
|
i = i + 2 -- no change
|
|
------------------------------------------------------------
|
|
elseif p <= 10 then -- \<eol> -- normalize EOL
|
|
local eol = sub(z, j, j + 1)
|
|
if eol == "\r\n" or eol == "\n\r" then
|
|
z = sub(z, 1, i).."\n"..sub(z, j + 2)
|
|
elseif p == 10 then -- \r case
|
|
z = sub(z, 1, i).."\n"..sub(z, j + 1)
|
|
end
|
|
i = i + 2
|
|
------------------------------------------------------------
|
|
elseif p <= 12 then -- \"\' -- remove \ for ndelim
|
|
if d == delim then
|
|
c_delim = c_delim + 1
|
|
i = i + 2
|
|
else
|
|
c_ndelim = c_ndelim + 1
|
|
z = sub(z, 1, i - 1)..sub(z, j)
|
|
i = i + 1
|
|
end
|
|
------------------------------------------------------------
|
|
else -- \ddd -- various steps
|
|
local s = match(z, "^(%d%d?%d?)", j)
|
|
j = i + 1 + #s -- skip to location
|
|
local cv = s + 0
|
|
local cc = string.char(cv)
|
|
local p = find("\a\b\f\n\r\t\v", cc, 1, true)
|
|
if p then -- special escapes
|
|
s = "\\"..sub("abfnrtv", p, p)
|
|
elseif cv < 32 then -- normalized \ddd
|
|
s = "\\"..cv
|
|
elseif cc == delim then -- \<delim>
|
|
s = "\\"..cc
|
|
c_delim = c_delim + 1
|
|
elseif cc == "\\" then -- \\
|
|
s = "\\\\"
|
|
else -- literal character
|
|
s = cc
|
|
if cc == ndelim then
|
|
c_ndelim = c_ndelim + 1
|
|
end
|
|
end
|
|
z = sub(z, 1, i - 1)..s..sub(z, j)
|
|
i = i + #s
|
|
------------------------------------------------------------
|
|
end--if p
|
|
----------------------------------------------------------------
|
|
else-- c ~= "\\" -- <other> -- no change
|
|
i = i + 1
|
|
if c == ndelim then -- count ndelim, for switching delimiters
|
|
c_ndelim = c_ndelim + 1
|
|
end
|
|
----------------------------------------------------------------
|
|
end--if c
|
|
end--while
|
|
--------------------------------------------------------------------
|
|
-- switching delimiters, a long-winded derivation:
|
|
-- (1) delim takes 2+2*c_delim bytes, ndelim takes c_ndelim bytes
|
|
-- (2) delim becomes c_delim bytes, ndelim becomes 2+2*c_ndelim bytes
|
|
-- simplifying the condition (1)>(2) --> c_delim > c_ndelim
|
|
if c_delim > c_ndelim then
|
|
i = 1
|
|
while i <= #z do
|
|
local p, q, r = find(z, "([\'\"])", i)
|
|
if not p then break end
|
|
if r == delim then -- \<delim> -> <delim>
|
|
z = sub(z, 1, p - 2)..sub(z, p)
|
|
i = p
|
|
else-- r == ndelim -- <ndelim> -> \<ndelim>
|
|
z = sub(z, 1, p - 1).."\\"..sub(z, p)
|
|
i = p + 2
|
|
end
|
|
end--while
|
|
delim = ndelim -- actually change delimiters
|
|
end
|
|
--------------------------------------------------------------------
|
|
z = delim..z..delim
|
|
if z ~= sinfos[I] then
|
|
if opt_details then
|
|
print("<string> (line "..stoklns[I]..") "..sinfos[I].." -> "..z)
|
|
opt_details = opt_details + 1
|
|
end
|
|
sinfos[I] = z
|
|
end
|
|
end
|
|
|
|
------------------------------------------------------------------------
|
|
-- long string optimization
|
|
-- * note: warning flagged if trailing whitespace found, not trimmed
|
|
-- * remove first optional newline
|
|
-- * normalize embedded newlines
|
|
-- * reduce '=' separators in delimiters if possible
|
|
------------------------------------------------------------------------
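-- Example (comment only): a long string like [==[ nothing fancy ]==] uses
-- more '=' than it needs; since the body contains no "]]", the code below
-- reduces the delimiters to [[ nothing fancy ]].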
|
|
|
|
local function do_lstring(I)
|
|
local info = sinfos[I]
|
|
local delim1 = match(info, "^%[=*%[") -- cut out delimiters
|
|
local sep = #delim1
|
|
local delim2 = sub(info, -sep, -1)
|
|
local z = sub(info, sep + 1, -(sep + 1)) -- lstring without delims
|
|
local y = ""
|
|
local i = 1
|
|
--------------------------------------------------------------------
|
|
while true do
|
|
local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i)
|
|
-- deal with a single line
|
|
local ln
|
|
if not p then
|
|
ln = sub(z, i)
|
|
elseif p >= i then
|
|
ln = sub(z, i, p - 1)
|
|
end
|
|
if ln ~= "" then
|
|
-- flag a warning if there are trailing spaces, won't optimize!
|
|
if match(ln, "%s+$") then
|
|
warn.lstring = "trailing whitespace in long string near line "..stoklns[I]
|
|
end
|
|
y = y..ln
|
|
end
|
|
if not p then -- done if no more EOLs
|
|
break
|
|
end
|
|
-- deal with line endings, normalize them
|
|
i = p + 1
|
|
if p then
|
|
if #s > 0 and r ~= s then -- skip CRLF or LFCR
|
|
i = i + 1
|
|
end
|
|
-- skip first newline, which can be safely deleted
|
|
if not(i == 1 and i == p) then
|
|
y = y.."\n"
|
|
end
|
|
end
|
|
end--while
|
|
--------------------------------------------------------------------
|
|
-- handle possible deletion of one or more '=' separators
|
|
if sep >= 3 then
|
|
local chk, okay = sep - 1
|
|
-- loop to test ending delimiter with less of '=' down to zero
|
|
while chk >= 2 do
|
|
local delim = "%]"..rep("=", chk - 2).."%]"
|
|
if not match(y, delim) then okay = chk end
|
|
chk = chk - 1
|
|
end
|
|
if okay then -- change delimiters
|
|
sep = rep("=", okay - 2)
|
|
delim1, delim2 = "["..sep.."[", "]"..sep.."]"
|
|
end
|
|
end
|
|
--------------------------------------------------------------------
|
|
sinfos[I] = delim1..y..delim2
|
|
end
|
|
|
|
------------------------------------------------------------------------
|
|
-- long comment optimization
|
|
-- * note: does not remove first optional newline
|
|
-- * trim trailing whitespace
|
|
-- * normalize embedded newlines
|
|
-- * reduce '=' separators in delimiters if possible
|
|
------------------------------------------------------------------------
|
|
|
|
local function do_lcomment(I)
|
|
local info = sinfos[I]
|
|
local delim1 = match(info, "^%-%-%[=*%[") -- cut out delimiters
|
|
local sep = #delim1
|
|
local delim2 = sub(info, -sep, -1)
|
|
local z = sub(info, sep + 1, -(sep - 1)) -- comment without delims
|
|
local y = ""
|
|
local i = 1
|
|
--------------------------------------------------------------------
|
|
while true do
|
|
local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i)
|
|
-- deal with a single line, extract and check trailing whitespace
|
|
local ln
|
|
if not p then
|
|
ln = sub(z, i)
|
|
elseif p >= i then
|
|
ln = sub(z, i, p - 1)
|
|
end
|
|
if ln ~= "" then
|
|
-- trim trailing whitespace if non-empty line
|
|
local ws = match(ln, "%s*$")
|
|
if #ws > 0 then ln = sub(ln, 1, -(#ws + 1)) end
|
|
y = y..ln
|
|
end
|
|
if not p then -- done if no more EOLs
|
|
break
|
|
end
|
|
-- deal with line endings, normalize them
|
|
i = p + 1
|
|
if p then
|
|
if #s > 0 and r ~= s then -- skip CRLF or LFCR
|
|
i = i + 1
|
|
end
|
|
y = y.."\n"
|
|
end
|
|
end--while
|
|
--------------------------------------------------------------------
|
|
-- handle possible deletion of one or more '=' separators
|
|
sep = sep - 2
|
|
if sep >= 3 then
|
|
local chk, okay = sep - 1
|
|
-- loop to test ending delimiter with less of '=' down to zero
|
|
while chk >= 2 do
|
|
local delim = "%]"..rep("=", chk - 2).."%]"
|
|
if not match(y, delim) then okay = chk end
|
|
chk = chk - 1
|
|
end
|
|
if okay then -- change delimiters
|
|
sep = rep("=", okay - 2)
|
|
delim1, delim2 = "--["..sep.."[", "]"..sep.."]"
|
|
end
|
|
end
|
|
--------------------------------------------------------------------
|
|
sinfos[I] = delim1..y..delim2
|
|
end
|
|
|
|
------------------------------------------------------------------------
|
|
-- short comment optimization
|
|
-- * trim trailing whitespace
|
|
------------------------------------------------------------------------
|
|
|
|
local function do_comment(i)
|
|
local info = sinfos[i]
|
|
local ws = match(info, "%s*$") -- just look from end of string
|
|
if #ws > 0 then
|
|
info = sub(info, 1, -(#ws + 1)) -- trim trailing whitespace
|
|
end
|
|
sinfos[i] = info
|
|
end
|
|
|
|
------------------------------------------------------------------------
|
|
-- returns true if string found in long comment
|
|
-- * this is a feature to keep copyright or license texts
|
|
------------------------------------------------------------------------
|
|
|
|
local function keep_lcomment(opt_keep, info)
|
|
if not opt_keep then return false end -- option not set
|
|
local delim1 = match(info, "^%-%-%[=*%[") -- cut out delimiters
|
|
local sep = #delim1
|
|
local delim2 = sub(info, -sep, -1)
|
|
local z = sub(info, sep + 1, -(sep - 1)) -- comment without delims
|
|
if find(z, opt_keep, 1, true) then -- try to match
|
|
return true
|
|
end
|
|
end
|
|
|
|
------------------------------------------------------------------------
|
|
-- main entry point
|
|
-- * currently, lexer processing has 2 passes
|
|
-- * processing is done on a line-oriented basis, which is easier to
|
|
-- grok due to the next point...
|
|
-- * since there are various options that can be enabled or disabled,
|
|
-- processing is a little messy or convoluted
|
|
------------------------------------------------------------------------
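-- Typical call (illustrative comment only, matching the signature below;
-- the actual invocation is in the main program at the end of this file):
--   toklist, seminfolist, toklnlist =
--     optlex.optimize(option, toklist, seminfolist, toklnlist)
-- where 'option' is the lookup table of flags such as option["opt-comments"]
-- and option["opt-whitespace"] built by the command line handler.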
|
|
|
|
function optimize(option, toklist, semlist, toklnlist)
|
|
--------------------------------------------------------------------
|
|
-- set option flags
|
|
--------------------------------------------------------------------
|
|
local opt_comments = option["opt-comments"]
|
|
local opt_whitespace = option["opt-whitespace"]
|
|
local opt_emptylines = option["opt-emptylines"]
|
|
local opt_eols = option["opt-eols"]
|
|
local opt_strings = option["opt-strings"]
|
|
local opt_numbers = option["opt-numbers"]
|
|
local opt_keep = option.KEEP
|
|
opt_details = option.DETAILS and 0 -- upvalues for details display
|
|
print = print or base.print
|
|
if opt_eols then -- forced settings, otherwise won't work properly
|
|
opt_comments = true
|
|
opt_whitespace = true
|
|
opt_emptylines = true
|
|
end
|
|
--------------------------------------------------------------------
|
|
-- variable initialization
|
|
--------------------------------------------------------------------
|
|
stoks, sinfos, stoklns -- set source lists
|
|
= toklist, semlist, toklnlist
|
|
local i = 1 -- token position
|
|
local tok, info -- current token
|
|
local prev -- position of last grammar token
|
|
-- on same line (for TK_SPACE stuff)
|
|
--------------------------------------------------------------------
|
|
-- changes a token, info pair
|
|
--------------------------------------------------------------------
|
|
local function settoken(tok, info, I)
|
|
I = I or i
|
|
stoks[I] = tok or ""
|
|
sinfos[I] = info or ""
|
|
end
|
|
--------------------------------------------------------------------
|
|
-- processing loop (PASS 1)
|
|
--------------------------------------------------------------------
|
|
while true do
|
|
tok, info = stoks[i], sinfos[i]
|
|
----------------------------------------------------------------
|
|
local atstart = atlinestart(i) -- set line begin flag
|
|
if atstart then prev = nil end
|
|
----------------------------------------------------------------
|
|
if tok == "TK_EOS" then -- end of stream/pass
|
|
break
|
|
----------------------------------------------------------------
|
|
elseif tok == "TK_KEYWORD" or -- keywords, identifiers,
|
|
tok == "TK_NAME" or -- operators
|
|
tok == "TK_OP" then
|
|
-- TK_KEYWORD and TK_OP can't be optimized without a big
|
|
-- optimization framework; it would be more of an optimizing
|
|
-- compiler, not a source code compressor
|
|
-- TK_NAMEs that are locals need the parser to analyze/optimize
|
|
prev = i
|
|
----------------------------------------------------------------
|
|
elseif tok == "TK_NUMBER" then -- numbers
|
|
if opt_numbers then
|
|
do_number(i) -- optimize
|
|
end
|
|
prev = i
|
|
----------------------------------------------------------------
|
|
elseif tok == "TK_STRING" or -- strings, long strings
|
|
tok == "TK_LSTRING" then
|
|
if opt_strings then
|
|
if tok == "TK_STRING" then
|
|
do_string(i) -- optimize
|
|
else
|
|
do_lstring(i) -- optimize
|
|
end
|
|
end
|
|
prev = i
|
|
----------------------------------------------------------------
|
|
elseif tok == "TK_COMMENT" then -- short comments
|
|
if opt_comments then
|
|
if i == 1 and sub(info, 1, 1) == "#" then
|
|
-- keep shbang comment, trim whitespace
|
|
do_comment(i)
|
|
else
|
|
-- safe to delete, as a TK_EOL (or TK_EOS) always follows
|
|
settoken() -- remove entirely
|
|
end
|
|
elseif opt_whitespace then -- trim whitespace only
|
|
do_comment(i)
|
|
end
|
|
----------------------------------------------------------------
|
|
elseif tok == "TK_LCOMMENT" then -- long comments
|
|
if keep_lcomment(opt_keep, info) then
|
|
------------------------------------------------------------
|
|
-- if --keep, we keep a long comment if <msg> is found;
|
|
-- this is a feature to keep copyright or license texts
|
|
if opt_whitespace then -- trim whitespace only
|
|
do_lcomment(i)
|
|
end
|
|
prev = i
|
|
elseif opt_comments then
|
|
local eols = commenteols(info)
|
|
------------------------------------------------------------
|
|
-- prepare opt_emptylines case first, if a disposable token
|
|
-- follows, current one is safe to dump, else keep a space;
|
|
-- it is implied that the operation is safe for '-', because
|
|
-- current is a TK_LCOMMENT, and must be separate from a '-'
|
|
if is_faketoken[stoks[i + 1]] then
|
|
settoken() -- remove entirely
|
|
tok = ""
|
|
else
|
|
settoken("TK_SPACE", " ")
|
|
end
|
|
------------------------------------------------------------
|
|
-- if there are embedded EOLs to keep and opt_emptylines is
|
|
-- disabled, then switch the token into one or more EOLs
|
|
if not opt_emptylines and eols > 0 then
|
|
settoken("TK_EOL", rep("\n", eols))
|
|
end
|
|
------------------------------------------------------------
|
|
-- if optimizing whitespaces, force reinterpretation of the
|
|
-- token to give a chance for the space to be optimized away
|
|
if opt_whitespace and tok ~= "" then
|
|
i = i - 1 -- to reinterpret
|
|
end
|
|
------------------------------------------------------------
|
|
else -- disabled case
|
|
if opt_whitespace then -- trim whitespace only
|
|
do_lcomment(i)
|
|
end
|
|
prev = i
|
|
end
|
|
----------------------------------------------------------------
|
|
elseif tok == "TK_EOL" then -- line endings
|
|
if atstart and opt_emptylines then
|
|
settoken() -- remove entirely
|
|
elseif info == "\r\n" or info == "\n\r" then
|
|
-- normalize the rest of the EOLs for CRLF/LFCR only
|
|
-- (note that TK_LCOMMENT can change into several EOLs)
|
|
settoken("TK_EOL", "\n")
|
|
end
|
|
----------------------------------------------------------------
|
|
elseif tok == "TK_SPACE" then -- whitespace
|
|
if opt_whitespace then
|
|
if atstart or atlineend(i) then
|
|
-- delete leading and trailing whitespace
|
|
settoken() -- remove entirely
|
|
else
|
|
------------------------------------------------------------
|
|
-- at this point, since leading whitespace has been removed,
|
|
-- there should be either a real token or a TK_LCOMMENT
|
|
-- prior to hitting this whitespace; the TK_LCOMMENT case
|
|
-- only happens if opt_comments is disabled; so prev ~= nil
|
|
local ptok = stoks[prev]
|
|
if ptok == "TK_LCOMMENT" then
|
|
-- previous TK_LCOMMENT can abut with anything
|
|
settoken() -- remove entirely
|
|
else
|
|
-- prev must be a grammar token; consecutive TK_SPACE
|
|
-- tokens are impossible when optimizing whitespace
|
|
local ntok = stoks[i + 1]
|
|
if is_faketoken[ntok] then
|
|
-- handle special case where a '-' cannot abut with
|
|
-- either a short comment or a long comment
|
|
if (ntok == "TK_COMMENT" or ntok == "TK_LCOMMENT") and
|
|
ptok == "TK_OP" and sinfos[prev] == "-" then
|
|
-- keep token
|
|
else
|
|
settoken() -- remove entirely
|
|
end
|
|
else--is_realtoken
|
|
-- check a pair of grammar tokens, if can abut, then
|
|
-- delete space token entirely, otherwise keep one space
|
|
local s = checkpair(prev, i + 1)
|
|
if s == "" then
|
|
settoken() -- remove entirely
|
|
else
|
|
settoken("TK_SPACE", " ")
|
|
end
|
|
end
|
|
end
|
|
------------------------------------------------------------
|
|
end
|
|
end
|
|
----------------------------------------------------------------
|
|
else
|
|
error("unidentified token encountered")
|
|
end
|
|
----------------------------------------------------------------
|
|
i = i + 1
|
|
end--while
|
|
repack_tokens()
|
|
--------------------------------------------------------------------
|
|
-- processing loop (PASS 2)
|
|
--------------------------------------------------------------------
|
|
if opt_eols then
|
|
i = 1
|
|
-- aggressive EOL removal only works with most non-grammar tokens
|
|
-- optimized away because it is a rather simple scheme -- basically
|
|
-- it just checks 'real' token pairs around EOLs
|
|
if stoks[1] == "TK_COMMENT" then
|
|
-- first comment still existing must be shbang, skip whole line
|
|
i = 3
|
|
end
|
|
while true do
|
|
tok, info = stoks[i], sinfos[i]
|
|
--------------------------------------------------------------
|
|
if tok == "TK_EOS" then -- end of stream/pass
|
|
break
|
|
--------------------------------------------------------------
|
|
elseif tok == "TK_EOL" then -- consider each TK_EOL
|
|
local t1, t2 = stoks[i - 1], stoks[i + 1]
|
|
if is_realtoken[t1] and is_realtoken[t2] then -- sanity check
|
|
local s = checkpair(i - 1, i + 1)
|
|
if s == "" then
|
|
settoken() -- remove entirely
|
|
end
|
|
end
|
|
end--if tok
|
|
--------------------------------------------------------------
|
|
i = i + 1
|
|
end--while
|
|
repack_tokens()
|
|
end
|
|
--------------------------------------------------------------------
|
|
if opt_details and opt_details > 0 then print() end -- spacing
|
|
return stoks, sinfos, stoklns
|
|
end
|
|
end)
|
|
package.preload['optparser'] = (function (...)
|
|
--[[--------------------------------------------------------------------
|
|
|
|
optparser.lua: does parser-based optimizations
|
|
This file is part of LuaSrcDiet.
|
|
|
|
Copyright (c) 2008 Kein-Hong Man <khman@users.sf.net>
|
|
The COPYRIGHT file describes the conditions
|
|
under which this software may be distributed.
|
|
|
|
See the ChangeLog for more information.
|
|
|
|
----------------------------------------------------------------------]]
|
|
|
|
--[[--------------------------------------------------------------------
|
|
-- NOTES:
|
|
-- * For more parser-based optimization ideas, see the TODO items or
|
|
-- look at technotes.txt.
|
|
-- * The processing load is quite significant, but since this is an
|
|
-- off-line text processor, I believe we can wait a few seconds.
|
|
-- * TODO: might process "local a,a,a" wrongly... need tests!
|
|
-- * TODO: remove position handling if overlapped locals (rem < 0)
|
|
-- needs more study, to check behaviour
|
|
-- * TODO: there are probably better ways to do allocation, e.g. by
|
|
-- choosing better methods to sort and pick locals...
|
|
-- * TODO: we don't need 53*63 two-letter identifiers; we can make
|
|
-- do with significantly fewer depending on how many are really
|
|
-- needed and improve entropy; e.g. 13 needed -> choose 4*4 instead
|
|
----------------------------------------------------------------------]]
|
|
|
|
local base = _G
|
|
local string = string
|
|
local table = table
|
|
module "optparser"
|
|
|
|
----------------------------------------------------------------------
|
|
-- Letter frequencies for reducing symbol entropy (fixed version)
|
|
-- * Might help a wee bit when the output file is compressed
|
|
-- * See Wikipedia: http://en.wikipedia.org/wiki/Letter_frequencies
|
|
-- * We use letter frequencies according to a Linotype keyboard, plus
|
|
-- the underscore, and both lower case and upper case letters.
|
|
-- * The arrangement below (LC, underscore, %d, UC) is arbitrary.
|
|
-- * This is certainly not optimal, but is quick-and-dirty and the
|
|
-- process has no significant overhead
|
|
----------------------------------------------------------------------
|
|
|
|
local LETTERS = "etaoinshrdlucmfwypvbgkqjxz_ETAOINSHRDLUCMFWYPVBGKQJXZ"
|
|
local ALPHANUM = "etaoinshrdlucmfwypvbgkqjxz_0123456789ETAOINSHRDLUCMFWYPVBGKQJXZ"
|
|
|
|
-- names or identifiers that must be skipped
|
|
-- * the first two lines are for keywords
|
|
local SKIP_NAME = {}
|
|
for v in string.gmatch([[
|
|
and break do else elseif end false for function if in
|
|
local nil not or repeat return then true until while
|
|
self]], "%S+") do
|
|
SKIP_NAME[v] = true
|
|
end
|
|
|
|
------------------------------------------------------------------------
|
|
-- variables and data structures
|
|
------------------------------------------------------------------------
|
|
|
|
local toklist, seminfolist, -- token lists
|
|
globalinfo, localinfo, -- variable information tables
|
|
globaluniq, localuniq, -- unique name tables
|
|
var_new, -- index of new variable names
|
|
varlist -- list of output variables
|
|
|
|
----------------------------------------------------------------------
|
|
-- preprocess information table to get lists of unique names
|
|
----------------------------------------------------------------------
|
|
|
|
local function preprocess(infotable)
|
|
local uniqtable = {}
|
|
for i = 1, #infotable do -- enumerate info table
|
|
local obj = infotable[i]
|
|
local name = obj.name
|
|
--------------------------------------------------------------------
|
|
if not uniqtable[name] then -- not found, start an entry
|
|
uniqtable[name] = {
|
|
decl = 0, token = 0, size = 0,
|
|
}
|
|
end
|
|
--------------------------------------------------------------------
|
|
local uniq = uniqtable[name] -- count declarations, tokens, size
|
|
uniq.decl = uniq.decl + 1
|
|
local xref = obj.xref
|
|
local xcount = #xref
|
|
uniq.token = uniq.token + xcount
|
|
uniq.size = uniq.size + xcount * #name
|
|
--------------------------------------------------------------------
|
|
if obj.decl then -- if local table, create first,last pairs
|
|
obj.id = i
|
|
obj.xcount = xcount
|
|
if xcount > 1 then -- if ==1, means local never accessed
|
|
obj.first = xref[2]
|
|
obj.last = xref[xcount]
|
|
end
|
|
--------------------------------------------------------------------
|
|
else -- if global table, add a back ref
|
|
uniq.id = i
|
|
end
|
|
--------------------------------------------------------------------
|
|
end--for
|
|
return uniqtable
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- calculate actual symbol frequencies, in order to reduce entropy
|
|
-- * this may help further reduce the size of compressed sources
|
|
-- * note that since parsing optimizations are done before lexing
|
|
-- optimizations, the frequency table is not exact!
|
|
-- * yes, this will miss --keep block comments too...
|
|
----------------------------------------------------------------------
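-- Rough idea (comment only): count how often each byte occurs in the
-- surviving tokens, then re-sort LETTERS and ALPHANUM so the most frequent
-- bytes come first; new_var_name() below hands out names from the front of
-- LETTERS, so renamed locals reuse the bytes the rest of the source already
-- favours, which may help a later compression pass slightly.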
|
|
|
|
local function recalc_for_entropy(option)
|
|
local byte = string.byte
|
|
local char = string.char
|
|
-- table of token classes to accept in calculating symbol frequency
|
|
local ACCEPT = {
|
|
TK_KEYWORD = true, TK_NAME = true, TK_NUMBER = true,
|
|
TK_STRING = true, TK_LSTRING = true,
|
|
}
|
|
if not option["opt-comments"] then
|
|
ACCEPT.TK_COMMENT = true
|
|
ACCEPT.TK_LCOMMENT = true
|
|
end
|
|
--------------------------------------------------------------------
|
|
-- create a new table and remove any original locals by filtering
|
|
--------------------------------------------------------------------
|
|
local filtered = {}
|
|
for i = 1, #toklist do
|
|
filtered[i] = seminfolist[i]
|
|
end
|
|
for i = 1, #localinfo do -- enumerate local info table
|
|
local obj = localinfo[i]
|
|
local xref = obj.xref
|
|
for j = 1, obj.xcount do
|
|
local p = xref[j]
|
|
filtered[p] = "" -- remove locals
|
|
end
|
|
end
|
|
--------------------------------------------------------------------
|
|
local freq = {} -- reset symbol frequency table
|
|
for i = 0, 255 do freq[i] = 0 end
|
|
for i = 1, #toklist do -- gather symbol frequency
|
|
local tok, info = toklist[i], filtered[i]
|
|
if ACCEPT[tok] then
|
|
for j = 1, #info do
|
|
local c = byte(info, j)
|
|
freq[c] = freq[c] + 1
|
|
end
|
|
end--if
|
|
end--for
|
|
--------------------------------------------------------------------
|
|
-- function to re-sort symbols according to actual frequencies
|
|
--------------------------------------------------------------------
|
|
local function resort(symbols)
|
|
local symlist = {}
|
|
for i = 1, #symbols do -- prepare table to sort
|
|
local c = byte(symbols, i)
|
|
symlist[i] = { c = c, freq = freq[c], }
|
|
end
|
|
table.sort(symlist, -- sort selected symbols
|
|
function(v1, v2)
|
|
return v1.freq > v2.freq
|
|
end
|
|
)
|
|
local charlist = {} -- reconstitute the string
|
|
for i = 1, #symlist do
|
|
charlist[i] = char(symlist[i].c)
|
|
end
|
|
return table.concat(charlist)
|
|
end
|
|
--------------------------------------------------------------------
|
|
LETTERS = resort(LETTERS) -- change letter arrangement
|
|
ALPHANUM = resort(ALPHANUM)
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- returns a string containing a new local variable name to use, and
|
|
-- a flag indicating whether it collides with a global variable
|
|
-- * trapping keywords and other names like 'self' is done elsewhere
|
|
----------------------------------------------------------------------
|
|
|
|
local function new_var_name()
|
|
local var
|
|
local cletters, calphanum = #LETTERS, #ALPHANUM
|
|
local v = var_new
|
|
if v < cletters then -- single char
|
|
v = v + 1
|
|
var = string.sub(LETTERS, v, v)
|
|
else -- longer names
|
|
local range, sz = cletters, 1 -- calculate # chars fit
|
|
repeat
|
|
v = v - range
|
|
range = range * calphanum
|
|
sz = sz + 1
|
|
until range > v
|
|
local n = v % cletters -- left side cycles faster
|
|
v = (v - n) / cletters -- do first char first
|
|
n = n + 1
|
|
var = string.sub(LETTERS, n, n)
|
|
while sz > 1 do
|
|
local m = v % calphanum
|
|
v = (v - m) / calphanum
|
|
m = m + 1
|
|
var = var..string.sub(ALPHANUM, m, m)
|
|
sz = sz - 1
|
|
end
|
|
end
|
|
var_new = var_new + 1
|
|
return var, globaluniq[var] ~= nil
|
|
end
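-- Allocation order example (comment only, traced with the default tables
-- before any entropy re-sort): the first names handed out are e, t, a, o,
-- i, n, ...; once all 53 single characters are used, two-character names
-- follow, starting with "ee", "te", "ae", ... The caller skips keywords and
-- "self" via SKIP_NAME.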
|
|
|
|
----------------------------------------------------------------------
|
|
-- calculate and print some statistics
|
|
-- * probably better in main source, put here for now
|
|
----------------------------------------------------------------------
|
|
|
|
local function stats_summary(globaluniq, localuniq, afteruniq, option)
|
|
local print = print or base.print
|
|
local fmt = string.format
|
|
local opt_details = option.DETAILS
|
|
local uniq_g , uniq_li, uniq_lo, uniq_ti, uniq_to, -- stats needed
|
|
decl_g, decl_li, decl_lo, decl_ti, decl_to,
|
|
token_g, token_li, token_lo, token_ti, token_to,
|
|
size_g, size_li, size_lo, size_ti, size_to
|
|
= 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
|
local function avg(c, l) -- safe average function
|
|
if c == 0 then return 0 end
|
|
return l / c
|
|
end
|
|
--------------------------------------------------------------------
|
|
-- collect statistics (note: globals do not have declarations!)
|
|
--------------------------------------------------------------------
|
|
for name, uniq in base.pairs(globaluniq) do
|
|
uniq_g = uniq_g + 1
|
|
token_g = token_g + uniq.token
|
|
size_g = size_g + uniq.size
|
|
end
|
|
for name, uniq in base.pairs(localuniq) do
|
|
uniq_li = uniq_li + 1
|
|
decl_li = decl_li + uniq.decl
|
|
token_li = token_li + uniq.token
|
|
size_li = size_li + uniq.size
|
|
end
|
|
for name, uniq in base.pairs(afteruniq) do
|
|
uniq_lo = uniq_lo + 1
|
|
decl_lo = decl_lo + uniq.decl
|
|
token_lo = token_lo + uniq.token
|
|
size_lo = size_lo + uniq.size
|
|
end
|
|
uniq_ti = uniq_g + uniq_li
|
|
decl_ti = decl_g + decl_li
|
|
token_ti = token_g + token_li
|
|
size_ti = size_g + size_li
|
|
uniq_to = uniq_g + uniq_lo
|
|
decl_to = decl_g + decl_lo
|
|
token_to = token_g + token_lo
|
|
size_to = size_g + size_lo
|
|
--------------------------------------------------------------------
|
|
-- detailed stats: global list
|
|
--------------------------------------------------------------------
|
|
if opt_details then
|
|
local sorted = {} -- sort table of unique global names by size
|
|
for name, uniq in base.pairs(globaluniq) do
|
|
uniq.name = name
|
|
sorted[#sorted + 1] = uniq
|
|
end
|
|
table.sort(sorted,
|
|
function(v1, v2)
|
|
return v1.size > v2.size
|
|
end
|
|
)
|
|
local tabf1, tabf2 = "%8s%8s%10s %s", "%8d%8d%10.2f %s"
|
|
local hl = string.rep("-", 44)
|
|
print("*** global variable list (sorted by size) ***\n"..hl)
|
|
print(fmt(tabf1, "Token", "Input", "Input", "Global"))
|
|
print(fmt(tabf1, "Count", "Bytes", "Average", "Name"))
|
|
print(hl)
|
|
for i = 1, #sorted do
|
|
local uniq = sorted[i]
|
|
print(fmt(tabf2, uniq.token, uniq.size, avg(uniq.token, uniq.size), uniq.name))
|
|
end
|
|
print(hl)
|
|
print(fmt(tabf2, token_g, size_g, avg(token_g, size_g), "TOTAL"))
|
|
print(hl.."\n")
|
|
--------------------------------------------------------------------
|
|
-- detailed stats: local list
|
|
--------------------------------------------------------------------
|
|
local tabf1, tabf2 = "%8s%8s%8s%10s%8s%10s %s", "%8d%8d%8d%10.2f%8d%10.2f %s"
|
|
local hl = string.rep("-", 70)
|
|
print("*** local variable list (sorted by allocation order) ***\n"..hl)
|
|
print(fmt(tabf1, "Decl.", "Token", "Input", "Input", "Output", "Output", "Global"))
|
|
print(fmt(tabf1, "Count", "Count", "Bytes", "Average", "Bytes", "Average", "Name"))
|
|
print(hl)
|
|
for i = 1, #varlist do -- iterate according to order assigned
|
|
local name = varlist[i]
|
|
local uniq = afteruniq[name]
|
|
local old_t, old_s = 0, 0
|
|
for j = 1, #localinfo do -- find corresponding old names and calculate
|
|
local obj = localinfo[j]
|
|
if obj.name == name then
|
|
old_t = old_t + obj.xcount
|
|
old_s = old_s + obj.xcount * #obj.oldname
|
|
end
|
|
end
|
|
print(fmt(tabf2, uniq.decl, uniq.token, old_s, avg(old_t, old_s),
|
|
uniq.size, avg(uniq.token, uniq.size), name))
|
|
end
|
|
print(hl)
|
|
print(fmt(tabf2, decl_lo, token_lo, size_li, avg(token_li, size_li),
|
|
size_lo, avg(token_lo, size_lo), "TOTAL"))
|
|
print(hl.."\n")
|
|
end--if opt_details
|
|
--------------------------------------------------------------------
|
|
-- display output
|
|
--------------------------------------------------------------------
|
|
local tabf1, tabf2 = "%-16s%8s%8s%8s%8s%10s", "%-16s%8d%8d%8d%8d%10.2f"
|
|
local hl = string.rep("-", 58)
|
|
print("*** local variable optimization summary ***\n"..hl)
|
|
print(fmt(tabf1, "Variable", "Unique", "Decl.", "Token", "Size", "Average"))
|
|
print(fmt(tabf1, "Types", "Names", "Count", "Count", "Bytes", "Bytes"))
|
|
print(hl)
|
|
print(fmt(tabf2, "Global", uniq_g, decl_g, token_g, size_g, avg(token_g, size_g)))
|
|
print(hl)
|
|
print(fmt(tabf2, "Local (in)", uniq_li, decl_li, token_li, size_li, avg(token_li, size_li)))
|
|
print(fmt(tabf2, "TOTAL (in)", uniq_ti, decl_ti, token_ti, size_ti, avg(token_ti, size_ti)))
|
|
print(hl)
|
|
print(fmt(tabf2, "Local (out)", uniq_lo, decl_lo, token_lo, size_lo, avg(token_lo, size_lo)))
|
|
print(fmt(tabf2, "TOTAL (out)", uniq_to, decl_to, token_to, size_to, avg(token_to, size_to)))
|
|
print(hl.."\n")
|
|
end
|
|
|
|
----------------------------------------------------------------------
|
|
-- main entry point
|
|
-- * does only local variable optimization for now
|
|
----------------------------------------------------------------------
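-- Typical call (illustrative comment only, matching the signature below):
--   optparser.optimize(option, toklist, seminfolist, globalinfo, localinfo)
-- where globalinfo and localinfo come straight from lparser.parser(); the
-- seminfolist entries of renamed locals are patched in place.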
|
|
|
|
function optimize(option, _toklist, _seminfolist, _globalinfo, _localinfo)
|
|
-- set tables
|
|
toklist, seminfolist, globalinfo, localinfo
|
|
= _toklist, _seminfolist, _globalinfo, _localinfo
|
|
var_new = 0 -- reset variable name allocator
|
|
varlist = {}
|
|
------------------------------------------------------------------
|
|
-- preprocess global/local tables, handle entropy reduction
|
|
------------------------------------------------------------------
|
|
globaluniq = preprocess(globalinfo)
|
|
localuniq = preprocess(localinfo)
|
|
if option["opt-entropy"] then -- for entropy improvement
|
|
recalc_for_entropy(option)
|
|
end
|
|
------------------------------------------------------------------
|
|
-- build initial declared object table, then sort according to
|
|
-- token count, this might help assign more tokens to more common
|
|
-- variable names such as 'e' thus possibly reducing entropy
|
|
-- * an object knows its localinfo index via its 'id' field
|
|
-- * special handling for "self" special local (parameter) here
|
|
------------------------------------------------------------------
|
|
local object = {}
|
|
for i = 1, #localinfo do
|
|
object[i] = localinfo[i]
|
|
end
|
|
table.sort(object, -- sort largest first
|
|
function(v1, v2)
|
|
return v1.xcount > v2.xcount
|
|
end
|
|
)
|
|
------------------------------------------------------------------
|
|
-- the special "self" function parameters must be preserved
|
|
-- * the allocator below will never use "self", so it is safe to
|
|
-- keep those implicit declarations as-is
|
|
------------------------------------------------------------------
|
|
local temp, j, gotself = {}, 1, false
|
|
for i = 1, #object do
|
|
local obj = object[i]
|
|
if not obj.isself then
|
|
temp[j] = obj
|
|
j = j + 1
|
|
else
|
|
gotself = true
|
|
end
|
|
end
|
|
object = temp
|
|
------------------------------------------------------------------
|
|
-- a simple first-come first-served heuristic name allocator,
|
|
-- note that this is in no way optimal...
|
|
-- * each object is a local variable declaration plus existence
|
|
-- * the aim is to assign short names to as many tokens as possible,
|
|
-- so the following tries to maximize name reuse
|
|
-- * note that we preserve sort order
|
|
------------------------------------------------------------------
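-- Small worked illustration (comment only, with hypothetical locals):
-- suppose the sorted objects are A (20 tokens), B (5 tokens) and C
-- (2 tokens), A and B never overlap, and C overlaps an access of A. The
-- first round assigns "e" to A, reuses "e" for B and skips C; the next
-- round then assigns "t" to C.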
|
|
local nobject = #object
|
|
while nobject > 0 do
|
|
local varname, gcollide
|
|
repeat
|
|
varname, gcollide = new_var_name() -- collect a variable name
|
|
until not SKIP_NAME[varname] -- skip all special names
|
|
varlist[#varlist + 1] = varname -- keep a list
|
|
local oleft = nobject
|
|
------------------------------------------------------------------
|
|
-- if variable name collides with an existing global, the name
|
|
-- cannot be used by a local when the name is accessed as a global
|
|
-- during which the local is alive (between 'act' and 'rem'), so
|
|
-- we drop objects that collide with the corresponding global
|
|
------------------------------------------------------------------
|
|
if gcollide then
|
|
-- find the xref table of the global
|
|
local gref = globalinfo[globaluniq[varname].id].xref
|
|
local ngref = #gref
|
|
-- enumerate for all current objects; all are valid at this point
|
|
for i = 1, nobject do
|
|
local obj = object[i]
|
|
local act, rem = obj.act, obj.rem -- 'live' range of local
|
|
-- if rem < 0, it is a -id to a local that had the same name
|
|
-- so follow rem to extend it; does this make sense?
|
|
while rem < 0 do
|
|
rem = localinfo[-rem].rem
|
|
end
|
|
local drop
|
|
for j = 1, ngref do
|
|
local p = gref[j]
|
|
if p >= act and p <= rem then drop = true end -- in range?
|
|
end
|
|
if drop then
|
|
obj.skip = true
|
|
oleft = oleft - 1
|
|
end
|
|
end--for
|
|
end--if gcollide
|
|
------------------------------------------------------------------
|
|
-- now the first unassigned local (since it's sorted) will be the
|
|
-- one with the most tokens to rename, so we set this one and then
|
|
-- eliminate all others that collide; any locals that are left
|
|
-- can then reuse the same variable name; this is repeated until
|
|
-- all local declarations that can use this name are assigned
|
|
-- * the criteria for local-local reuse/collision is:
|
|
-- A is the local with a name already assigned
|
|
-- B is the unassigned local under consideration
|
|
-- => anytime A is accessed, it cannot be when B is 'live'
|
|
-- => to speed up things, we have first/last accesses noted
|
|
------------------------------------------------------------------
|
|
while oleft > 0 do
|
|
local i = 1
|
|
while object[i].skip do -- scan for first object
|
|
i = i + 1
|
|
end
|
|
------------------------------------------------------------------
|
|
-- first object is free for assignment of the variable name
|
|
-- [first,last] gives the access range for collision checking
|
|
------------------------------------------------------------------
|
|
oleft = oleft - 1
|
|
local obja = object[i]
|
|
i = i + 1
|
|
obja.newname = varname
|
|
obja.skip = true
|
|
obja.done = true
|
|
local first, last = obja.first, obja.last
|
|
local xref = obja.xref
|
|
------------------------------------------------------------------
|
|
-- then, scan all the rest and drop those colliding
|
|
-- if A was never accessed then it'll never collide with anything
|
|
-- otherwise trivial skip if:
|
|
-- * B was activated after A's last access (last < act)
|
|
-- * B was removed before A's first access (first > rem)
|
|
-- if not, see detailed skip below...
|
|
------------------------------------------------------------------
|
|
if first and oleft > 0 then -- must have at least 1 access
|
|
local scanleft = oleft
|
|
while scanleft > 0 do
|
|
while object[i].skip do -- next valid object
|
|
i = i + 1
|
|
end
|
|
scanleft = scanleft - 1
|
|
local objb = object[i]
|
|
i = i + 1
|
|
local act, rem = objb.act, objb.rem -- live range of B
|
|
-- if rem < 0, extend range of rem thru' following local
|
|
while rem < 0 do
|
|
rem = localinfo[-rem].rem
|
|
end
|
|
--------------------------------------------------------
|
|
if not(last < act or first > rem) then -- possible collision
|
|
--------------------------------------------------------
|
|
-- B is activated later than A or at the same statement,
|
|
-- this means for no collision, A cannot be accessed when B
|
|
-- is alive, since B overrides A (or is a peer)
|
|
--------------------------------------------------------
|
|
if act >= obja.act then
|
|
for j = 1, obja.xcount do -- ... then check every access
|
|
local p = xref[j]
|
|
if p >= act and p <= rem then -- A accessed when B live!
|
|
oleft = oleft - 1
|
|
objb.skip = true
|
|
break
|
|
end
|
|
end--for
|
|
--------------------------------------------------------
|
|
-- A is activated later than B, this means for no collision,
|
|
-- A's access is okay since it overrides B, but B's last
|
|
-- access needs to be earlier than A's activation time
|
|
--------------------------------------------------------
|
|
else
|
|
if objb.last and objb.last >= obja.act then
|
|
oleft = oleft - 1
|
|
objb.skip = true
|
|
end
|
|
end
|
|
end
|
|
--------------------------------------------------------
|
|
if oleft == 0 then break end
|
|
end
|
|
end--if first
|
|
------------------------------------------------------------------
|
|
end--while
|
|
------------------------------------------------------------------
|
|
-- after assigning all possible locals to one variable name, the
|
|
-- unassigned locals/objects have the skip field reset and the table
|
|
-- is compacted, to hopefully reduce iteration time
|
|
------------------------------------------------------------------
|
|
local temp, j = {}, 1
|
|
for i = 1, nobject do
|
|
local obj = object[i]
|
|
if not obj.done then
|
|
obj.skip = false
|
|
temp[j] = obj
|
|
j = j + 1
|
|
end
|
|
end
|
|
object = temp -- new compacted object table
|
|
nobject = #object -- objects left to process
|
|
------------------------------------------------------------------
|
|
end--while
|
|
------------------------------------------------------------------
|
|
-- after assigning all locals with new variable names, we can
|
|
-- patch in the new names, and reprocess to get 'after' stats
|
|
------------------------------------------------------------------
|
|
for i = 1, #localinfo do -- enumerate all locals
|
|
local obj = localinfo[i]
|
|
local xref = obj.xref
|
|
if obj.newname then -- if got new name, patch it in
|
|
for j = 1, obj.xcount do
|
|
local p = xref[j] -- xrefs indexes the token list
|
|
seminfolist[p] = obj.newname
|
|
end
|
|
obj.name, obj.oldname -- adjust names
|
|
= obj.newname, obj.name
|
|
else
|
|
obj.oldname = obj.name -- for cases like 'self'
|
|
end
|
|
end
|
|
------------------------------------------------------------------
|
|
-- deal with statistics output
|
|
------------------------------------------------------------------
|
|
if gotself then -- add 'self' to end of list
|
|
varlist[#varlist + 1] = "self"
|
|
end
|
|
local afteruniq = preprocess(localinfo)
|
|
stats_summary(globaluniq, localuniq, afteruniq, option)
|
|
------------------------------------------------------------------
|
|
end
|
|
end)
|
|
--[[--------------------------------------------------------------------
|
|
|
|
LuaSrcDiet
|
|
Compresses Lua source code by removing unnecessary characters.
|
|
For Lua 5.1.x source code.
|
|
|
|
Copyright (c) 2008 Kein-Hong Man <khman@users.sf.net>
|
|
The COPYRIGHT file describes the conditions
|
|
under which this software may be distributed.
|
|
|
|
See the ChangeLog for more information.
|
|
|
|
----------------------------------------------------------------------]]
|
|
|
|
--[[--------------------------------------------------------------------
|
|
-- NOTES:
|
|
-- * Remember to update version and date information below (MSG_TITLE)
|
|
-- * TODO: to implement pcall() to properly handle lexer etc. errors
|
|
-- * TODO: verify token stream or double-check binary chunk?
|
|
-- * TODO: need some automatic testing for a semblance of sanity
|
|
-- * TODO: the plugin module is highly experimental and unstable
|
|
----------------------------------------------------------------------]]
|
|
|
|
-- standard libraries, functions
|
|
local string = string
|
|
local math = math
|
|
local table = table
|
|
local require = require
|
|
local print = print
|
|
local sub = string.sub
|
|
local gmatch = string.gmatch
|
|
|
|
-- support modules
|
|
local llex = require "llex"
|
|
local lparser = require "lparser"
|
|
local optlex = require "optlex"
|
|
local optparser = require "optparser"
|
|
local plugin
|
|
|
|
--[[--------------------------------------------------------------------
|
|
-- messages and textual data
|
|
----------------------------------------------------------------------]]
|
|
|
|
local MSG_TITLE = [[
|
|
LuaSrcDiet: Puts your Lua 5.1 source code on a diet
|
|
Version 0.11.2 (20080608) Copyright (c) 2005-2008 Kein-Hong Man
|
|
The COPYRIGHT file describes the conditions under which this
|
|
software may be distributed.
|
|
]]
|
|
|
|
local MSG_USAGE = [[
|
|
usage: LuaSrcDiet [options] [filenames]
|
|
|
|
example:
|
|
>LuaSrcDiet myscript.lua -o myscript_.lua
|
|
|
|
options:
|
|
-v, --version prints version information
|
|
-h, --help prints usage information
|
|
-o <file> specify file name to write output
|
|
-s <suffix> suffix for output files (default '_')
|
|
--keep <msg> keep block comment with <msg> inside
|
|
--plugin <module> run <module> in plugin/ directory
|
|
- stop handling arguments
|
|
|
|
(optimization levels)
|
|
--none all optimizations off (normalizes EOLs only)
|
|
--basic lexer-based optimizations only
|
|
--maximum maximize reduction of source
|
|
|
|
(informational)
|
|
--quiet process files quietly
|
|
--read-only read file and print token stats only
|
|
--dump-lexer dump raw tokens from lexer to stdout
|
|
--dump-parser dump variable tracking tables from parser
|
|
--details extra info (strings, numbers, locals)
|
|
|
|
features (to disable, insert 'no' prefix like --noopt-comments):
|
|
%s
|
|
default settings:
|
|
%s]]

------------------------------------------------------------------------
-- optimization options, for ease of switching on and off
-- * positive to enable optimization, negative (no) to disable
-- * these options should follow --opt-* and --noopt-* style for now
------------------------------------------------------------------------

local OPTION = [[
--opt-comments,'remove comments and block comments'
--opt-whitespace,'remove whitespace excluding EOLs'
--opt-emptylines,'remove empty lines'
--opt-eols,'all above, plus remove unnecessary EOLs'
--opt-strings,'optimize strings and long strings'
--opt-numbers,'optimize numbers'
--opt-locals,'optimize local variable names'
--opt-entropy,'tries to reduce symbol entropy of locals'
]]

-- preset configuration
local DEFAULT_CONFIG = [[
--opt-comments --opt-whitespace --opt-emptylines
--opt-numbers --opt-locals
]]
-- override configurations: MUST explicitly enable/disable everything
local BASIC_CONFIG = [[
--opt-comments --opt-whitespace --opt-emptylines
--noopt-eols --noopt-strings --noopt-numbers
--noopt-locals
]]
local MAXIMUM_CONFIG = [[
--opt-comments --opt-whitespace --opt-emptylines
--opt-eols --opt-strings --opt-numbers
--opt-locals --opt-entropy
]]
local NONE_CONFIG = [[
--noopt-comments --noopt-whitespace --noopt-emptylines
--noopt-eols --noopt-strings --noopt-numbers
--noopt-locals
]]

local DEFAULT_SUFFIX = "_"       -- default suffix for file renaming
local PLUGIN_SUFFIX = "plugin/"  -- relative location of plugins

--[[--------------------------------------------------------------------
-- startup and initialize option list handling
----------------------------------------------------------------------]]

-- simple error message handler; change to error if traceback wanted
local function die(msg)
  print("LuaSrcDiet: "..msg); os.exit()
end
--die = error--DEBUG

if not string.find(_VERSION, "5.1", 1, 1) then  -- sanity check
  die("requires Lua 5.1 to run")
end

------------------------------------------------------------------------
-- prepares text for list of optimizations, prepare lookup table
------------------------------------------------------------------------

local MSG_OPTIONS = ""
do
  local WIDTH = 24
  local o = {}
  for op, desc in gmatch(OPTION, "%s*([^,]+),'([^']+)'") do
    local msg = "  "..op
    msg = msg..string.rep(" ", WIDTH - #msg)..desc.."\n"
    MSG_OPTIONS = MSG_OPTIONS..msg
    o[op] = true
    o["--no"..sub(op, 3)] = true
  end
  OPTION = o  -- replace OPTION with lookup table
end

MSG_USAGE = string.format(MSG_USAGE, MSG_OPTIONS, DEFAULT_CONFIG)

------------------------------------------------------------------------
-- global variable initialization, option set handling
------------------------------------------------------------------------

local suffix = DEFAULT_SUFFIX   -- file suffix
local option = {}               -- program options
local stat_c, stat_l            -- statistics tables

-- function to set option lookup table based on a text list of options
-- note: additional forced settings for --opt-eols is done in optlex.lua
local function set_options(CONFIG)
  for op in gmatch(CONFIG, "(%-%-%S+)") do
    if sub(op, 3, 4) == "no" and        -- handle negative options
       OPTION["--"..sub(op, 5)] then
      option[sub(op, 5)] = false
    else
      option[sub(op, 3)] = true
    end
  end
end
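
------------------------------------------------------------------------
-- Illustration (added for exposition, not part of LuaSrcDiet): what
-- set_options() does to the 'option' table. Keys are the option names
-- with the leading "--" (or "--no") stripped; the sample input string
-- below is hypothetical.
------------------------------------------------------------------------
--[=[
set_options("--opt-comments --noopt-locals")
-- option now contains (at least):
--   option["opt-comments"] == true
--   option["opt-locals"]   == false
set_options(MAXIMUM_CONFIG)  -- the presets reuse the same mechanism,
                             -- overwriting any earlier settings
--]=]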

--[[--------------------------------------------------------------------
-- support functions
----------------------------------------------------------------------]]

-- list of token types, parser-significant types are up to TTYPE_GRAMMAR
-- while the rest are not used by parsers; arranged for stats display
local TTYPES = {
  "TK_KEYWORD", "TK_NAME", "TK_NUMBER",         -- grammar
  "TK_STRING", "TK_LSTRING", "TK_OP",
  "TK_EOS",
  "TK_COMMENT", "TK_LCOMMENT",                  -- non-grammar
  "TK_EOL", "TK_SPACE",
}
local TTYPE_GRAMMAR = 7

local EOLTYPES = {      -- EOL names for token dump
  ["\n"] = "LF", ["\r"] = "CR",
  ["\n\r"] = "LFCR", ["\r\n"] = "CRLF",
}

------------------------------------------------------------------------
-- read source code from file
------------------------------------------------------------------------

local function load_file(fname)
  local INF = io.open(fname, "rb")
  if not INF then die("cannot open \""..fname.."\" for reading") end
  local dat = INF:read("*a")
  if not dat then die("cannot read from \""..fname.."\"") end
  INF:close()
  return dat
end

------------------------------------------------------------------------
-- save source code to file
------------------------------------------------------------------------

local function save_file(fname, dat)
  local OUTF = io.open(fname, "wb")
  if not OUTF then die("cannot open \""..fname.."\" for writing") end
  local status = OUTF:write(dat)
  if not status then die("cannot write to \""..fname.."\"") end
  OUTF:close()
end
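
-- Illustration (added, not part of the tool): a load_file/save_file
-- round trip; the filenames are hypothetical.
--[=[
local src = load_file("myscript.lua")   -- whole file as one string (binary mode)
save_file("myscript.bak", src)          -- write an identical copy elsewhere
--]=]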

------------------------------------------------------------------------
-- functions to deal with statistics
------------------------------------------------------------------------

-- initialize statistics table
local function stat_init()
  stat_c, stat_l = {}, {}
  for i = 1, #TTYPES do
    local ttype = TTYPES[i]
    stat_c[ttype], stat_l[ttype] = 0, 0
  end
end

-- add a token to statistics table
local function stat_add(tok, seminfo)
  stat_c[tok] = stat_c[tok] + 1
  stat_l[tok] = stat_l[tok] + #seminfo
end

-- do totals for statistics table, return average table
local function stat_calc()
  local function avg(c, l)      -- safe average function
    if c == 0 then return 0 end
    return l / c
  end
  local stat_a = {}
  local c, l = 0, 0
  for i = 1, TTYPE_GRAMMAR do   -- total grammar tokens
    local ttype = TTYPES[i]
    c = c + stat_c[ttype]; l = l + stat_l[ttype]
  end
  stat_c.TOTAL_TOK, stat_l.TOTAL_TOK = c, l
  stat_a.TOTAL_TOK = avg(c, l)
  c, l = 0, 0
  for i = 1, #TTYPES do         -- total all tokens
    local ttype = TTYPES[i]
    c = c + stat_c[ttype]; l = l + stat_l[ttype]
    stat_a[ttype] = avg(stat_c[ttype], stat_l[ttype])
  end
  stat_c.TOTAL_ALL, stat_l.TOTAL_ALL = c, l
  stat_a.TOTAL_ALL = avg(c, l)
  return stat_a
end
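
------------------------------------------------------------------------
-- Illustration (added for exposition): how the statistics tables behave
-- for a tiny, hand-made token stream. TK_NAME is a grammar token while
-- TK_SPACE is not, so the latter counts towards TOTAL_ALL only.
------------------------------------------------------------------------
--[=[
stat_init()
stat_add("TK_NAME", "foo")              -- 1 grammar token, 3 bytes
stat_add("TK_SPACE", " ")               -- 1 non-grammar token, 1 byte
local a = stat_calc()
-- stat_c.TOTAL_TOK == 1, stat_l.TOTAL_TOK == 3, a.TOTAL_TOK == 3.0
-- stat_c.TOTAL_ALL == 2, stat_l.TOTAL_ALL == 4, a.TOTAL_ALL == 2.0
--]=]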

--[[--------------------------------------------------------------------
-- main tasks
----------------------------------------------------------------------]]

------------------------------------------------------------------------
-- a simple token dumper, minimal translation of seminfo data
------------------------------------------------------------------------

local function dump_tokens(srcfl)
  --------------------------------------------------------------------
  -- load file and process source input into tokens
  --------------------------------------------------------------------
  local z = load_file(srcfl)
  llex.init(z)
  llex.llex()
  local toklist, seminfolist = llex.tok, llex.seminfo
  --------------------------------------------------------------------
  -- display output
  --------------------------------------------------------------------
  for i = 1, #toklist do
    local tok, seminfo = toklist[i], seminfolist[i]
    if tok == "TK_OP" and string.byte(seminfo) < 32 then
      seminfo = "("..string.byte(seminfo)..")"
    elseif tok == "TK_EOL" then
      seminfo = EOLTYPES[seminfo]
    else
      seminfo = "'"..seminfo.."'"
    end
    print(tok.." "..seminfo)
  end--for
end
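
-- Illustration (added for exposition): for a one-line source file
-- containing "local x = 1", the dump is expected to look roughly like:
--   TK_KEYWORD 'local'
--   TK_SPACE ' '
--   TK_NAME 'x'
--   TK_SPACE ' '
--   TK_OP '='
--   TK_SPACE ' '
--   TK_NUMBER '1'
-- followed by an EOL token (if the file ends in a newline) and TK_EOS.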

----------------------------------------------------------------------
-- parser dump; dump globalinfo and localinfo tables
----------------------------------------------------------------------

local function dump_parser(srcfl)
  local print = print
  --------------------------------------------------------------------
  -- load file and process source input into tokens
  --------------------------------------------------------------------
  local z = load_file(srcfl)
  llex.init(z)
  llex.llex()
  local toklist, seminfolist, toklnlist
    = llex.tok, llex.seminfo, llex.tokln
  --------------------------------------------------------------------
  -- do parser optimization here
  --------------------------------------------------------------------
  lparser.init(toklist, seminfolist, toklnlist)
  local globalinfo, localinfo = lparser.parser()
  --------------------------------------------------------------------
  -- display output
  --------------------------------------------------------------------
  local hl = string.rep("-", 72)
  print("*** Local/Global Variable Tracker Tables ***")
  print(hl.."\n GLOBALS\n"..hl)
  -- global tables have a list of xref numbers only
  for i = 1, #globalinfo do
    local obj = globalinfo[i]
    local msg = "("..i..") '"..obj.name.."' -> "
    local xref = obj.xref
    for j = 1, #xref do msg = msg..xref[j].." " end
    print(msg)
  end
  -- local tables have xref numbers and a few other special
  -- numbers that are specially named: decl (declaration xref),
  -- act (activation xref), rem (removal xref)
  print(hl.."\n LOCALS (decl=declared act=activated rem=removed)\n"..hl)
  for i = 1, #localinfo do
    local obj = localinfo[i]
    local msg = "("..i..") '"..obj.name.."' decl:"..obj.decl..
                " act:"..obj.act.." rem:"..obj.rem
    if obj.isself then
      msg = msg.." isself"
    end
    msg = msg.." -> "
    local xref = obj.xref
    for j = 1, #xref do msg = msg..xref[j].." " end
    print(msg)
  end
  print(hl.."\n")
end
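
------------------------------------------------------------------------
-- Illustration (added for exposition): the shape of the entries that
-- dump_parser() walks, inferred from the fields it reads above. The
-- names and token indices shown are hypothetical.
------------------------------------------------------------------------
--[=[
-- globalinfo[i]: a name plus the token positions where it is referenced
local g = { name = "print", xref = { 12, 57 } }
-- localinfo[i]: declaration/activation/removal positions, an optional
-- 'self' flag, plus reference positions
local l = { name = "msg", decl = 20, act = 22, rem = 90, isself = false,
            xref = { 20, 31, 44 } }
--]=]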

------------------------------------------------------------------------
-- reads source file(s) and reports some statistics
------------------------------------------------------------------------

local function read_only(srcfl)
  local print = print
  --------------------------------------------------------------------
  -- load file and process source input into tokens
  --------------------------------------------------------------------
  local z = load_file(srcfl)
  llex.init(z)
  llex.llex()
  local toklist, seminfolist = llex.tok, llex.seminfo
  print(MSG_TITLE)
  print("Statistics for: "..srcfl.."\n")
  --------------------------------------------------------------------
  -- collect statistics
  --------------------------------------------------------------------
  stat_init()
  for i = 1, #toklist do
    local tok, seminfo = toklist[i], seminfolist[i]
    stat_add(tok, seminfo)
  end--for
  local stat_a = stat_calc()
  --------------------------------------------------------------------
  -- display output
  --------------------------------------------------------------------
  local fmt = string.format
  local function figures(tt)
    return stat_c[tt], stat_l[tt], stat_a[tt]
  end
  local tabf1, tabf2 = "%-16s%8s%8s%10s", "%-16s%8d%8d%10.2f"
  local hl = string.rep("-", 42)
  print(fmt(tabf1, "Lexical", "Input", "Input", "Input"))
  print(fmt(tabf1, "Elements", "Count", "Bytes", "Average"))
  print(hl)
  for i = 1, #TTYPES do
    local ttype = TTYPES[i]
    print(fmt(tabf2, ttype, figures(ttype)))
    if ttype == "TK_EOS" then print(hl) end
  end
  print(hl)
  print(fmt(tabf2, "Total Elements", figures("TOTAL_ALL")))
  print(hl)
  print(fmt(tabf2, "Total Tokens", figures("TOTAL_TOK")))
  print(hl.."\n")
end
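
-- Illustration (added, not part of the tool): read_only() is what the
-- --read-only command-line option runs, e.g.
--   >LuaSrcDiet --read-only myscript.lua
-- which prints one Count/Bytes/Average row per token type for the input
-- file and writes no output file.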

------------------------------------------------------------------------
-- process source file(s), write output and reports some statistics
------------------------------------------------------------------------

local function process_file(srcfl, destfl)
  local function print(...)             -- handle quiet option
    if option.QUIET then return end
    _G.print(...)
  end
  if plugin and plugin.init then        -- plugin init
    option.EXIT = false
    plugin.init(option, srcfl, destfl)
    if option.EXIT then return end
  end
  print(MSG_TITLE)                      -- title message
  --------------------------------------------------------------------
  -- load file and process source input into tokens
  --------------------------------------------------------------------
  local z = load_file(srcfl)
  if plugin and plugin.post_load then   -- plugin post-load
    z = plugin.post_load(z) or z
    if option.EXIT then return end
  end
  llex.init(z)
  llex.llex()
  local toklist, seminfolist, toklnlist
    = llex.tok, llex.seminfo, llex.tokln
  if plugin and plugin.post_lex then    -- plugin post-lex
    plugin.post_lex(toklist, seminfolist, toklnlist)
    if option.EXIT then return end
  end
  --------------------------------------------------------------------
  -- collect 'before' statistics
  --------------------------------------------------------------------
  stat_init()
  for i = 1, #toklist do
    local tok, seminfo = toklist[i], seminfolist[i]
    stat_add(tok, seminfo)
  end--for
  local stat1_a = stat_calc()
  local stat1_c, stat1_l = stat_c, stat_l
  --------------------------------------------------------------------
  -- do parser optimization here
  --------------------------------------------------------------------
  if option["opt-locals"] then
    optparser.print = print  -- hack
    lparser.init(toklist, seminfolist, toklnlist)
    local globalinfo, localinfo = lparser.parser()
    if plugin and plugin.post_parse then        -- plugin post-parse
      plugin.post_parse(globalinfo, localinfo)
      if option.EXIT then return end
    end
    optparser.optimize(option, toklist, seminfolist, globalinfo, localinfo)
    if plugin and plugin.post_optparse then     -- plugin post-optparse
      plugin.post_optparse()
      if option.EXIT then return end
    end
  end
  --------------------------------------------------------------------
  -- do lexer optimization here, save output file
  --------------------------------------------------------------------
  optlex.print = print  -- hack
  toklist, seminfolist, toklnlist
    = optlex.optimize(option, toklist, seminfolist, toklnlist)
  if plugin and plugin.post_optlex then         -- plugin post-optlex
    plugin.post_optlex(toklist, seminfolist, toklnlist)
    if option.EXIT then return end
  end
  local dat = table.concat(seminfolist)
  -- depending on options selected, embedded EOLs in long strings and
  -- long comments may not have been translated to \n, tack a warning
  if string.find(dat, "\r\n", 1, 1) or
     string.find(dat, "\n\r", 1, 1) then
    optlex.warn.mixedeol = true
  end
  -- save optimized source stream to output file
  save_file(destfl, dat)
  --------------------------------------------------------------------
  -- collect 'after' statistics
  --------------------------------------------------------------------
  stat_init()
  for i = 1, #toklist do
    local tok, seminfo = toklist[i], seminfolist[i]
    stat_add(tok, seminfo)
  end--for
  local stat_a = stat_calc()
  --------------------------------------------------------------------
  -- display output
  --------------------------------------------------------------------
  print("Statistics for: "..srcfl.." -> "..destfl.."\n")
  local fmt = string.format
  local function figures(tt)
    return stat1_c[tt], stat1_l[tt], stat1_a[tt],
           stat_c[tt], stat_l[tt], stat_a[tt]
  end
  local tabf1, tabf2 = "%-16s%8s%8s%10s%8s%8s%10s",
                       "%-16s%8d%8d%10.2f%8d%8d%10.2f"
  local hl = string.rep("-", 68)
  print("*** lexer-based optimizations summary ***\n"..hl)
  print(fmt(tabf1, "Lexical",
            "Input", "Input", "Input",
            "Output", "Output", "Output"))
  print(fmt(tabf1, "Elements",
            "Count", "Bytes", "Average",
            "Count", "Bytes", "Average"))
  print(hl)
  for i = 1, #TTYPES do
    local ttype = TTYPES[i]
    print(fmt(tabf2, ttype, figures(ttype)))
    if ttype == "TK_EOS" then print(hl) end
  end
  print(hl)
  print(fmt(tabf2, "Total Elements", figures("TOTAL_ALL")))
  print(hl)
  print(fmt(tabf2, "Total Tokens", figures("TOTAL_TOK")))
  print(hl)
  --------------------------------------------------------------------
  -- report warning flags from optimizing process
  --------------------------------------------------------------------
  if optlex.warn.lstring then
    print("* WARNING: "..optlex.warn.lstring)
  elseif optlex.warn.mixedeol then
    print("* WARNING: ".."output still contains some CRLF or LFCR line endings")
  end
  print()
end
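
------------------------------------------------------------------------
-- Illustration (added for exposition): the minimal shape of a plugin.
-- A plugin is loaded with require(PLUGIN_SUFFIX..name); in Lua 5.1 a
-- chunk that returns a table works with that loader, although the stock
-- plugins may register themselves differently. Only the hook names and
-- arguments are taken from process_file() above; everything else here
-- is an assumption. All hooks are optional, and a hook may set
-- option.EXIT = true to stop processing of the current file.
------------------------------------------------------------------------
--[=[
-- plugin/example.lua (hypothetical)
local M = {}
function M.init(option, srcfl, destfl) end        -- before any processing
function M.post_load(z) return z end              -- may return replacement source
function M.post_lex(toklist, seminfolist, toklnlist) end
function M.post_parse(globalinfo, localinfo) end  -- only when --opt-locals is on
function M.post_optparse() end                    -- only when --opt-locals is on
function M.post_optlex(toklist, seminfolist, toklnlist) end
return M
--]=]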

--[[--------------------------------------------------------------------
-- main functions
----------------------------------------------------------------------]]

local arg = {...}  -- program arguments
local fspec = {}
set_options(DEFAULT_CONFIG)     -- set to default options at beginning

------------------------------------------------------------------------
-- per-file handling, ship off to tasks
------------------------------------------------------------------------

local function do_files(fspec)
  for _, srcfl in ipairs(fspec) do
    local destfl
    ------------------------------------------------------------------
    -- find and replace extension for filenames
    ------------------------------------------------------------------
    local extb, exte = string.find(srcfl, "%.[^%.%\\%/]*$")
    local basename, extension = srcfl, ""
    if extb and extb > 1 then
      basename = sub(srcfl, 1, extb - 1)
      extension = sub(srcfl, extb, exte)
    end
    destfl = basename..suffix..extension
    if #fspec == 1 and option.OUTPUT_FILE then
      destfl = option.OUTPUT_FILE
    end
    if srcfl == destfl then
      die("output filename identical to input filename")
    end
    ------------------------------------------------------------------
    -- perform requested operations
    ------------------------------------------------------------------
    if option.DUMP_LEXER then
      dump_tokens(srcfl)
    elseif option.DUMP_PARSER then
      dump_parser(srcfl)
    elseif option.READ_ONLY then
      read_only(srcfl)
    else
      process_file(srcfl, destfl)
    end
  end--for
end
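
-- Illustration (added for exposition): how do_files() derives the output
-- name from the input name (filenames hypothetical). With the default
-- suffix "_": "myscript.lua" -> "myscript_.lua", "init" -> "init_".
-- With a single input file and -o, the -o name is used instead, and the
-- run aborts if the input and output names turn out identical.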

------------------------------------------------------------------------
-- main function (entry point is after this definition)
------------------------------------------------------------------------

local function main()
  local argn, i = #arg, 1
  if argn == 0 then
    option.HELP = true
  end
  --------------------------------------------------------------------
  -- handle arguments
  --------------------------------------------------------------------
  while i <= argn do
    local o, p = arg[i], arg[i + 1]
    local dash = string.match(o, "^%-%-?")
    if dash == "-" then                 -- single-dash options
      if o == "-h" then
        option.HELP = true; break
      elseif o == "-v" then
        option.VERSION = true; break
      elseif o == "-s" then
        if not p then die("-s option needs suffix specification") end
        suffix = p
        i = i + 1
      elseif o == "-o" then
        if not p then die("-o option needs a file name") end
        option.OUTPUT_FILE = p
        i = i + 1
      elseif o == "-" then
        break  -- ignore rest of args
      else
        die("unrecognized option "..o)
      end
    elseif dash == "--" then            -- double-dash options
      if o == "--help" then
        option.HELP = true; break
      elseif o == "--version" then
        option.VERSION = true; break
      elseif o == "--keep" then
        if not p then die("--keep option needs a string to match for") end
        option.KEEP = p
        i = i + 1
      elseif o == "--plugin" then
        if not p then die("--plugin option needs a module name") end
        if option.PLUGIN then die("only one plugin can be specified") end
        option.PLUGIN = p
        plugin = require(PLUGIN_SUFFIX..p)
        i = i + 1
      elseif o == "--quiet" then
        option.QUIET = true
      elseif o == "--read-only" then
        option.READ_ONLY = true
      elseif o == "--basic" then
        set_options(BASIC_CONFIG)
      elseif o == "--maximum" then
        set_options(MAXIMUM_CONFIG)
      elseif o == "--none" then
        set_options(NONE_CONFIG)
      elseif o == "--dump-lexer" then
        option.DUMP_LEXER = true
      elseif o == "--dump-parser" then
        option.DUMP_PARSER = true
      elseif o == "--details" then
        option.DETAILS = true
      elseif OPTION[o] then             -- lookup optimization options
        set_options(o)
      else
        die("unrecognized option "..o)
      end
    else
      fspec[#fspec + 1] = o             -- potential filename
    end
    i = i + 1
  end--while
  if option.HELP then
    print(MSG_TITLE..MSG_USAGE); return true
  elseif option.VERSION then
    print(MSG_TITLE); return true
  end
  if #fspec > 0 then
    if #fspec > 1 and option.OUTPUT_FILE then
      die("with -o, only one source file can be specified")
    end
    do_files(fspec)
    return true
  else
    die("nothing to do!")
  end
end

-- entry point -> main() -> do_files()
if not main() then
  die("Please run with option -h or --help for usage information")
end
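
-- Illustration (added, not part of the tool): typical invocations, with
-- hypothetical filenames; the support modules must be reachable (they
-- are preloaded in this bundled copy):
--   >LuaSrcDiet myscript.lua                    -- writes myscript_.lua
--   >LuaSrcDiet myscript.lua -o slim.lua --maximum
--   >LuaSrcDiet --dump-lexer myscript.lua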

-- end of script