mirror of https://github.com/elua/elua.git synced 2025-01-08 20:56:17 +08:00
elua/luasrcdiet.lua
Bogdan Marinescu 26361ea4cd EXPERIMENTAL FEATURE: romfs compression/precompilation.
Added a new "romfs" parameter to SConstruct. It can take one of three values:

- "verbatim" (default): copy all the files from the eLua romfs/ directory to the binary image (exactly what happened until now)
- "compress": use LuaSrcDiet (http://luaforge.net/projects/luasrcdiet, now included in the eLua distribution) to "compress" the source code by using different tricks (shorter identifiers, removing comments and EOLS, and so on). The output is still a compilable Lua file (although in most case it looks completely different from the original) which is copied in the eLua binary image instead of the original Lua file. The compression obtained by using this method seems to be very good in practice, thus it will save flash when needed.
- "compile": use the eLua cross compiler (that must be built first by running 'scons -f cross-lua.py') to precompile the Lua files to bytecode, and write the bytecode to the image instead of the source file. This way, one can save both time (the scripts don't need to be compiled anymore) and RAM (for the same reason, especially since the Lua parser uses the stack a lot, which can lead to very nasty and hard to diagnose stack overflow bugs). It will be even more useful in the future, when (hopefully) more and more Lua bytecode data structures will be available directly from Flash (without having to copy them RAM as it happens now). It might also decrease the romfs memory footprint, but then again, it might not; it pretty much depends on the Lua programs included in the romfs.
2009-12-02 19:33:03 +00:00
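
As a rough illustration of the "compress" mode (a hypothetical example, not taken from the eLua tree), a fragment such as:

    -- compute the arithmetic mean of a list of samples
    local function average(samples)
      local sum = 0
      for i = 1, #samples do
        sum = sum + samples[i]
      end
      return sum / #samples
    end

might come out of LuaSrcDiet as something like:

    local function a(b) local c=0 for d=1,#b do c=c+b[d] end return c/#b end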

#!/usr/bin/env lua
package.preload['llex'] = (function (...)
--[[--------------------------------------------------------------------
llex.lua: Lua 5.1 lexical analyzer in Lua
This file is part of LuaSrcDiet, based on Yueliang material.
Copyright (c) 2008 Kein-Hong Man <khman@users.sf.net>
The COPYRIGHT file describes the conditions
under which this software may be distributed.
See the ChangeLog for more information.
----------------------------------------------------------------------]]
--[[--------------------------------------------------------------------
-- NOTES:
-- * This is a version of the native 5.1.x lexer from Yueliang 0.4.0,
-- with significant modifications to handle LuaSrcDiet's needs:
-- (1) llex.error is an optional error function handler
-- (2) seminfo for strings include their delimiters and no
-- translation operations are performed on them
-- * ADDED shbang handling to support executable scripts
-- * NO localized decimal point replacement magic
-- * NO limit to number of lines
-- * NO support for compatible long strings (LUA_COMPAT_LSTR)
-- * Please read technotes.txt for more technical details.
----------------------------------------------------------------------]]
local base = _G
local string = string
module "llex"
local find = string.find
local match = string.match
local sub = string.sub
----------------------------------------------------------------------
-- initialize keyword list, variables
----------------------------------------------------------------------
local kw = {}
for v in string.gmatch([[
and break do else elseif end false for function if in
local nil not or repeat return then true until while]], "%S+") do
kw[v] = true
end
-- NOTE: see init() for module variables (externally visible):
-- tok, seminfo, tokln
local z, -- source stream
sourceid, -- name of source
I, -- position of lexer
buff, -- buffer for strings
ln -- line number
----------------------------------------------------------------------
-- add information to token listing
----------------------------------------------------------------------
local function addtoken(token, info)
local i = #tok + 1
tok[i] = token
seminfo[i] = info
tokln[i] = ln
end
----------------------------------------------------------------------
-- handles line number incrementation and end-of-line characters
----------------------------------------------------------------------
local function inclinenumber(i, is_tok)
local sub = sub
local old = sub(z, i, i)
i = i + 1 -- skip '\n' or '\r'
local c = sub(z, i, i)
if (c == "\n" or c == "\r") and (c ~= old) then
i = i + 1 -- skip '\n\r' or '\r\n'
old = old..c
end
if is_tok then addtoken("TK_EOL", old) end
ln = ln + 1
I = i
return i
end
----------------------------------------------------------------------
-- initialize lexer for given source _z and source name _sourceid
----------------------------------------------------------------------
function init(_z, _sourceid)
z = _z -- source
sourceid = _sourceid -- name of source
I = 1 -- lexer's position in source
ln = 1 -- line number
tok = {} -- lexed token list*
seminfo = {} -- lexed semantic information list*
tokln = {} -- line numbers for messages*
-- (*) externally visible thru' module
--------------------------------------------------------------------
-- initial processing (shbang handling)
--------------------------------------------------------------------
local p, _, q, r = find(z, "^(#[^\r\n]*)(\r?\n?)")
if p then -- skip first line
I = I + #q
addtoken("TK_COMMENT", q)
if #r > 0 then inclinenumber(I, true) end
end
end
----------------------------------------------------------------------
-- returns a chunk name or id, no truncation for long names
----------------------------------------------------------------------
function chunkid()
if sourceid and match(sourceid, "^[=@]") then
return sub(sourceid, 2) -- remove first char
end
return "[string]"
end
----------------------------------------------------------------------
-- formats error message and throws error
-- * a simplified version, does not report what token was responsible
----------------------------------------------------------------------
function errorline(s, line)
local e = error or base.error
e(string.format("%s:%d: %s", chunkid(), line or ln, s))
end
local errorline = errorline
------------------------------------------------------------------------
-- count separators ("=") in a long string delimiter
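-- * e.g. returns 2 for the opening "[==[" of a level-2 long string;
-- a failed match returns -count - 1, e.g. -3 for "[==x"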
------------------------------------------------------------------------
local function skip_sep(i)
local sub = sub
local s = sub(z, i, i)
i = i + 1
local count = #match(z, "=*", i) -- note, take the length
i = i + count
I = i
return (sub(z, i, i) == s) and count or (-count) - 1
end
----------------------------------------------------------------------
-- reads a long string or long comment
----------------------------------------------------------------------
local function read_long_string(is_str, sep)
local i = I + 1 -- skip 2nd '['
local sub = sub
local c = sub(z, i, i)
if c == "\r" or c == "\n" then -- string starts with a newline?
i = inclinenumber(i) -- skip it
end
local j = i
while true do
local p, q, r = find(z, "([\r\n%]])", i) -- (long range)
if not p then
errorline(is_str and "unfinished long string" or
"unfinished long comment")
end
i = p
if r == "]" then -- delimiter test
if skip_sep(i) == sep then
buff = sub(z, buff, I)
I = I + 1 -- skip 2nd ']'
return buff
end
i = I
else -- newline
buff = buff.."\n"
i = inclinenumber(i)
end
end--while
end
----------------------------------------------------------------------
-- reads a string
----------------------------------------------------------------------
local function read_string(del)
local i = I
local find = find
local sub = sub
while true do
local p, q, r = find(z, "([\n\r\\\"\'])", i) -- (long range)
if p then
if r == "\n" or r == "\r" then
errorline("unfinished string")
end
i = p
if r == "\\" then -- handle escapes
i = i + 1
r = sub(z, i, i)
if r == "" then break end -- (EOZ error)
p = find("abfnrtv\n\r", r, 1, true)
------------------------------------------------------
if p then -- special escapes
if p > 7 then
i = inclinenumber(i)
else
i = i + 1
end
------------------------------------------------------
elseif find(r, "%D") then -- other non-digits
i = i + 1
------------------------------------------------------
else -- \xxx sequence
local p, q, s = find(z, "^(%d%d?%d?)", i)
i = q + 1
if s + 1 > 256 then -- UCHAR_MAX
errorline("escape sequence too large")
end
------------------------------------------------------
end--if p
else
i = i + 1
if r == del then -- ending delimiter
I = i
return sub(z, buff, i - 1) -- return string
end
end--if r
else
break -- (error)
end--if p
end--while
errorline("unfinished string")
end
------------------------------------------------------------------------
-- main lexer function
------------------------------------------------------------------------
function llex()
local find = find
local match = match
while true do--outer
local i = I
-- inner loop allows break to be used to nicely section tests
while true do--inner
----------------------------------------------------------------
local p, _, r = find(z, "^([_%a][_%w]*)", i)
if p then
I = i + #r
if kw[r] then
addtoken("TK_KEYWORD", r) -- reserved word (keyword)
else
addtoken("TK_NAME", r) -- identifier
end
break -- (continue)
end
----------------------------------------------------------------
local p, _, r = find(z, "^(%.?)%d", i)
if p then -- numeral
if r == "." then i = i + 1 end
local _, q, r = find(z, "^%d*[%.%d]*([eE]?)", i)
i = q + 1
if #r == 1 then -- optional exponent
if match(z, "^[%+%-]", i) then -- optional sign
i = i + 1
end
end
local _, q = find(z, "^[_%w]*", i)
I = q + 1
local v = sub(z, p, q) -- string equivalent
if not base.tonumber(v) then -- handles hex test also
errorline("malformed number")
end
addtoken("TK_NUMBER", v)
break -- (continue)
end
----------------------------------------------------------------
local p, q, r, t = find(z, "^((%s)[ \t\v\f]*)", i)
if p then
if t == "\n" or t == "\r" then -- newline
inclinenumber(i, true)
else
I = q + 1 -- whitespace
addtoken("TK_SPACE", r)
end
break -- (continue)
end
----------------------------------------------------------------
local r = match(z, "^%p", i)
if r then
buff = i
local p = find("-[\"\'.=<>~", r, 1, true)
if p then
-- two-level if block for punctuation/symbols
--------------------------------------------------------
if p <= 2 then
if p == 1 then -- minus
local c = match(z, "^%-%-(%[?)", i)
if c then
i = i + 2
local sep = -1
if c == "[" then
sep = skip_sep(i)
end
if sep >= 0 then -- long comment
addtoken("TK_LCOMMENT", read_long_string(false, sep))
else -- short comment
I = find(z, "[\n\r]", i) or (#z + 1)
addtoken("TK_COMMENT", sub(z, buff, I - 1))
end
break -- (continue)
end
-- (fall through for "-")
else -- [ or long string
local sep = skip_sep(i)
if sep >= 0 then
addtoken("TK_LSTRING", read_long_string(true, sep))
elseif sep == -1 then
addtoken("TK_OP", "[")
else
errorline("invalid long string delimiter")
end
break -- (continue)
end
--------------------------------------------------------
elseif p <= 5 then
if p < 5 then -- strings
I = i + 1
addtoken("TK_STRING", read_string(r))
break -- (continue)
end
r = match(z, "^%.%.?%.?", i) -- .|..|... dots
-- (fall through)
--------------------------------------------------------
else -- relational
r = match(z, "^%p=?", i)
-- (fall through)
end
end
I = i + #r
addtoken("TK_OP", r) -- for other symbols, fall through
break -- (continue)
end
----------------------------------------------------------------
local r = sub(z, i, i)
if r ~= "" then
I = i + 1
addtoken("TK_OP", r) -- other single-char tokens
break
end
addtoken("TK_EOS", "") -- end of stream,
return -- exit here
----------------------------------------------------------------
end--while inner
end--while outer
end
return base.getfenv()
end)
package.preload['lparser'] = (function (...)
--[[--------------------------------------------------------------------
lparser.lua: Lua 5.1 parser in Lua
This file is part of LuaSrcDiet, based on Yueliang material.
Copyright (c) 2008 Kein-Hong Man <khman@users.sf.net>
The COPYRIGHT file describes the conditions
under which this software may be distributed.
See the ChangeLog for more information.
----------------------------------------------------------------------]]
--[[--------------------------------------------------------------------
-- NOTES:
-- * This is a version of the native 5.1.x parser from Yueliang 0.4.0,
-- with significant modifications to handle LuaSrcDiet's needs:
-- (1) needs pre-built token tables instead of a module.method
-- (2) lparser.error is an optional error handler (from llex)
-- (3) not full parsing, currently fakes raw/unlexed constants
-- (4) parser() returns globalinfo, localinfo tables
-- * Please read technotes.txt for more technical details.
-- * NO support for 'arg' vararg functions (LUA_COMPAT_VARARG)
-- * A lot of the parser is unused, but might later be useful for
-- full-on parsing and analysis for a few measly bytes saved.
----------------------------------------------------------------------]]
local base = _G
local string = string
module "lparser"
local _G = base.getfenv()
--[[--------------------------------------------------------------------
-- variable and data structure initialization
----------------------------------------------------------------------]]
----------------------------------------------------------------------
-- initialization: main variables
----------------------------------------------------------------------
local toklist, -- grammar-only token tables (token table,
seminfolist, -- semantic information table, line number
toklnlist, -- table, cross-reference table)
xreflist,
tpos, -- token position
line, -- start line # for error messages
lastln, -- last line # for ambiguous syntax chk
tok, seminfo, ln, xref, -- token, semantic info, line
nameref, -- proper position of <name> token
fs, -- current function state
top_fs, -- top-level function state
globalinfo, -- global variable information table
globallookup, -- global variable name lookup table
localinfo, -- local variable information table
ilocalinfo, -- inactive locals (prior to activation)
ilocalrefs -- corresponding references to activate
-- forward references for local functions
local explist1, expr, block, exp1, body, chunk
----------------------------------------------------------------------
-- initialization: data structures
----------------------------------------------------------------------
local gmatch = string.gmatch
local block_follow = {} -- lookahead check in chunk(), returnstat()
for v in gmatch("else elseif end until <eof>", "%S+") do
block_follow[v] = true
end
local stat_call = {} -- lookup for calls in stat()
for v in gmatch("if while do for repeat function local return break", "%S+") do
stat_call[v] = v.."_stat"
end
local binopr_left = {} -- binary operators, left priority
local binopr_right = {} -- binary operators, right priority
for op, lt, rt in gmatch([[
{+ 6 6}{- 6 6}{* 7 7}{/ 7 7}{% 7 7}
{^ 10 9}{.. 5 4}
{~= 3 3}{== 3 3}
{< 3 3}{<= 3 3}{> 3 3}{>= 3 3}
{and 2 2}{or 1 1}
]], "{(%S+)%s(%d+)%s(%d+)}") do
binopr_left[op] = lt + 0
binopr_right[op] = rt + 0
end
local unopr = { ["not"] = true, ["-"] = true,
["#"] = true, } -- unary operators
local UNARY_PRIORITY = 8 -- priority for unary operators
--[[--------------------------------------------------------------------
-- support functions
----------------------------------------------------------------------]]
----------------------------------------------------------------------
-- formats error message and throws error (duplicated from llex)
-- * a simplified version, does not report what token was responsible
----------------------------------------------------------------------
local function errorline(s, line)
local e = error or base.error
e(string.format("(source):%d: %s", line or ln, s))
end
----------------------------------------------------------------------
-- handles incoming token, semantic information pairs
-- * NOTE: 'nextt' is named 'next' originally
----------------------------------------------------------------------
-- reads in next token
local function nextt()
lastln = toklnlist[tpos]
tok, seminfo, ln, xref
= toklist[tpos], seminfolist[tpos], toklnlist[tpos], xreflist[tpos]
tpos = tpos + 1
end
-- peek at next token (single lookahead for table constructor)
local function lookahead()
return toklist[tpos]
end
----------------------------------------------------------------------
-- throws a syntax error; also used when an expected token is missing
----------------------------------------------------------------------
local function syntaxerror(msg)
local tok = tok
if tok ~= "<number>" and tok ~= "<string>" then
if tok == "<name>" then tok = seminfo end
tok = "'"..tok.."'"
end
errorline(msg.." near "..tok)
end
local function error_expected(token)
syntaxerror("'"..token.."' expected")
end
----------------------------------------------------------------------
-- tests for a token, returns outcome
-- * return value changed to boolean
----------------------------------------------------------------------
local function testnext(c)
if tok == c then nextt(); return true end
end
----------------------------------------------------------------------
-- check for existence of a token, throws error if not found
----------------------------------------------------------------------
local function check(c)
if tok ~= c then error_expected(c) end
end
----------------------------------------------------------------------
-- verify existence of a token, then skip it
----------------------------------------------------------------------
local function checknext(c)
check(c); nextt()
end
----------------------------------------------------------------------
-- throws error if condition not matched
----------------------------------------------------------------------
local function check_condition(c, msg)
if not c then syntaxerror(msg) end
end
----------------------------------------------------------------------
-- verifies token conditions are met or else throw error
----------------------------------------------------------------------
local function check_match(what, who, where)
if not testnext(what) then
if where == ln then
error_expected(what)
else
syntaxerror("'"..what.."' expected (to close '"..who.."' at line "..where..")")
end
end
end
----------------------------------------------------------------------
-- expect that token is a name, return the name
----------------------------------------------------------------------
local function str_checkname()
check("<name>")
local ts = seminfo
nameref = xref
nextt()
return ts
end
----------------------------------------------------------------------
-- adds given string s in string pool, sets e as VK
----------------------------------------------------------------------
local function codestring(e, s)
e.k = "VK"
end
----------------------------------------------------------------------
-- consume a name token, adds it to string pool
----------------------------------------------------------------------
local function checkname(e)
codestring(e, str_checkname())
end
--[[--------------------------------------------------------------------
-- variable (global|local|upvalue) handling
-- * to track locals and globals, we can extend Yueliang's minimal
-- variable management code with little trouble
-- * entry point is singlevar() for variable lookups
-- * lookup tables (bl.locallist) are maintained awkwardly in the basic
-- block data structures, PLUS the function data structure (this is
-- an inelegant hack, since bl is nil for the top level of a function)
----------------------------------------------------------------------]]
----------------------------------------------------------------------
-- register a local variable, create local variable object, set in
-- to-activate variable list
-- * used in new_localvarliteral(), parlist(), fornum(), forlist(),
-- localfunc(), localstat()
----------------------------------------------------------------------
local function new_localvar(name, special)
local bl = fs.bl
local locallist
-- locate locallist in current block object or function root object
if bl then
locallist = bl.locallist
else
locallist = fs.locallist
end
-- build local variable information object and set localinfo
local id = #localinfo + 1
localinfo[id] = { -- new local variable object
name = name, -- local variable name
xref = { nameref }, -- xref, first value is declaration
decl = nameref, -- location of declaration, = xref[1]
}
if special then -- "self" must not be changed
localinfo[id].isself = true
end
-- this can override a local with the same name in the same scope
-- but first, keep it inactive until it gets activated
local i = #ilocalinfo + 1
ilocalinfo[i] = id
ilocalrefs[i] = locallist
end
----------------------------------------------------------------------
-- actually activate the variables so that they are visible
-- * remember Lua semantics, e.g. RHS is evaluated first, then LHS
-- * used in parlist(), forbody(), localfunc(), localstat(), body()
----------------------------------------------------------------------
local function adjustlocalvars(nvars)
local sz = #ilocalinfo
-- i goes from left to right, in order of local allocation, because
-- of something like: local a,a,a = 1,2,3 which gives a = 3
while nvars > 0 do
nvars = nvars - 1
local i = sz - nvars
local id = ilocalinfo[i] -- local's id
local obj = localinfo[id]
local name = obj.name -- name of local
obj.act = xref -- set activation location
ilocalinfo[i] = nil
local locallist = ilocalrefs[i] -- ref to lookup table to update
ilocalrefs[i] = nil
local existing = locallist[name] -- if existing, remove old first!
if existing then -- do not overlap, set special
obj = localinfo[existing] -- form of rem, as -id
obj.rem = -id
end
locallist[name] = id -- activate, now visible to Lua
end
end
----------------------------------------------------------------------
-- remove (deactivate) variables in current scope (before scope exits)
-- * zap entire locallist tables since we are not allocating registers
-- * used in leaveblock(), close_func()
----------------------------------------------------------------------
local function removevars()
local bl = fs.bl
local locallist
-- locate locallist in current block object or function root object
if bl then
locallist = bl.locallist
else
locallist = fs.locallist
end
-- enumerate the local list at current scope and deactivate 'em
for name, id in base.pairs(locallist) do
local obj = localinfo[id]
obj.rem = xref -- set deactivation location
end
end
----------------------------------------------------------------------
-- creates a new local variable given a name
-- * skips internal locals (those starting with '('), so internal
-- locals never need a corresponding adjustlocalvars() call
-- * special is true for "self" which must not be optimized
-- * used in fornum(), forlist(), parlist(), body()
----------------------------------------------------------------------
local function new_localvarliteral(name, special)
if string.sub(name, 1, 1) == "(" then -- can skip internal locals
return
end
new_localvar(name, special)
end
----------------------------------------------------------------------
-- search the local variable namespace of the given fs for a match
-- * returns localinfo index
-- * used only in singlevaraux()
----------------------------------------------------------------------
local function searchvar(fs, n)
local bl = fs.bl
local locallist
if bl then
locallist = bl.locallist
while locallist do
if locallist[n] then return locallist[n] end -- found
bl = bl.prev
locallist = bl and bl.locallist
end
end
locallist = fs.locallist
return locallist[n] or -1 -- found or not found (-1)
end
----------------------------------------------------------------------
-- handle locals, globals and upvalues and related processing
-- * search mechanism is recursive, calls itself to search parents
-- * used only in singlevar()
----------------------------------------------------------------------
local function singlevaraux(fs, n, var)
if fs == nil then -- no more levels?
var.k = "VGLOBAL" -- default is global variable
return "VGLOBAL"
else
local v = searchvar(fs, n) -- look up at current level
if v >= 0 then
var.k = "VLOCAL"
var.id = v
-- codegen may need to deal with upvalue here
return "VLOCAL"
else -- not found at current level; try upper one
if singlevaraux(fs.prev, n, var) == "VGLOBAL" then
return "VGLOBAL"
end
-- else was LOCAL or UPVAL, handle here
var.k = "VUPVAL" -- upvalue in this level
return "VUPVAL"
end--if v
end--if fs
end
----------------------------------------------------------------------
-- consume a name token, creates a variable (global|local|upvalue)
-- * used in prefixexp(), funcname()
----------------------------------------------------------------------
local function singlevar(v)
local name = str_checkname()
singlevaraux(fs, name, v)
------------------------------------------------------------------
-- variable tracking
------------------------------------------------------------------
if v.k == "VGLOBAL" then
-- if global being accessed, keep track of it by creating an object
local id = globallookup[name]
if not id then
id = #globalinfo + 1
globalinfo[id] = { -- new global variable object
name = name, -- global variable name
xref = { nameref }, -- xref, first value is declaration
}
globallookup[name] = id -- remember it
else
local obj = globalinfo[id].xref
obj[#obj + 1] = nameref -- add xref
end
else
-- local/upvalue is being accessed, keep track of it
local id = v.id
local obj = localinfo[id].xref
obj[#obj + 1] = nameref -- add xref
end
end
--[[--------------------------------------------------------------------
-- state management functions with open/close pairs
----------------------------------------------------------------------]]
----------------------------------------------------------------------
-- enters a code unit, initializes elements
----------------------------------------------------------------------
local function enterblock(isbreakable)
local bl = {} -- per-block state
bl.isbreakable = isbreakable
bl.prev = fs.bl
bl.locallist = {}
fs.bl = bl
end
----------------------------------------------------------------------
-- leaves a code unit, close any upvalues
----------------------------------------------------------------------
local function leaveblock()
local bl = fs.bl
removevars()
fs.bl = bl.prev
end
----------------------------------------------------------------------
-- opening of a function
-- * top_fs is only for anchoring the top fs, so that parser() can
-- return it to the caller function along with useful output
-- * used in parser() and body()
----------------------------------------------------------------------
local function open_func()
local new_fs -- per-function state
if not fs then -- top_fs is created early
new_fs = top_fs
else
new_fs = {}
end
new_fs.prev = fs -- linked list of function states
new_fs.bl = nil
new_fs.locallist = {}
fs = new_fs
end
----------------------------------------------------------------------
-- closing of a function
-- * used in parser() and body()
----------------------------------------------------------------------
local function close_func()
removevars()
fs = fs.prev
end
--[[--------------------------------------------------------------------
-- other parsing functions
-- * for table constructor, parameter list, argument list
----------------------------------------------------------------------]]
----------------------------------------------------------------------
-- parse a function name suffix, for function call specifications
-- * used in primaryexp(), funcname()
----------------------------------------------------------------------
local function field(v)
-- field -> ['.' | ':'] NAME
local key = {}
nextt() -- skip the dot or colon
checkname(key)
v.k = "VINDEXED"
end
----------------------------------------------------------------------
-- parse a table indexing suffix, for constructors, expressions
-- * used in recfield(), primaryexp()
----------------------------------------------------------------------
local function yindex(v)
-- index -> '[' expr ']'
nextt() -- skip the '['
expr(v)
checknext("]")
end
----------------------------------------------------------------------
-- parse a table record (hash) field
-- * used in constructor()
----------------------------------------------------------------------
local function recfield(cc)
-- recfield -> (NAME | '['exp1']') = exp1
local key, val = {}, {}
if tok == "<name>" then
checkname(key)
else -- tok == '['
yindex(key)
end
checknext("=")
expr(val)
end
----------------------------------------------------------------------
-- emit a set list instruction if enough elements (LFIELDS_PER_FLUSH)
-- * note: retained in this skeleton because it modifies cc.v.k
-- * used in constructor()
----------------------------------------------------------------------
local function closelistfield(cc)
if cc.v.k == "VVOID" then return end -- there is no list item
cc.v.k = "VVOID"
end
----------------------------------------------------------------------
-- parse a table list (array) field
-- * used in constructor()
----------------------------------------------------------------------
local function listfield(cc)
expr(cc.v)
end
----------------------------------------------------------------------
-- parse a table constructor
-- * used in funcargs(), simpleexp()
----------------------------------------------------------------------
local function constructor(t)
-- constructor -> '{' [ field { fieldsep field } [ fieldsep ] ] '}'
-- field -> recfield | listfield
-- fieldsep -> ',' | ';'
local line = ln
local cc = {}
cc.v = {}
cc.t = t
t.k = "VRELOCABLE"
cc.v.k = "VVOID"
checknext("{")
repeat
if tok == "}" then break end
-- closelistfield(cc) here
local c = tok
if c == "<name>" then -- may be listfields or recfields
if lookahead() ~= "=" then -- look ahead: expression?
listfield(cc)
else
recfield(cc)
end
elseif c == "[" then -- constructor_item -> recfield
recfield(cc)
else -- constructor_part -> listfield
listfield(cc)
end
until not testnext(",") and not testnext(";")
check_match("}", "{", line)
-- lastlistfield(cc) here
end
----------------------------------------------------------------------
-- parse the arguments (parameters) of a function declaration
-- * used in body()
----------------------------------------------------------------------
local function parlist()
-- parlist -> [ param { ',' param } ]
local nparams = 0
if tok ~= ")" then -- is 'parlist' not empty?
repeat
local c = tok
if c == "<name>" then -- param -> NAME
new_localvar(str_checkname())
nparams = nparams + 1
elseif c == "..." then
nextt()
fs.is_vararg = true
else
syntaxerror("<name> or '...' expected")
end
until fs.is_vararg or not testnext(",")
end--if
adjustlocalvars(nparams)
end
----------------------------------------------------------------------
-- parse the parameters of a function call
-- * contrast with parlist(), used in function declarations
-- * used in primaryexp()
----------------------------------------------------------------------
local function funcargs(f)
local args = {}
local line = ln
local c = tok
if c == "(" then -- funcargs -> '(' [ explist1 ] ')'
if line ~= lastln then
syntaxerror("ambiguous syntax (function call x new statement)")
end
nextt()
if tok == ")" then -- arg list is empty?
args.k = "VVOID"
else
explist1(args)
end
check_match(")", "(", line)
elseif c == "{" then -- funcargs -> constructor
constructor(args)
elseif c == "<string>" then -- funcargs -> STRING
codestring(args, seminfo)
nextt() -- must use 'seminfo' before 'next'
else
syntaxerror("function arguments expected")
return
end--if c
f.k = "VCALL"
end
--[[--------------------------------------------------------------------
-- mostly expression functions
----------------------------------------------------------------------]]
----------------------------------------------------------------------
-- parses an expression in parentheses or a single variable
-- * used in primaryexp()
----------------------------------------------------------------------
local function prefixexp(v)
-- prefixexp -> NAME | '(' expr ')'
local c = tok
if c == "(" then
local line = ln
nextt()
expr(v)
check_match(")", "(", line)
elseif c == "<name>" then
singlevar(v)
else
syntaxerror("unexpected symbol")
end--if c
end
----------------------------------------------------------------------
-- parses a prefixexp (an expression in parentheses or a single
-- variable) or a function call specification
-- * used in simpleexp(), assignment(), expr_stat()
----------------------------------------------------------------------
local function primaryexp(v)
-- primaryexp ->
-- prefixexp { '.' NAME | '[' exp ']' | ':' NAME funcargs | funcargs }
prefixexp(v)
while true do
local c = tok
if c == "." then -- field
field(v)
elseif c == "[" then -- '[' exp1 ']'
local key = {}
yindex(key)
elseif c == ":" then -- ':' NAME funcargs
local key = {}
nextt()
checkname(key)
funcargs(v)
elseif c == "(" or c == "<string>" or c == "{" then -- funcargs
funcargs(v)
else
return
end--if c
end--while
end
----------------------------------------------------------------------
-- parses general expression types, constants handled here
-- * used in subexpr()
----------------------------------------------------------------------
local function simpleexp(v)
-- simpleexp -> NUMBER | STRING | NIL | TRUE | FALSE | ... |
-- constructor | FUNCTION body | primaryexp
local c = tok
if c == "<number>" then
v.k = "VKNUM"
elseif c == "<string>" then
codestring(v, seminfo)
elseif c == "nil" then
v.k = "VNIL"
elseif c == "true" then
v.k = "VTRUE"
elseif c == "false" then
v.k = "VFALSE"
elseif c == "..." then -- vararg
check_condition(fs.is_vararg == true,
"cannot use '...' outside a vararg function");
v.k = "VVARARG"
elseif c == "{" then -- constructor
constructor(v)
return
elseif c == "function" then
nextt()
body(v, false, ln)
return
else
primaryexp(v)
return
end--if c
nextt()
end
------------------------------------------------------------------------
-- Parse subexpressions. Includes handling of unary operators and binary
-- operators. A subexpr is given the rhs priority level of the operator
-- immediately left of it, if any (limit is 0 if none), and if a binop
-- is found, limit is compared with the lhs priority level of the binop
-- in order to determine which executes first.
-- * recursively called
-- * used in expr()
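-- * e.g. for "a + b * c": after 'a' is read and '+' is seen, subexpr
-- is re-entered with limit 6 (the rhs priority of '+'); '*' has lhs
-- priority 7 > 6, so "b * c" is grouped first at the deeper level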
------------------------------------------------------------------------
local function subexpr(v, limit)
-- subexpr -> (simpleexp | unop subexpr) { binop subexpr }
-- * where 'binop' is any binary operator with a priority
-- higher than 'limit'
local op = tok
local uop = unopr[op]
if uop then
nextt()
subexpr(v, UNARY_PRIORITY)
else
simpleexp(v)
end
-- expand while operators have priorities higher than 'limit'
op = tok
local binop = binopr_left[op]
while binop and binop > limit do
local v2 = {}
nextt()
-- read sub-expression with higher priority
local nextop = subexpr(v2, binopr_right[op])
op = nextop
binop = binopr_left[op]
end
return op -- return first untreated operator
end
----------------------------------------------------------------------
-- Expression parsing starts here. Function subexpr is entered with a
-- limit of 0 (there is no operator to its left), which is lower than
-- the priority of all actual operators. Expr information is returned
-- in parm v.
-- * used in cond(), explist1(), index(), recfield(), listfield(),
-- prefixexp(), while_stat(), exp1()
----------------------------------------------------------------------
-- this is a forward-referenced local
function expr(v)
-- expr -> subexpr
subexpr(v, 0)
end
--[[--------------------------------------------------------------------
-- third level parsing functions
----------------------------------------------------------------------]]
------------------------------------------------------------------------
-- parse a variable assignment sequence
-- * recursively called
-- * used in expr_stat()
------------------------------------------------------------------------
local function assignment(v)
local e = {}
local c = v.v.k
check_condition(c == "VLOCAL" or c == "VUPVAL" or c == "VGLOBAL"
or c == "VINDEXED", "syntax error")
if testnext(",") then -- assignment -> ',' primaryexp assignment
local nv = {} -- expdesc
nv.v = {}
primaryexp(nv.v)
-- lparser.c deals with some register usage conflict here
assignment(nv)
else -- assignment -> '=' explist1
checknext("=")
explist1(e)
return -- avoid default
end
e.k = "VNONRELOC"
end
----------------------------------------------------------------------
-- parse a for loop body for both versions of the for loop
-- * used in fornum(), forlist()
----------------------------------------------------------------------
local function forbody(nvars, isnum)
-- forbody -> DO block
checknext("do")
enterblock(false) -- scope for declared variables
adjustlocalvars(nvars)
block()
leaveblock() -- end of scope for declared variables
end
----------------------------------------------------------------------
-- parse a numerical for loop, calls forbody()
-- * used in for_stat()
----------------------------------------------------------------------
local function fornum(varname)
-- fornum -> NAME = exp1, exp1 [, exp1] DO body
local line = line
new_localvarliteral("(for index)")
new_localvarliteral("(for limit)")
new_localvarliteral("(for step)")
new_localvar(varname)
checknext("=")
exp1() -- initial value
checknext(",")
exp1() -- limit
if testnext(",") then
exp1() -- optional step
else
-- default step = 1
end
forbody(1, true)
end
----------------------------------------------------------------------
-- parse a generic for loop, calls forbody()
-- * used in for_stat()
----------------------------------------------------------------------
local function forlist(indexname)
-- forlist -> NAME {, NAME} IN explist1 DO body
local e = {}
-- create control variables
new_localvarliteral("(for generator)")
new_localvarliteral("(for state)")
new_localvarliteral("(for control)")
-- create declared variables
new_localvar(indexname)
local nvars = 1
while testnext(",") do
new_localvar(str_checkname())
nvars = nvars + 1
end
checknext("in")
local line = line
explist1(e)
forbody(nvars, false)
end
----------------------------------------------------------------------
-- parse a function name specification
-- * used in func_stat()
----------------------------------------------------------------------
local function funcname(v)
-- funcname -> NAME {field} [':' NAME]
local needself = false
singlevar(v)
while tok == "." do
field(v)
end
if tok == ":" then
needself = true
field(v)
end
return needself
end
----------------------------------------------------------------------
-- parse the single expressions needed in numerical for loops
-- * used in fornum()
----------------------------------------------------------------------
-- this is a forward-referenced local
function exp1()
-- exp1 -> expr
local e = {}
expr(e)
end
----------------------------------------------------------------------
-- parse condition in a repeat statement or an if control structure
-- * used in repeat_stat(), test_then_block()
----------------------------------------------------------------------
local function cond()
-- cond -> expr
local v = {}
expr(v) -- read condition
end
----------------------------------------------------------------------
-- parse part of an if control structure, including the condition
-- * used in if_stat()
----------------------------------------------------------------------
local function test_then_block()
-- test_then_block -> [IF | ELSEIF] cond THEN block
nextt() -- skip IF or ELSEIF
cond()
checknext("then")
block() -- 'then' part
end
----------------------------------------------------------------------
-- parse a local function statement
-- * used in local_stat()
----------------------------------------------------------------------
local function localfunc()
-- localfunc -> NAME body
local v, b = {}
new_localvar(str_checkname())
v.k = "VLOCAL"
adjustlocalvars(1)
body(b, false, ln)
end
----------------------------------------------------------------------
-- parse a local variable declaration statement
-- * used in local_stat()
----------------------------------------------------------------------
local function localstat()
-- localstat -> NAME {',' NAME} ['=' explist1]
local nvars = 0
local e = {}
repeat
new_localvar(str_checkname())
nvars = nvars + 1
until not testnext(",")
if testnext("=") then
explist1(e)
else
e.k = "VVOID"
end
adjustlocalvars(nvars)
end
----------------------------------------------------------------------
-- parse a list of comma-separated expressions
-- * used in return_stat(), localstat(), funcargs(), assignment(),
-- forlist()
----------------------------------------------------------------------
-- this is a forward-referenced local
function explist1(e)
-- explist1 -> expr { ',' expr }
expr(e)
while testnext(",") do
expr(e)
end
end
----------------------------------------------------------------------
-- parse function declaration body
-- * used in simpleexp(), localfunc(), func_stat()
----------------------------------------------------------------------
-- this is a forward-referenced local
function body(e, needself, line)
-- body -> '(' parlist ')' chunk END
open_func()
checknext("(")
if needself then
new_localvarliteral("self", true)
adjustlocalvars(1)
end
parlist()
checknext(")")
chunk()
check_match("end", "function", line)
close_func()
end
----------------------------------------------------------------------
-- parse a code block or unit
-- * used in do_stat(), while_stat(), forbody(), test_then_block(),
-- if_stat()
----------------------------------------------------------------------
-- this is a forward-referenced local
function block()
-- block -> chunk
enterblock(false)
chunk()
leaveblock()
end
--[[--------------------------------------------------------------------
-- second level parsing functions, all with '_stat' suffix
-- * since they are called via a table lookup, they cannot be local
-- functions (a lookup table of local functions might be smaller...)
-- * stat() -> *_stat()
----------------------------------------------------------------------]]
----------------------------------------------------------------------
-- initial parsing for a for loop, calls fornum() or forlist()
-- * removed 'line' parameter (used to set debug information only)
-- * used in stat()
----------------------------------------------------------------------
function for_stat()
-- stat -> for_stat -> FOR (fornum | forlist) END
local line = line
enterblock(true) -- scope for loop and control variables
nextt() -- skip 'for'
local varname = str_checkname() -- first variable name
local c = tok
if c == "=" then
fornum(varname)
elseif c == "," or c == "in" then
forlist(varname)
else
syntaxerror("'=' or 'in' expected")
end
check_match("end", "for", line)
leaveblock() -- loop scope (`break' jumps to this point)
end
----------------------------------------------------------------------
-- parse a while-do control structure, body processed by block()
-- * used in stat()
----------------------------------------------------------------------
function while_stat()
-- stat -> while_stat -> WHILE cond DO block END
local line = line
nextt() -- skip WHILE
cond() -- parse condition
enterblock(true)
checknext("do")
block()
check_match("end", "while", line)
leaveblock()
end
----------------------------------------------------------------------
-- parse a repeat-until control structure, body parsed by chunk()
-- * originally, repeatstat() also calls breakstat() if there is an
-- upvalue in the scope block; nothing is actually lexed there, it
-- is simply the common code in breakstat() for closing upvalues
-- * used in stat()
----------------------------------------------------------------------
function repeat_stat()
-- stat -> repeat_stat -> REPEAT block UNTIL cond
local line = line
enterblock(true) -- loop block
enterblock(false) -- scope block
nextt() -- skip REPEAT
chunk()
check_match("until", "repeat", line)
cond()
-- close upvalues at scope level below
leaveblock() -- finish scope
leaveblock() -- finish loop
end
----------------------------------------------------------------------
-- parse an if control structure
-- * used in stat()
----------------------------------------------------------------------
function if_stat()
-- stat -> if_stat -> IF cond THEN block
-- {ELSEIF cond THEN block} [ELSE block] END
local line = line
local v = {}
test_then_block() -- IF cond THEN block
while tok == "elseif" do
test_then_block() -- ELSEIF cond THEN block
end
if tok == "else" then
nextt() -- skip ELSE
block() -- 'else' part
end
check_match("end", "if", line)
end
----------------------------------------------------------------------
-- parse a return statement
-- * used in stat()
----------------------------------------------------------------------
function return_stat()
-- stat -> return_stat -> RETURN explist
local e = {}
nextt() -- skip RETURN
local c = tok
if block_follow[c] or c == ";" then
-- return no values
else
explist1(e) -- optional return values
end
end
----------------------------------------------------------------------
-- parse a break statement
-- * used in stat()
----------------------------------------------------------------------
function break_stat()
-- stat -> break_stat -> BREAK
local bl = fs.bl
nextt() -- skip BREAK
while bl and not bl.isbreakable do -- find a breakable block
bl = bl.prev
end
if not bl then
syntaxerror("no loop to break")
end
end
----------------------------------------------------------------------
-- parse a function call with no returns or an assignment statement
-- * the struct with .prev is used for name searching in lparser.c,
-- so it is retained for now; present in assignment() also
-- * used in stat()
----------------------------------------------------------------------
function expr_stat()
-- stat -> expr_stat -> func | assignment
local v = {}
v.v = {}
primaryexp(v.v)
if v.v.k == "VCALL" then -- stat -> func
-- call statement uses no results
else -- stat -> assignment
v.prev = nil
assignment(v)
end
end
----------------------------------------------------------------------
-- parse a function statement
-- * used in stat()
----------------------------------------------------------------------
function function_stat()
-- stat -> function_stat -> FUNCTION funcname body
local line = line
local v, b = {}, {}
nextt() -- skip FUNCTION
local needself = funcname(v)
body(b, needself, line)
end
----------------------------------------------------------------------
-- parse a simple block enclosed by a DO..END pair
-- * used in stat()
----------------------------------------------------------------------
function do_stat()
-- stat -> do_stat -> DO block END
local line = line
nextt() -- skip DO
block()
check_match("end", "do", line)
end
----------------------------------------------------------------------
-- parse a statement starting with LOCAL
-- * used in stat()
----------------------------------------------------------------------
function local_stat()
-- stat -> local_stat -> LOCAL FUNCTION localfunc
-- -> LOCAL localstat
nextt() -- skip LOCAL
if testnext("function") then -- local function?
localfunc()
else
localstat()
end
end
--[[--------------------------------------------------------------------
-- main functions, top level parsing functions
-- * accessible functions are: init(lexer), parser()
-- * [entry] -> parser() -> chunk() -> stat()
----------------------------------------------------------------------]]
----------------------------------------------------------------------
-- initial parsing for statements, calls '_stat' suffixed functions
-- * used in chunk()
----------------------------------------------------------------------
local function stat()
-- stat -> if_stat while_stat do_stat for_stat repeat_stat
-- function_stat local_stat return_stat break_stat
-- expr_stat
line = ln -- may be needed for error messages
local c = tok
local fn = stat_call[c]
-- handles: if while do for repeat function local return break
if fn then
_G[fn]()
-- return or break must be last statement
if c == "return" or c == "break" then return true end
else
expr_stat()
end
return false
end
----------------------------------------------------------------------
-- parse a chunk, which consists of a bunch of statements
-- * used in parser(), body(), block(), repeat_stat()
----------------------------------------------------------------------
-- this is a forward-referenced local
function chunk()
-- chunk -> { stat [';'] }
local islast = false
while not islast and not block_follow[tok] do
islast = stat()
testnext(";")
end
end
----------------------------------------------------------------------
-- performs parsing, returns parsed data structure
----------------------------------------------------------------------
function parser()
open_func()
fs.is_vararg = true -- main func. is always vararg
nextt() -- read first token
chunk()
check("<eof>")
close_func()
return globalinfo, localinfo
end
----------------------------------------------------------------------
-- initialization function
----------------------------------------------------------------------
function init(tokorig, seminfoorig, toklnorig)
tpos = 1 -- token position
top_fs = {} -- reset top level function state
------------------------------------------------------------------
-- set up grammar-only token tables; impedance-matching...
-- note that constants returned by the lexer are source-level, so
-- for now, fake(!) constant tokens (TK_NUMBER|TK_STRING|TK_LSTRING)
------------------------------------------------------------------
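-- e.g. a TK_NAME token with seminfo "foo" becomes "<name>"/"foo",
-- while a TK_NUMBER token becomes "<number>" with a fake seminfo of 0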
local j = 1
toklist, seminfolist, toklnlist, xreflist = {}, {}, {}, {}
for i = 1, #tokorig do
local tok = tokorig[i]
local yep = true
if tok == "TK_KEYWORD" or tok == "TK_OP" then
tok = seminfoorig[i]
elseif tok == "TK_NAME" then
tok = "<name>"
seminfolist[j] = seminfoorig[i]
elseif tok == "TK_NUMBER" then
tok = "<number>"
seminfolist[j] = 0 -- fake!
elseif tok == "TK_STRING" or tok == "TK_LSTRING" then
tok = "<string>"
seminfolist[j] = "" -- fake!
elseif tok == "TK_EOS" then
tok = "<eof>"
else
-- non-grammar tokens; ignore them
yep = false
end
if yep then -- set rest of the information
toklist[j] = tok
toklnlist[j] = toklnorig[i]
xreflist[j] = i
j = j + 1
end
end--for
------------------------------------------------------------------
-- initialize data structures for variable tracking
------------------------------------------------------------------
globalinfo, globallookup, localinfo = {}, {}, {}
ilocalinfo, ilocalrefs = {}, {}
end
return _G
end)
package.preload['optlex'] = (function (...)
--[[--------------------------------------------------------------------
optlex.lua: does lexer-based optimizations
This file is part of LuaSrcDiet.
Copyright (c) 2008 Kein-Hong Man <khman@users.sf.net>
The COPYRIGHT file describes the conditions
under which this software may be distributed.
See the ChangeLog for more information.
----------------------------------------------------------------------]]
--[[--------------------------------------------------------------------
-- NOTES:
-- * For more lexer-based optimization ideas, see the TODO items or
-- look at technotes.txt.
-- * TODO: general string delimiter conversion optimizer
-- * TODO: (numbers) warn if overly significant digit
----------------------------------------------------------------------]]
local base = _G
local string = string
module "optlex"
local match = string.match
local sub = string.sub
local find = string.find
local rep = string.rep
local print
------------------------------------------------------------------------
-- variables and data structures
------------------------------------------------------------------------
-- error function; can be overridden by setting your own function in the module
error = base.error
warn = {} -- table for warning flags
local stoks, sinfos, stoklns -- source lists
local is_realtoken = { -- significant (grammar) tokens
TK_KEYWORD = true,
TK_NAME = true,
TK_NUMBER = true,
TK_STRING = true,
TK_LSTRING = true,
TK_OP = true,
TK_EOS = true,
}
local is_faketoken = { -- whitespace (non-grammar) tokens
TK_COMMENT = true,
TK_LCOMMENT = true,
TK_EOL = true,
TK_SPACE = true,
}
local opt_details -- for extra information
------------------------------------------------------------------------
-- true if current token is at the start of a line
-- * skips over deleted tokens via recursion
------------------------------------------------------------------------
local function atlinestart(i)
local tok = stoks[i - 1]
if i <= 1 or tok == "TK_EOL" then
return true
elseif tok == "" then
return atlinestart(i - 1)
end
return false
end
------------------------------------------------------------------------
-- true if current token is at the end of a line
-- * skips over deleted tokens via recursion
------------------------------------------------------------------------
local function atlineend(i)
local tok = stoks[i + 1]
if i >= #stoks or tok == "TK_EOL" or tok == "TK_EOS" then
return true
elseif tok == "" then
return atlineend(i + 1)
end
return false
end
------------------------------------------------------------------------
-- counts comment EOLs inside a long comment
-- * in order to keep line numbering, EOLs need to be reinserted
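-- * e.g. commenteols("--[[\nfoo\nbar\n]]") returns 3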
------------------------------------------------------------------------
local function commenteols(lcomment)
local sep = #match(lcomment, "^%-%-%[=*%[")
local z = sub(lcomment, sep + 1, -(sep - 1)) -- remove delims
local i, c = 1, 0
while true do
local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i)
if not p then break end -- if no matches, done
i = p + 1
c = c + 1
if #s > 0 and r ~= s then -- skip CRLF or LFCR
i = i + 1
end
end
return c
end
------------------------------------------------------------------------
-- compares two tokens (i, j) and returns the whitespace required
-- * important! see technotes.txt for more information
-- * only two grammar/real tokens are being considered
-- * if "", no separation is needed
-- * if " ", then at least one whitespace (or EOL) is required
------------------------------------------------------------------------
local function checkpair(i, j)
local match = match
local t1, t2 = stoks[i], stoks[j]
--------------------------------------------------------------------
if t1 == "TK_STRING" or t1 == "TK_LSTRING" or
t2 == "TK_STRING" or t2 == "TK_LSTRING" then
return ""
--------------------------------------------------------------------
elseif t1 == "TK_OP" or t2 == "TK_OP" then
if (t1 == "TK_OP" and (t2 == "TK_KEYWORD" or t2 == "TK_NAME")) or
(t2 == "TK_OP" and (t1 == "TK_KEYWORD" or t1 == "TK_NAME")) then
return ""
end
if t1 == "TK_OP" and t2 == "TK_OP" then
-- for TK_OP/TK_OP pairs, see notes in technotes.txt
local op, op2 = sinfos[i], sinfos[j]
if (match(op, "^%.%.?$") and match(op2, "^%.")) or
(match(op, "^[~=<>]$") and op2 == "=") or
(op == "[" and (op2 == "[" or op2 == "=")) then
return " "
end
return ""
end
-- "TK_OP" + "TK_NUMBER" case
local op = sinfos[i]
if t2 == "TK_OP" then op = sinfos[j] end
if match(op, "^%.%.?%.?$") then
return " "
end
return ""
--------------------------------------------------------------------
else-- "TK_KEYWORD" | "TK_NAME" | "TK_NUMBER" then
return " "
--------------------------------------------------------------------
end
end
------------------------------------------------------------------------
-- repack tokens, removing deletions caused by optimization process
------------------------------------------------------------------------
local function repack_tokens()
local dtoks, dinfos, dtoklns = {}, {}, {}
local j = 1
for i = 1, #stoks do
local tok = stoks[i]
if tok ~= "" then
dtoks[j], dinfos[j], dtoklns[j] = tok, sinfos[i], stoklns[i]
j = j + 1
end
end
stoks, sinfos, stoklns = dtoks, dinfos, dtoklns
end
------------------------------------------------------------------------
-- number optimization
-- * optimization using string formatting functions is one way of doing
-- this, but here, we consider all cases and handle them separately
-- (possibly an idiotic approach...)
-- * scientific notation being generated is not in canonical form, this
-- may or may not be a bad thing, feedback welcome
-- * note: intermediate portions need to fit into a normal number range
-- * optimizations can be divided based on number patterns:
-- * hexadecimal:
-- (1) no need to remove leading zeros, just skip to (2)
-- (2) convert to integer if size equal or smaller
-- * change if equal size -> lose the 'x' to reduce entropy
-- (3) number is then processed as an integer
-- (4) note: does not make 0[xX] consistent
-- * integer:
-- (1) note: includes anything with trailing ".", ".0", ...
-- (2) remove useless fractional part, if present, e.g. 123.000
-- (3) remove leading zeros, e.g. 000123
-- (4) switch to scientific if shorter, e.g. 123000 -> 123e3
-- * with fraction:
-- (1) split into digits dot digits
-- (2) if no integer portion, take as zero (can omit later)
-- (3) handle degenerate .000 case, after which the fractional part
-- must be non-zero (if zero, it's matched as an integer)
-- (4) remove trailing zeros for fractional portion
-- (5) p.q where p > 0 and q > 0 cannot be shortened any more
-- (6) otherwise p == 0 and the form is .q, e.g. .000123
-- (7) if scientific shorter, convert, e.g. .000123 -> 123e-6
-- * scientific:
-- (1) split into (digits dot digits) [eE] ([+-] digits)
-- (2) if significand has ".", shift it out so it becomes an integer
-- (3) if significand is zero, just use zero
-- (4) remove leading zeros for significand
-- (5) shift out trailing zeros for significand
-- (6) examine exponent and determine which format is best:
-- integer, with fraction, scientific
------------------------------------------------------------------------
local function do_number(i)
local before = sinfos[i] -- 'before'
local z = before -- working representation
local y -- 'after', if better
--------------------------------------------------------------------
if match(z, "^0[xX]") then -- hexadecimal number
local v = base.tostring(base.tonumber(z))
if #v <= #z then
z = v -- change to integer, AND continue
else
return -- no change; stick to hex
end
end
--------------------------------------------------------------------
if match(z, "^%d+%.?0*$") then -- integer or has useless frac
z = match(z, "^(%d+)%.?0*$") -- int portion only
if z + 0 > 0 then
z = match(z, "^0*([1-9]%d*)$") -- remove leading zeros
local v = #match(z, "0*$")
local nv = base.tostring(v)
if v > #nv + 1 then -- scientific is shorter
z = sub(z, 1, #z - v).."e"..nv
end
y = z
else
y = "0" -- basic zero
end
--------------------------------------------------------------------
elseif not match(z, "[eE]") then -- number with fraction part
local p, q = match(z, "^(%d*)%.(%d+)$") -- split
if p == "" then p = 0 end -- int part zero
if q + 0 == 0 and p == 0 then
y = "0" -- degenerate .000 case
else
-- now, q > 0 holds and p is a number
local v = #match(q, "0*$") -- remove trailing zeros
if v > 0 then
q = sub(q, 1, #q - v)
end
-- if p > 0, nothing else we can do to simplify p.q case
if p + 0 > 0 then
y = p.."."..q
else
y = "."..q -- tentative, e.g. .000123
local v = #match(q, "^0*") -- # leading spaces
local w = #q - v -- # significant digits
local nv = base.tostring(#q)
-- e.g. compare 123e-6 versus .000123
if w + 2 + #nv < 1 + #q then
y = sub(q, -w).."e-"..nv
end
end
end
--------------------------------------------------------------------
else -- scientific number
local sig, ex = match(z, "^([^eE]+)[eE]([%+%-]?%d+)$")
ex = base.tonumber(ex)
-- if got ".", shift out fractional portion of significand
local p, q = match(sig, "^(%d*)%.(%d*)$")
if p then
ex = ex - #q
sig = p..q
end
if sig + 0 == 0 then
y = "0" -- basic zero
else
local v = #match(sig, "^0*") -- remove leading zeros
sig = sub(sig, v + 1)
v = #match(sig, "0*$") -- shift out trailing zeros
if v > 0 then
sig = sub(sig, 1, #sig - v)
ex = ex + v
end
-- examine exponent and determine which format is best
local nex = base.tostring(ex)
if ex == 0 then -- it's just an integer
y = sig
elseif ex > 0 and (ex <= 1 + #nex) then -- a number
y = sig..rep("0", ex)
elseif ex < 0 and (ex >= -#sig) then -- fraction, e.g. .123
v = #sig + ex
y = sub(sig, 1, v).."."..sub(sig, v + 1)
elseif ex < 0 and (#nex >= -ex - #sig) then
-- e.g. compare 1234e-5 versus .01234
-- gives: #sig + 1 + #nex >= 1 + (-ex - #sig) + #sig
-- -> #nex >= -ex - #sig
v = -ex - #sig
y = "."..rep("0", v)..sig
else -- non-canonical scientific representation
y = sig.."e"..ex
end
end--if sig
end
--------------------------------------------------------------------
if y and y ~= sinfos[i] then
if opt_details then
print("<number> (line "..stoklns[i]..") "..sinfos[i].." -> "..y)
opt_details = opt_details + 1
end
sinfos[i] = y
end
end
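------------------------------------------------------------------------
-- worked examples (not executed) of the rules implemented above:
--   "0x0010" --> "16" (hex converted to a shorter integer)
--   "123.000" --> "123" (useless fractional part removed)
--   "000123" --> "123" (leading zeros removed)
--   "120000" --> "12e4" (scientific notation is shorter)
--   ".000123" --> "123e-6" (fraction converted to scientific)
--   "1.23e3" --> "1230" (scientific converted back to an integer)
------------------------------------------------------------------------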
------------------------------------------------------------------------
-- string optimization
-- * note: works on well-formed strings only!
-- * optimizations on characters can be summarized as follows:
-- \a\b\f\n\r\t\v -- no change
-- \\ -- no change
-- \"\' -- depends on delim, other can remove \
-- \[\] -- remove \
-- \<char> -- general escape, remove \
-- \<eol> -- normalize the EOL only
-- \ddd -- if \a\b\f\n\r\t\v, change to latter
-- if other < ascii 32, keep ddd but zap leading zeros
-- if >= ascii 32, translate it into the literal, then also
-- do escapes for \\,\",\' cases
-- <other> -- no change
-- * switch delimiters if string becomes shorter
------------------------------------------------------------------------
local function do_string(I)
local info = sinfos[I]
local delim = sub(info, 1, 1) -- delimiter used
local ndelim = (delim == "'") and '"' or "'" -- opposite " <-> '
local z = sub(info, 2, -2) -- actual string
local i = 1
local c_delim, c_ndelim = 0, 0 -- counts of " and ' occurrences
--------------------------------------------------------------------
while i <= #z do
local c = sub(z, i, i)
----------------------------------------------------------------
if c == "\\" then -- escaped stuff
local j = i + 1
local d = sub(z, j, j)
local p = find("abfnrtv\\\n\r\"\'0123456789", d, 1, true)
------------------------------------------------------------
if not p then -- \<char> -- remove \
z = sub(z, 1, i - 1)..sub(z, j)
i = i + 1
------------------------------------------------------------
elseif p <= 8 then -- \a\b\f\n\r\t\v\\
i = i + 2 -- no change
------------------------------------------------------------
elseif p <= 10 then -- \<eol> -- normalize EOL
local eol = sub(z, j, j + 1)
if eol == "\r\n" or eol == "\n\r" then
z = sub(z, 1, i).."\n"..sub(z, j + 2)
elseif p == 10 then -- \r case
z = sub(z, 1, i).."\n"..sub(z, j + 1)
end
i = i + 2
------------------------------------------------------------
elseif p <= 12 then -- \"\' -- remove \ for ndelim
if d == delim then
c_delim = c_delim + 1
i = i + 2
else
c_ndelim = c_ndelim + 1
z = sub(z, 1, i - 1)..sub(z, j)
i = i + 1
end
------------------------------------------------------------
else -- \ddd -- various steps
local s = match(z, "^(%d%d?%d?)", j)
j = i + 1 + #s -- skip to location
local cv = s + 0
local cc = string.char(cv)
local p = find("\a\b\f\n\r\t\v", cc, 1, true)
if p then -- special escapes
s = "\\"..sub("abfnrtv", p, p)
elseif cv < 32 then -- normalized \ddd
s = "\\"..cv
elseif cc == delim then -- \<delim>
s = "\\"..cc
c_delim = c_delim + 1
elseif cc == "\\" then -- \\
s = "\\\\"
else -- literal character
s = cc
if cc == ndelim then
c_ndelim = c_ndelim + 1
end
end
z = sub(z, 1, i - 1)..s..sub(z, j)
i = i + #s
------------------------------------------------------------
end--if p
----------------------------------------------------------------
else-- c ~= "\\" -- <other> -- no change
i = i + 1
if c == ndelim then -- count ndelim, for switching delimiters
c_ndelim = c_ndelim + 1
end
----------------------------------------------------------------
end--if c
end--while
--------------------------------------------------------------------
-- switching delimiters, a long-winded derivation:
-- (1) delim takes 2+2*c_delim bytes, ndelim takes c_ndelim bytes
-- (2) delim becomes c_delim bytes, ndelim becomes 2+2*c_ndelim bytes
-- simplifying the condition (1)>(2) --> c_delim > c_ndelim
if c_delim > c_ndelim then
i = 1
while i <= #z do
local p, q, r = find(z, "([\'\"])", i)
if not p then break end
if r == delim then -- \<delim> -> <delim>
z = sub(z, 1, p - 2)..sub(z, p)
i = p
else-- r == ndelim -- <ndelim> -> \<ndelim>
z = sub(z, 1, p - 1).."\\"..sub(z, p)
i = p + 2
end
end--while
delim = ndelim -- actually change delimiters
end
--------------------------------------------------------------------
z = delim..z..delim
if z ~= sinfos[I] then
if opt_details then
print("<string> (line "..stoklns[I]..") "..sinfos[I].." -> "..z)
opt_details = opt_details + 1
end
sinfos[I] = z
end
end
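------------------------------------------------------------------------
-- worked examples (not executed) of the rules implemented above:
--   "\097" --> "a" (\ddd at or above ASCII 32 becomes the literal)
--   "\001" --> "\1" (control code keeps \ddd, leading zeros zapped)
--   "\010" --> "\n" (\ddd mapped to the equivalent named escape)
--   "a\'b" --> "a'b" (escape not needed for the other quote)
--   "a\"b\"c" --> 'a"b"c' (delimiters switched to save two bytes)
------------------------------------------------------------------------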
------------------------------------------------------------------------
-- long string optimization
-- * note: warning flagged if trailing whitespace found, not trimmed
-- * remove first optional newline
-- * normalize embedded newlines
-- * reduce '=' separators in delimiters if possible
------------------------------------------------------------------------
local function do_lstring(I)
local info = sinfos[I]
local delim1 = match(info, "^%[=*%[") -- cut out delimiters
local sep = #delim1
local delim2 = sub(info, -sep, -1)
local z = sub(info, sep + 1, -(sep + 1)) -- lstring without delims
local y = ""
local i = 1
--------------------------------------------------------------------
while true do
local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i)
-- deal with a single line
local ln
if not p then
ln = sub(z, i)
elseif p >= i then
ln = sub(z, i, p - 1)
end
if ln ~= "" then
-- flag a warning if there are trailing spaces, won't optimize!
if match(ln, "%s+$") then
warn.lstring = "trailing whitespace in long string near line "..stoklns[I]
end
y = y..ln
end
if not p then -- done if no more EOLs
break
end
-- deal with line endings, normalize them
i = p + 1
if p then
if #s > 0 and r ~= s then -- skip CRLF or LFCR
i = i + 1
end
-- skip the first newline, which can be safely deleted
-- (it can only occur on the very first match, where p == 1)
if p > 1 then
y = y.."\n"
end
end
end--while
--------------------------------------------------------------------
-- handle possible deletion of one or more '=' separators
if sep >= 3 then
local chk, okay = sep - 1
-- loop to test ending delimiter with less of '=' down to zero
while chk >= 2 do
local delim = "%]"..rep("=", chk - 2).."%]"
if not match(y, delim) then okay = chk end
chk = chk - 1
end
if okay then -- change delimiters
sep = rep("=", okay - 2)
delim1, delim2 = "["..sep.."[", "]"..sep.."]"
end
end
--------------------------------------------------------------------
sinfos[I] = delim1..y..delim2
end
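------------------------------------------------------------------------
-- illustrative example (not executed): the '=' reduction above checks
-- that a shorter closing delimiter cannot occur in the body, e.g.
--   [==[two]=]words]==] --> [[two]=]words]]
-- while embedded CRLF/LFCR pairs are normalized to single "\n"s
------------------------------------------------------------------------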
------------------------------------------------------------------------
-- long comment optimization
-- * note: does not remove first optional newline
-- * trim trailing whitespace
-- * normalize embedded newlines
-- * reduce '=' separators in delimiters if possible
------------------------------------------------------------------------
local function do_lcomment(I)
local info = sinfos[I]
local delim1 = match(info, "^%-%-%[=*%[") -- cut out delimiters
local sep = #delim1
local delim2 = sub(info, -(sep - 2), -1) -- closing ']=*]' is 2 shorter
local z = sub(info, sep + 1, -(sep - 1)) -- comment without delims
local y = ""
local i = 1
--------------------------------------------------------------------
while true do
local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i)
-- deal with a single line, extract and check trailing whitespace
local ln
if not p then
ln = sub(z, i)
elseif p >= i then
ln = sub(z, i, p - 1)
end
if ln ~= "" then
-- trim trailing whitespace if non-empty line
local ws = match(ln, "%s*$")
if #ws > 0 then ln = sub(ln, 1, -(#ws + 1)) end
y = y..ln
end
if not p then -- done if no more EOLs
break
end
-- deal with line endings, normalize them
i = p + 1
if p then
if #s > 0 and r ~= s then -- skip CRLF or LFCR
i = i + 1
end
y = y.."\n"
end
end--while
--------------------------------------------------------------------
-- handle possible deletion of one or more '=' separators
sep = sep - 2 -- adjust for the leading "--" in delim1
if sep >= 3 then
local chk, okay = sep - 1
-- loop to test ending delimiter with less of '=' down to zero
while chk >= 2 do
local delim = "%]"..rep("=", chk - 2).."%]"
if not match(y, delim) then okay = chk end
chk = chk - 1
end
if okay then -- change delimiters
sep = rep("=", okay - 2)
delim1, delim2 = "--["..sep.."[", "]"..sep.."]"
end
end
--------------------------------------------------------------------
sinfos[I] = delim1..y..delim2
end
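------------------------------------------------------------------------
-- illustrative example (not executed), assuming the body contains
-- no "]]" sequence:
--   --[==[notice]==] --> --[[notice]]
------------------------------------------------------------------------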
------------------------------------------------------------------------
-- short comment optimization
-- * trim trailing whitespace
------------------------------------------------------------------------
local function do_comment(i)
local info = sinfos[i]
local ws = match(info, "%s*$") -- just look from end of string
if #ws > 0 then
info = sub(info, 1, -(#ws + 1)) -- trim trailing whitespace
end
sinfos[i] = info
end
------------------------------------------------------------------------
-- returns true if string found in long comment
-- * this is a feature to keep copyright or license texts
------------------------------------------------------------------------
local function keep_lcomment(opt_keep, info)
if not opt_keep then return false end -- option not set
local delim1 = match(info, "^%-%-%[=*%[") -- cut out delimiters
local sep = #delim1
local delim2 = sub(info, -sep, -1)
local z = sub(info, sep + 1, -(sep - 1)) -- comment without delims
if find(z, opt_keep, 1, true) then -- try to match
return true
end
end
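------------------------------------------------------------------------
-- e.g. running with --keep "Copyright" preserves any long comment
-- whose body contains the plain text "Copyright"
------------------------------------------------------------------------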
------------------------------------------------------------------------
-- main entry point
-- * currently, lexer processing has 2 passes
-- * processing is done on a line-oriented basis, which is easier to
-- grok due to the next point...
-- * since there are various options that can be enabled or disabled,
-- processing is a little messy or convoluted
------------------------------------------------------------------------
function optimize(option, toklist, semlist, toklnlist)
--------------------------------------------------------------------
-- set option flags
--------------------------------------------------------------------
local opt_comments = option["opt-comments"]
local opt_whitespace = option["opt-whitespace"]
local opt_emptylines = option["opt-emptylines"]
local opt_eols = option["opt-eols"]
local opt_strings = option["opt-strings"]
local opt_numbers = option["opt-numbers"]
local opt_keep = option.KEEP
opt_details = option.DETAILS and 0 -- upvalues for details display
print = print or base.print
if opt_eols then -- forced settings, otherwise won't work properly
opt_comments = true
opt_whitespace = true
opt_emptylines = true
end
--------------------------------------------------------------------
-- variable initialization
--------------------------------------------------------------------
stoks, sinfos, stoklns -- set source lists
= toklist, semlist, toklnlist
local i = 1 -- token position
local tok, info -- current token
local prev -- position of last grammar token
-- on same line (for TK_SPACE stuff)
--------------------------------------------------------------------
-- changes a token, info pair
--------------------------------------------------------------------
local function settoken(tok, info, I)
I = I or i
stoks[I] = tok or ""
sinfos[I] = info or ""
end
--------------------------------------------------------------------
-- processing loop (PASS 1)
--------------------------------------------------------------------
while true do
tok, info = stoks[i], sinfos[i]
----------------------------------------------------------------
local atstart = atlinestart(i) -- set line begin flag
if atstart then prev = nil end
----------------------------------------------------------------
if tok == "TK_EOS" then -- end of stream/pass
break
----------------------------------------------------------------
elseif tok == "TK_KEYWORD" or -- keywords, identifiers,
tok == "TK_NAME" or -- operators
tok == "TK_OP" then
-- TK_KEYWORD and TK_OP can't be optimized without a big
-- optimization framework; it would be more of an optimizing
-- compiler, not a source code compressor
-- TK_NAMEs that are locals need the parser to analyze/optimize
prev = i
----------------------------------------------------------------
elseif tok == "TK_NUMBER" then -- numbers
if opt_numbers then
do_number(i) -- optimize
end
prev = i
----------------------------------------------------------------
elseif tok == "TK_STRING" or -- strings, long strings
tok == "TK_LSTRING" then
if opt_strings then
if tok == "TK_STRING" then
do_string(i) -- optimize
else
do_lstring(i) -- optimize
end
end
prev = i
----------------------------------------------------------------
elseif tok == "TK_COMMENT" then -- short comments
if opt_comments then
if i == 1 and sub(info, 1, 1) == "#" then
-- keep shbang comment, trim whitespace
do_comment(i)
else
-- safe to delete, as a TK_EOL (or TK_EOS) always follows
settoken() -- remove entirely
end
elseif opt_whitespace then -- trim whitespace only
do_comment(i)
end
----------------------------------------------------------------
elseif tok == "TK_LCOMMENT" then -- long comments
if keep_lcomment(opt_keep, info) then
------------------------------------------------------------
-- if --keep, we keep a long comment if <msg> is found;
-- this is a feature to keep copyright or license texts
if opt_whitespace then -- trim whitespace only
do_lcomment(i)
end
prev = i
elseif opt_comments then
local eols = commenteols(info)
------------------------------------------------------------
-- prepare opt_emptylines case first, if a disposable token
-- follows, current one is safe to dump, else keep a space;
-- it is implied that the operation is safe for '-', because
-- current is a TK_LCOMMENT, and must be separate from a '-'
if is_faketoken[stoks[i + 1]] then
settoken() -- remove entirely
tok = ""
else
settoken("TK_SPACE", " ")
end
------------------------------------------------------------
-- if there are embedded EOLs to keep and opt_emptylines is
-- disabled, then switch the token into one or more EOLs
if not opt_emptylines and eols > 0 then
settoken("TK_EOL", rep("\n", eols))
end
------------------------------------------------------------
-- if optimizing whitespaces, force reinterpretation of the
-- token to give a chance for the space to be optimized away
if opt_whitespace and tok ~= "" then
i = i - 1 -- to reinterpret
end
------------------------------------------------------------
else -- disabled case
if opt_whitespace then -- trim whitespace only
do_lcomment(i)
end
prev = i
end
----------------------------------------------------------------
elseif tok == "TK_EOL" then -- line endings
if atstart and opt_emptylines then
settoken() -- remove entirely
elseif info == "\r\n" or info == "\n\r" then
-- normalize the rest of the EOLs for CRLF/LFCR only
-- (note that TK_LCOMMENT can change into several EOLs)
settoken("TK_EOL", "\n")
end
----------------------------------------------------------------
elseif tok == "TK_SPACE" then -- whitespace
if opt_whitespace then
if atstart or atlineend(i) then
-- delete leading and trailing whitespace
settoken() -- remove entirely
else
------------------------------------------------------------
-- at this point, since leading whitespace has been removed,
-- there should be either a real token or a TK_LCOMMENT
-- prior to hitting this whitespace; the TK_LCOMMENT case
-- only happens if opt_comments is disabled; so prev ~= nil
local ptok = stoks[prev]
if ptok == "TK_LCOMMENT" then
-- previous TK_LCOMMENT can abut with anything
settoken() -- remove entirely
else
-- prev must be a grammar token; consecutive TK_SPACE
-- tokens are impossible when optimizing whitespace
local ntok = stoks[i + 1]
if is_faketoken[ntok] then
-- handle special case where a '-' cannot abut with
-- either a short comment or a long comment
if (ntok == "TK_COMMENT" or ntok == "TK_LCOMMENT") and
ptok == "TK_OP" and sinfos[prev] == "-" then
-- keep token
else
settoken() -- remove entirely
end
else--is_realtoken
-- check a pair of grammar tokens, if can abut, then
-- delete space token entirely, otherwise keep one space
local s = checkpair(prev, i + 1)
if s == "" then
settoken() -- remove entirely
else
settoken("TK_SPACE", " ")
end
end
end
------------------------------------------------------------
end
end
----------------------------------------------------------------
else
error("unidentified token encountered")
end
----------------------------------------------------------------
i = i + 1
end--while
repack_tokens()
--------------------------------------------------------------------
-- processing loop (PASS 2)
--------------------------------------------------------------------
if opt_eols then
i = 1
-- aggressive EOL removal only works with most non-grammar tokens
-- optimized away because it is a rather simple scheme -- basically
-- it just checks 'real' token pairs around EOLs
if stoks[1] == "TK_COMMENT" then
-- first comment still existing must be shbang, skip whole line
i = 3
end
while true do
tok, info = stoks[i], sinfos[i]
--------------------------------------------------------------
if tok == "TK_EOS" then -- end of stream/pass
break
--------------------------------------------------------------
elseif tok == "TK_EOL" then -- consider each TK_EOL
local t1, t2 = stoks[i - 1], stoks[i + 1]
if is_realtoken[t1] and is_realtoken[t2] then -- sanity check
local s = checkpair(i - 1, i + 1)
if s == "" then
settoken() -- remove entirely
end
end
end--if tok
--------------------------------------------------------------
i = i + 1
end--while
repack_tokens()
end
--------------------------------------------------------------------
if opt_details and opt_details > 0 then print() end -- spacing
return stoks, sinfos, stoklns
end
end)
package.preload['optparser'] = (function (...)
--[[--------------------------------------------------------------------
optparser.lua: does parser-based optimizations
This file is part of LuaSrcDiet.
Copyright (c) 2008 Kein-Hong Man <khman@users.sf.net>
The COPYRIGHT file describes the conditions
under which this software may be distributed.
See the ChangeLog for more information.
----------------------------------------------------------------------]]
--[[--------------------------------------------------------------------
-- NOTES:
-- * For more parser-based optimization ideas, see the TODO items or
-- look at technotes.txt.
-- * The processing load is quite significant, but since this is an
-- off-line text processor, I believe we can wait a few seconds.
-- * TODO: might process "local a,a,a" wrongly... need tests!
-- * TODO: remove position handling if overlapped locals (rem < 0)
-- needs more study, to check behaviour
-- * TODO: there are probably better ways to do allocation, e.g. by
-- choosing better methods to sort and pick locals...
-- * TODO: we don't need 53*63 two-letter identifiers; we could make
-- do with significantly fewer, depending on how many are really
-- needed, and improve entropy; e.g. 13 needed -> choose 4*4 instead
----------------------------------------------------------------------]]
local base = _G
local string = string
local table = table
module "optparser"
----------------------------------------------------------------------
-- Letter frequencies for reducing symbol entropy (fixed version)
-- * Might help a wee bit when the output file is compressed
-- * See Wikipedia: http://en.wikipedia.org/wiki/Letter_frequencies
-- * We use letter frequencies according to a Linotype keyboard, plus
-- the underscore, and both lower case and upper case letters.
-- * The arrangement below (LC, underscore, %d, UC) is arbitrary.
-- * This is certainly not optimal, but is quick-and-dirty and the
-- process has no significant overhead
----------------------------------------------------------------------
local LETTERS = "etaoinshrdlucmfwypvbgkqjxz_ETAOINSHRDLUCMFWYPVBGKQJXZ"
local ALPHANUM = "etaoinshrdlucmfwypvbgkqjxz_0123456789ETAOINSHRDLUCMFWYPVBGKQJXZ"
-- names or identifiers that must be skipped
-- * the first two lines are for keywords
local SKIP_NAME = {}
for v in string.gmatch([[
and break do else elseif end false for function if in
local nil not or repeat return then true until while
self]], "%S+") do
SKIP_NAME[v] = true
end
------------------------------------------------------------------------
-- variables and data structures
------------------------------------------------------------------------
local toklist, seminfolist, -- token lists
globalinfo, localinfo, -- variable information tables
globaluniq, localuniq, -- unique name tables
var_new, -- index of new variable names
varlist -- list of output variables
----------------------------------------------------------------------
-- preprocess information table to get lists of unique names
----------------------------------------------------------------------
local function preprocess(infotable)
local uniqtable = {}
for i = 1, #infotable do -- enumerate info table
local obj = infotable[i]
local name = obj.name
--------------------------------------------------------------------
if not uniqtable[name] then -- not found, start an entry
uniqtable[name] = {
decl = 0, token = 0, size = 0,
}
end
--------------------------------------------------------------------
local uniq = uniqtable[name] -- count declarations, tokens, size
uniq.decl = uniq.decl + 1
local xref = obj.xref
local xcount = #xref
uniq.token = uniq.token + xcount
uniq.size = uniq.size + xcount * #name
--------------------------------------------------------------------
if obj.decl then -- if local table, create first,last pairs
obj.id = i
obj.xcount = xcount
if xcount > 1 then -- if ==1, means local never accessed
obj.first = xref[2]
obj.last = xref[xcount]
end
--------------------------------------------------------------------
else -- if global table, add a back ref
uniq.id = i
end
--------------------------------------------------------------------
end--for
return uniqtable
end
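----------------------------------------------------------------------
-- illustrative sketch (not executed): assuming a chunk where the
-- global 'print' is referenced three times, preprocess(globalinfo)
-- would contain an entry like
--   uniqtable["print"] = { decl = 1, token = 3, size = 15, id = i }
-- (globals have no real declarations; decl merely counts entries)
----------------------------------------------------------------------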
----------------------------------------------------------------------
-- calculate actual symbol frequencies, in order to reduce entropy
-- * this may help further reduce the size of compressed sources
-- * note that since parser-based optimization runs before lexer-based
-- optimization, the frequency table is not exact!
-- * yes, this will miss --keep block comments too...
----------------------------------------------------------------------
local function recalc_for_entropy(option)
local byte = string.byte
local char = string.char
-- table of token classes to accept in calculating symbol frequency
local ACCEPT = {
TK_KEYWORD = true, TK_NAME = true, TK_NUMBER = true,
TK_STRING = true, TK_LSTRING = true,
}
if not option["opt-comments"] then
ACCEPT.TK_COMMENT = true
ACCEPT.TK_LCOMMENT = true
end
--------------------------------------------------------------------
-- create a new table and remove any original locals by filtering
--------------------------------------------------------------------
local filtered = {}
for i = 1, #toklist do
filtered[i] = seminfolist[i]
end
for i = 1, #localinfo do -- enumerate local info table
local obj = localinfo[i]
local xref = obj.xref
for j = 1, obj.xcount do
local p = xref[j]
filtered[p] = "" -- remove locals
end
end
--------------------------------------------------------------------
local freq = {} -- reset symbol frequency table
for i = 0, 255 do freq[i] = 0 end
for i = 1, #toklist do -- gather symbol frequency
local tok, info = toklist[i], filtered[i]
if ACCEPT[tok] then
for j = 1, #info do
local c = byte(info, j)
freq[c] = freq[c] + 1
end
end--if
end--for
--------------------------------------------------------------------
-- function to re-sort symbols according to actual frequencies
--------------------------------------------------------------------
local function resort(symbols)
local symlist = {}
for i = 1, #symbols do -- prepare table to sort
local c = byte(symbols, i)
symlist[i] = { c = c, freq = freq[c], }
end
table.sort(symlist, -- sort selected symbols
function(v1, v2)
return v1.freq > v2.freq
end
)
local charlist = {} -- reconstitute the string
for i = 1, #symlist do
charlist[i] = char(symlist[i].c)
end
return table.concat(charlist)
end
--------------------------------------------------------------------
LETTERS = resort(LETTERS) -- change letter arrangement
ALPHANUM = resort(ALPHANUM)
end
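----------------------------------------------------------------------
-- illustrative sketch (not executed): if, say, 's' occurs more often
-- than 'e' in the surviving tokens, resort() moves 's' ahead of 'e'
-- in LETTERS, so the shortest names go to the most frequent symbols
----------------------------------------------------------------------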
----------------------------------------------------------------------
-- returns a string containing a new local variable name to use, and
-- a flag indicating whether it collides with a global variable
-- * trapping keywords and other names like 'self' is done elsewhere
----------------------------------------------------------------------
local function new_var_name()
local var
local cletters, calphanum = #LETTERS, #ALPHANUM
local v = var_new
if v < cletters then -- single char
v = v + 1
var = string.sub(LETTERS, v, v)
else -- longer names
local range, sz = cletters, 1 -- calculate # chars fit
repeat
v = v - range
range = range * calphanum
sz = sz + 1
until range > v
local n = v % cletters -- left side cycles faster
v = (v - n) / cletters -- do first char first
n = n + 1
var = string.sub(LETTERS, n, n)
while sz > 1 do
local m = v % calphanum
v = (v - m) / calphanum
m = m + 1
var = var..string.sub(ALPHANUM, m, m)
sz = sz - 1
end
end
var_new = var_new + 1
return var, globaluniq[var] ~= nil
end
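----------------------------------------------------------------------
-- illustrative sketch (not executed), using the default LETTERS
-- ordering: successive calls yield "e", "t", "a", ... (53 one-char
-- names), then "ee", "te", "ae", ... where the FIRST character
-- cycles fastest; the returned flag signals a global name collision
----------------------------------------------------------------------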
----------------------------------------------------------------------
-- calculate and print some statistics
-- * probably better in main source, put here for now
----------------------------------------------------------------------
local function stats_summary(globaluniq, localuniq, afteruniq, option)
local print = print or base.print
local fmt = string.format
local opt_details = option.DETAILS
local uniq_g, uniq_li, uniq_lo, uniq_ti, uniq_to, -- stats needed
decl_g, decl_li, decl_lo, decl_ti, decl_to,
token_g, token_li, token_lo, token_ti, token_to,
size_g, size_li, size_lo, size_ti, size_to
= 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0
local function avg(c, l) -- safe average function
if c == 0 then return 0 end
return l / c
end
--------------------------------------------------------------------
-- collect statistics (note: globals do not have declarations!)
--------------------------------------------------------------------
for name, uniq in base.pairs(globaluniq) do
uniq_g = uniq_g + 1
token_g = token_g + uniq.token
size_g = size_g + uniq.size
end
for name, uniq in base.pairs(localuniq) do
uniq_li = uniq_li + 1
decl_li = decl_li + uniq.decl
token_li = token_li + uniq.token
size_li = size_li + uniq.size
end
for name, uniq in base.pairs(afteruniq) do
uniq_lo = uniq_lo + 1
decl_lo = decl_lo + uniq.decl
token_lo = token_lo + uniq.token
size_lo = size_lo + uniq.size
end
uniq_ti = uniq_g + uniq_li
decl_ti = decl_g + decl_li
token_ti = token_g + token_li
size_ti = size_g + size_li
uniq_to = uniq_g + uniq_lo
decl_to = decl_g + decl_lo
token_to = token_g + token_lo
size_to = size_g + size_lo
--------------------------------------------------------------------
-- detailed stats: global list
--------------------------------------------------------------------
if opt_details then
local sorted = {} -- sort table of unique global names by size
for name, uniq in base.pairs(globaluniq) do
uniq.name = name
sorted[#sorted + 1] = uniq
end
table.sort(sorted,
function(v1, v2)
return v1.size > v2.size
end
)
local tabf1, tabf2 = "%8s%8s%10s %s", "%8d%8d%10.2f %s"
local hl = string.rep("-", 44)
print("*** global variable list (sorted by size) ***\n"..hl)
print(fmt(tabf1, "Token", "Input", "Input", "Global"))
print(fmt(tabf1, "Count", "Bytes", "Average", "Name"))
print(hl)
for i = 1, #sorted do
local uniq = sorted[i]
print(fmt(tabf2, uniq.token, uniq.size, avg(uniq.token, uniq.size), uniq.name))
end
print(hl)
print(fmt(tabf2, token_g, size_g, avg(token_g, size_g), "TOTAL"))
print(hl.."\n")
--------------------------------------------------------------------
-- detailed stats: local list
--------------------------------------------------------------------
local tabf1, tabf2 = "%8s%8s%8s%10s%8s%10s %s", "%8d%8d%8d%10.2f%8d%10.2f %s"
local hl = string.rep("-", 70)
print("*** local variable list (sorted by allocation order) ***\n"..hl)
print(fmt(tabf1, "Decl.", "Token", "Input", "Input", "Output", "Output", "Global"))
print(fmt(tabf1, "Count", "Count", "Bytes", "Average", "Bytes", "Average", "Name"))
print(hl)
for i = 1, #varlist do -- iterate according to order assigned
local name = varlist[i]
local uniq = afteruniq[name]
local old_t, old_s = 0, 0
for j = 1, #localinfo do -- find corresponding old names and calculate
local obj = localinfo[j]
if obj.name == name then
old_t = old_t + obj.xcount
old_s = old_s + obj.xcount * #obj.oldname
end
end
print(fmt(tabf2, uniq.decl, uniq.token, old_s, avg(old_t, old_s),
uniq.size, avg(uniq.token, uniq.size), name))
end
print(hl)
print(fmt(tabf2, decl_lo, token_lo, size_li, avg(token_li, size_li),
size_lo, avg(token_lo, size_lo), "TOTAL"))
print(hl.."\n")
end--if opt_details
--------------------------------------------------------------------
-- display output
--------------------------------------------------------------------
local tabf1, tabf2 = "%-16s%8s%8s%8s%8s%10s", "%-16s%8d%8d%8d%8d%10.2f"
local hl = string.rep("-", 58)
print("*** local variable optimization summary ***\n"..hl)
print(fmt(tabf1, "Variable", "Unique", "Decl.", "Token", "Size", "Average"))
print(fmt(tabf1, "Types", "Names", "Count", "Count", "Bytes", "Bytes"))
print(hl)
print(fmt(tabf2, "Global", uniq_g, decl_g, token_g, size_g, avg(token_g, size_g)))
print(hl)
print(fmt(tabf2, "Local (in)", uniq_li, decl_li, token_li, size_li, avg(token_li, size_li)))
print(fmt(tabf2, "TOTAL (in)", uniq_ti, decl_ti, token_ti, size_ti, avg(token_ti, size_ti)))
print(hl)
print(fmt(tabf2, "Local (out)", uniq_lo, decl_lo, token_lo, size_lo, avg(token_lo, size_lo)))
print(fmt(tabf2, "TOTAL (out)", uniq_to, decl_to, token_to, size_to, avg(token_to, size_to)))
print(hl.."\n")
end
----------------------------------------------------------------------
-- main entry point
-- * does only local variable optimization for now
----------------------------------------------------------------------
function optimize(option, _toklist, _seminfolist, _globalinfo, _localinfo)
-- set tables
toklist, seminfolist, globalinfo, localinfo
= _toklist, _seminfolist, _globalinfo, _localinfo
var_new = 0 -- reset variable name allocator
varlist = {}
------------------------------------------------------------------
-- preprocess global/local tables, handle entropy reduction
------------------------------------------------------------------
globaluniq = preprocess(globalinfo)
localuniq = preprocess(localinfo)
if option["opt-entropy"] then -- for entropy improvement
recalc_for_entropy(option)
end
------------------------------------------------------------------
-- build initial declared object table, then sort according to
-- token count, this might help assign more tokens to more common
-- variable names such as 'e' thus possibly reducing entropy
-- * an object knows its localinfo index via its 'id' field
-- * special handling for "self" special local (parameter) here
------------------------------------------------------------------
local object = {}
for i = 1, #localinfo do
object[i] = localinfo[i]
end
table.sort(object, -- sort largest first
function(v1, v2)
return v1.xcount > v2.xcount
end
)
------------------------------------------------------------------
-- the special "self" function parameters must be preserved
-- * the allocator below will never use "self", so it is safe to
-- keep those implicit declarations as-is
------------------------------------------------------------------
local temp, j, gotself = {}, 1, false
for i = 1, #object do
local obj = object[i]
if not obj.isself then
temp[j] = obj
j = j + 1
else
gotself = true
end
end
object = temp
------------------------------------------------------------------
-- a simple first-come first-served heuristic name allocator,
-- note that this is in no way optimal...
-- * each object is a local variable declaration plus its live range
-- * the aim is to assign short names to as many tokens as possible,
-- so the following tries to maximize name reuse
-- * note that we preserve sort order
------------------------------------------------------------------
local nobject = #object
while nobject > 0 do
local varname, gcollide
repeat
varname, gcollide = new_var_name() -- collect a variable name
until not SKIP_NAME[varname] -- skip all special names
varlist[#varlist + 1] = varname -- keep a list
local oleft = nobject
------------------------------------------------------------------
-- if the variable name collides with an existing global, the name
-- cannot be used by a local if the name is accessed as a global
-- while that local is alive (between 'act' and 'rem'), so we
-- drop objects that collide with the corresponding global
------------------------------------------------------------------
if gcollide then
-- find the xref table of the global
local gref = globalinfo[globaluniq[varname].id].xref
local ngref = #gref
-- enumerate for all current objects; all are valid at this point
for i = 1, nobject do
local obj = object[i]
local act, rem = obj.act, obj.rem -- 'live' range of local
-- if rem < 0, it is a -id to a local that had the same name
-- so follow rem to extend it; does this make sense?
while rem < 0 do
rem = localinfo[-rem].rem
end
local drop
for j = 1, ngref do
local p = gref[j]
if p >= act and p <= rem then drop = true end -- in range?
end
if drop then
obj.skip = true
oleft = oleft - 1
end
end--for
end--if gcollide
------------------------------------------------------------------
-- now the first unassigned local (since it's sorted) will be the
-- one with the most tokens to rename, so we set this one and then
-- eliminate all others that collide; any locals that are left can
-- then reuse the same variable name; this is repeated until every
-- local declaration that can use this name has been assigned
-- * the criteria for local-local reuse/collision is:
-- A is the local with a name already assigned
-- B is the unassigned local under consideration
-- => anytime A is accessed, it cannot be when B is 'live'
-- => to speed up things, we have first/last accesses noted
------------------------------------------------------------------
while oleft > 0 do
local i = 1
while object[i].skip do -- scan for first object
i = i + 1
end
------------------------------------------------------------------
-- first object is free for assignment of the variable name
-- [first,last] gives the access range for collision checking
------------------------------------------------------------------
oleft = oleft - 1
local obja = object[i]
i = i + 1
obja.newname = varname
obja.skip = true
obja.done = true
local first, last = obja.first, obja.last
local xref = obja.xref
------------------------------------------------------------------
-- then, scan all the rest and drop those colliding
-- if A was never accessed then it'll never collide with anything
-- otherwise trivial skip if:
-- * B was activated after A's last access (last < act)
-- * B was removed before A's first access (first > rem)
-- if not, see detailed skip below...
------------------------------------------------------------------
if first and oleft > 0 then -- must have at least 1 access
local scanleft = oleft
while scanleft > 0 do
while object[i].skip do -- next valid object
i = i + 1
end
scanleft = scanleft - 1
local objb = object[i]
i = i + 1
local act, rem = objb.act, objb.rem -- live range of B
-- if rem < 0, extend range of rem thru' following local
while rem < 0 do
rem = localinfo[-rem].rem
end
--------------------------------------------------------
if not(last < act or first > rem) then -- possible collision
--------------------------------------------------------
-- B is activated later than A or at the same statement,
-- this means for no collision, A cannot be accessed when B
-- is alive, since B overrides A (or is a peer)
--------------------------------------------------------
if act >= obja.act then
for j = 1, obja.xcount do -- ... then check every access
local p = xref[j]
if p >= act and p <= rem then -- A accessed when B live!
oleft = oleft - 1
objb.skip = true
break
end
end--for
--------------------------------------------------------
-- A is activated later than B; this means that for no collision,
-- A's accesses are okay since A overrides B, but B's last
-- access needs to be earlier than A's activation time
--------------------------------------------------------
else
if objb.last and objb.last >= obja.act then
oleft = oleft - 1
objb.skip = true
end
end
end
--------------------------------------------------------
if oleft == 0 then break end
end
end--if first
------------------------------------------------------------------
end--while
------------------------------------------------------------------
-- after assigning all possible locals to one variable name, the
-- unassigned locals/objects have the skip field reset and the table
-- is compacted, to hopefully reduce iteration time
------------------------------------------------------------------
local temp, j = {}, 1
for i = 1, nobject do
local obj = object[i]
if not obj.done then
obj.skip = false
temp[j] = obj
j = j + 1
end
end
object = temp -- new compacted object table
nobject = #object -- objects left to process
------------------------------------------------------------------
end--while
------------------------------------------------------------------
-- after assigning all locals with new variable names, we can
-- patch in the new names, and reprocess to get 'after' stats
------------------------------------------------------------------
for i = 1, #localinfo do -- enumerate all locals
local obj = localinfo[i]
local xref = obj.xref
if obj.newname then -- if got new name, patch it in
for j = 1, obj.xcount do
local p = xref[j] -- xrefs indexes the token list
seminfolist[p] = obj.newname
end
obj.name, obj.oldname -- adjust names
= obj.newname, obj.name
else
obj.oldname = obj.name -- for cases like 'self'
end
end
------------------------------------------------------------------
-- deal with statistics output
------------------------------------------------------------------
if gotself then -- add 'self' to end of list
varlist[#varlist + 1] = "self"
end
local afteruniq = preprocess(localinfo)
stats_summary(globaluniq, localuniq, afteruniq, option)
------------------------------------------------------------------
end
end)
--[[--------------------------------------------------------------------
LuaSrcDiet
Compresses Lua source code by removing unnecessary characters.
For Lua 5.1.x source code.
Copyright (c) 2008 Kein-Hong Man <khman@users.sf.net>
The COPYRIGHT file describes the conditions
under which this software may be distributed.
See the ChangeLog for more information.
----------------------------------------------------------------------]]
--[[--------------------------------------------------------------------
-- NOTES:
-- * Remember to update version and date information below (MSG_TITLE)
-- * TODO: to implement pcall() to properly handle lexer etc. errors
-- * TODO: verify token stream or double-check binary chunk?
-- * TODO: need some automatic testing for a semblance of sanity
-- * TODO: the plugin module is highly experimental and unstable
----------------------------------------------------------------------]]
-- standard libraries, functions
local string = string
local math = math
local table = table
local require = require
local print = print
local sub = string.sub
local gmatch = string.gmatch
-- support modules
local llex = require "llex"
local lparser = require "lparser"
local optlex = require "optlex"
local optparser = require "optparser"
local plugin
--[[--------------------------------------------------------------------
-- messages and textual data
----------------------------------------------------------------------]]
local MSG_TITLE = [[
LuaSrcDiet: Puts your Lua 5.1 source code on a diet
Version 0.11.2 (20080608) Copyright (c) 2005-2008 Kein-Hong Man
The COPYRIGHT file describes the conditions under which this
software may be distributed.
]]
local MSG_USAGE = [[
usage: LuaSrcDiet [options] [filenames]
example:
  >LuaSrcDiet myscript.lua -o myscript_.lua
options:
  -v, --version       prints version information
  -h, --help          prints usage information
  -o <file>           specify file name to write output
  -s <suffix>         suffix for output files (default '_')
  --keep <msg>        keep block comment with <msg> inside
  --plugin <module>   run <module> in plugin/ directory
  -                   stop handling arguments
(optimization levels)
  --none              all optimizations off (normalizes EOLs only)
  --basic             lexer-based optimizations only
  --maximum           maximize reduction of source
(informational)
  --quiet             process files quietly
  --read-only         read file and print token stats only
  --dump-lexer        dump raw tokens from lexer to stdout
  --dump-parser       dump variable tracking tables from parser
  --details           extra info (strings, numbers, locals)
features (to disable, insert 'no' prefix like --noopt-comments):
%s
default settings:
%s]]
------------------------------------------------------------------------
-- optimization options, for ease of switching on and off
-- * positive to enable optimization, negative (no) to disable
-- * these options should follow --opt-* and --noopt-* style for now
------------------------------------------------------------------------
local OPTION = [[
--opt-comments,'remove comments and block comments'
--opt-whitespace,'remove whitespace excluding EOLs'
--opt-emptylines,'remove empty lines'
--opt-eols,'all above, plus remove unnecessary EOLs'
--opt-strings,'optimize strings and long strings'
--opt-numbers,'optimize numbers'
--opt-locals,'optimize local variable names'
--opt-entropy,'tries to reduce symbol entropy of locals'
]]
-- preset configuration
local DEFAULT_CONFIG = [[
--opt-comments --opt-whitespace --opt-emptylines
--opt-numbers --opt-locals
]]
-- override configurations: MUST explicitly enable/disable everything
local BASIC_CONFIG = [[
--opt-comments --opt-whitespace --opt-emptylines
--noopt-eols --noopt-strings --noopt-numbers
--noopt-locals
]]
local MAXIMUM_CONFIG = [[
--opt-comments --opt-whitespace --opt-emptylines
--opt-eols --opt-strings --opt-numbers
--opt-locals --opt-entropy
]]
local NONE_CONFIG = [[
--noopt-comments --noopt-whitespace --noopt-emptylines
--noopt-eols --noopt-strings --noopt-numbers
--noopt-locals
]]
local DEFAULT_SUFFIX = "_" -- default suffix for file renaming
local PLUGIN_SUFFIX = "plugin/" -- relative location of plugins
--[[--------------------------------------------------------------------
-- startup and initialize option list handling
----------------------------------------------------------------------]]
-- simple error message handler; change to error if traceback wanted
local function die(msg)
print("LuaSrcDiet: "..msg); os.exit()
end
--die = error--DEBUG
if not string.find(_VERSION, "5.1", 1, 1) then -- sanity check (plain find)
die("requires Lua 5.1 to run")
end
------------------------------------------------------------------------
-- prepares text for list of optimizations, prepare lookup table
------------------------------------------------------------------------
local MSG_OPTIONS = ""
do
local WIDTH = 24
local o = {}
for op, desc in gmatch(OPTION, "%s*([^,]+),'([^']+)'") do
local msg = " "..op
msg = msg..string.rep(" ", WIDTH - #msg)..desc.."\n"
MSG_OPTIONS = MSG_OPTIONS..msg
o[op] = true
o["--no"..sub(op, 3)] = true
end
OPTION = o -- replace OPTION with lookup table
end
MSG_USAGE = string.format(MSG_USAGE, MSG_OPTIONS, DEFAULT_CONFIG)
------------------------------------------------------------------------
-- global variable initialization, option set handling
------------------------------------------------------------------------
local suffix = DEFAULT_SUFFIX -- file suffix
local option = {} -- program options
local stat_c, stat_l -- statistics tables
-- function to set option lookup table based on a text list of options
-- note: additional forced settings for --opt-eols is done in optlex.lua
local function set_options(CONFIG)
for op in gmatch(CONFIG, "(%-%-%S+)") do
if sub(op, 3, 4) == "no" and -- handle negative options
OPTION["--"..sub(op, 5)] then
option[sub(op, 5)] = false
else
option[sub(op, 3)] = true
end
end
end
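------------------------------------------------------------------------
-- illustrative sketch (not executed):
--   set_options("--opt-eols") --> option["opt-eols"] = true
--   set_options("--noopt-numbers") --> option["opt-numbers"] = false
-- switches not matching a known "--no*" form are stored verbatim,
-- e.g. set_options("--quiet") --> option["quiet"] = true
------------------------------------------------------------------------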
--[[--------------------------------------------------------------------
-- support functions
----------------------------------------------------------------------]]
-- list of token types, parser-significant types are up to TTYPE_GRAMMAR
-- while the rest are not used by parsers; arranged for stats display
local TTYPES = {
"TK_KEYWORD", "TK_NAME", "TK_NUMBER", -- grammar
"TK_STRING", "TK_LSTRING", "TK_OP",
"TK_EOS",
"TK_COMMENT", "TK_LCOMMENT", -- non-grammar
"TK_EOL", "TK_SPACE",
}
local TTYPE_GRAMMAR = 7
local EOLTYPES = { -- EOL names for token dump
["\n"] = "LF", ["\r"] = "CR",
["\n\r"] = "LFCR", ["\r\n"] = "CRLF",
}
------------------------------------------------------------------------
-- read source code from file
------------------------------------------------------------------------
local function load_file(fname)
local INF = io.open(fname, "rb")
if not INF then die("cannot open \""..fname.."\" for reading") end
local dat = INF:read("*a")
if not dat then die("cannot read from \""..fname.."\"") end
INF:close()
return dat
end
------------------------------------------------------------------------
-- save source code to file
------------------------------------------------------------------------
local function save_file(fname, dat)
local OUTF = io.open(fname, "wb")
if not OUTF then die("cannot open \""..fname.."\" for writing") end
local status = OUTF:write(dat)
if not status then die("cannot write to \""..fname.."\"") end
OUTF:close()
end
------------------------------------------------------------------------
-- functions to deal with statistics
------------------------------------------------------------------------
-- initialize statistics table
local function stat_init()
stat_c, stat_l = {}, {}
for i = 1, #TTYPES do
local ttype = TTYPES[i]
stat_c[ttype], stat_l[ttype] = 0, 0
end
end
-- add a token to statistics table
local function stat_add(tok, seminfo)
stat_c[tok] = stat_c[tok] + 1
stat_l[tok] = stat_l[tok] + #seminfo
end
-- do totals for statistics table, return average table
local function stat_calc()
local function avg(c, l) -- safe average function
if c == 0 then return 0 end
return l / c
end
local stat_a = {}
local c, l = 0, 0
for i = 1, TTYPE_GRAMMAR do -- total grammar tokens
local ttype = TTYPES[i]
c = c + stat_c[ttype]; l = l + stat_l[ttype]
end
stat_c.TOTAL_TOK, stat_l.TOTAL_TOK = c, l
stat_a.TOTAL_TOK = avg(c, l)
c, l = 0, 0
for i = 1, #TTYPES do -- total all tokens
local ttype = TTYPES[i]
c = c + stat_c[ttype]; l = l + stat_l[ttype]
stat_a[ttype] = avg(stat_c[ttype], stat_l[ttype])
end
stat_c.TOTAL_ALL, stat_l.TOTAL_ALL = c, l
stat_a.TOTAL_ALL = avg(c, l)
return stat_a
end
--[[--------------------------------------------------------------------
-- main tasks
----------------------------------------------------------------------]]
------------------------------------------------------------------------
-- a simple token dumper, minimal translation of seminfo data
------------------------------------------------------------------------
local function dump_tokens(srcfl)
--------------------------------------------------------------------
-- load file and process source input into tokens
--------------------------------------------------------------------
local z = load_file(srcfl)
llex.init(z)
llex.llex()
local toklist, seminfolist = llex.tok, llex.seminfo
--------------------------------------------------------------------
-- display output
--------------------------------------------------------------------
for i = 1, #toklist do
local tok, seminfo = toklist[i], seminfolist[i]
if tok == "TK_OP" and string.byte(seminfo) < 32 then
seminfo = "(".. string.byte(seminfo)..")"
elseif tok == "TK_EOL" then
seminfo = EOLTYPES[seminfo]
else
seminfo = "'"..seminfo.."'"
end
print(tok.." "..seminfo)
end--for
end
----------------------------------------------------------------------
-- parser dump; dump globalinfo and localinfo tables
----------------------------------------------------------------------
local function dump_parser(srcfl)
local print = print
--------------------------------------------------------------------
-- load file and process source input into tokens
--------------------------------------------------------------------
local z = load_file(srcfl)
llex.init(z)
llex.llex()
local toklist, seminfolist, toklnlist
= llex.tok, llex.seminfo, llex.tokln
--------------------------------------------------------------------
-- do parser optimization here
--------------------------------------------------------------------
lparser.init(toklist, seminfolist, toklnlist)
local globalinfo, localinfo = lparser.parser()
--------------------------------------------------------------------
-- display output
--------------------------------------------------------------------
local hl = string.rep("-", 72)
print("*** Local/Global Variable Tracker Tables ***")
print(hl.."\n GLOBALS\n"..hl)
-- global tables have a list of xref numbers only
for i = 1, #globalinfo do
local obj = globalinfo[i]
local msg = "("..i..") '"..obj.name.."' -> "
local xref = obj.xref
for j = 1, #xref do msg = msg..xref[j].." " end
print(msg)
end
-- local tables have xref numbers and a few other special
-- numbers that are specially named: decl (declaration xref),
-- act (activation xref), rem (removal xref)
print(hl.."\n LOCALS (decl=declared act=activated rem=removed)\n"..hl)
for i = 1, #localinfo do
local obj = localinfo[i]
local msg = "("..i..") '"..obj.name.."' decl:"..obj.decl..
" act:"..obj.act.." rem:"..obj.rem
if obj.isself then
msg = msg.." isself"
end
msg = msg.." -> "
local xref = obj.xref
for j = 1, #xref do msg = msg..xref[j].." " end
print(msg)
end
print(hl.."\n")
end
------------------------------------------------------------------------
-- reads source file(s) and reports some statistics
------------------------------------------------------------------------
local function read_only(srcfl)
local print = print
--------------------------------------------------------------------
-- load file and process source input into tokens
--------------------------------------------------------------------
local z = load_file(srcfl)
llex.init(z)
llex.llex()
local toklist, seminfolist = llex.tok, llex.seminfo
print(MSG_TITLE)
print("Statistics for: "..srcfl.."\n")
--------------------------------------------------------------------
-- collect statistics
--------------------------------------------------------------------
stat_init()
for i = 1, #toklist do
local tok, seminfo = toklist[i], seminfolist[i]
stat_add(tok, seminfo)
end--for
local stat_a = stat_calc()
--------------------------------------------------------------------
-- display output
--------------------------------------------------------------------
local fmt = string.format
local function figures(tt)
return stat_c[tt], stat_l[tt], stat_a[tt]
end
local tabf1, tabf2 = "%-16s%8s%8s%10s", "%-16s%8d%8d%10.2f"
local hl = string.rep("-", 42)
print(fmt(tabf1, "Lexical", "Input", "Input", "Input"))
print(fmt(tabf1, "Elements", "Count", "Bytes", "Average"))
print(hl)
for i = 1, #TTYPES do
local ttype = TTYPES[i]
print(fmt(tabf2, ttype, figures(ttype)))
if ttype == "TK_EOS" then print(hl) end
end
print(hl)
print(fmt(tabf2, "Total Elements", figures("TOTAL_ALL")))
print(hl)
print(fmt(tabf2, "Total Tokens", figures("TOTAL_TOK")))
print(hl.."\n")
end
------------------------------------------------------------------------
-- process source file(s), write output and report some statistics
------------------------------------------------------------------------
local function process_file(srcfl, destfl)
local function print(...) -- handle quiet option
if option.QUIET then return end
_G.print(...)
end
if plugin and plugin.init then -- plugin init
option.EXIT = false
plugin.init(option, srcfl, destfl)
if option.EXIT then return end
end
print(MSG_TITLE) -- title message
--------------------------------------------------------------------
-- load file and process source input into tokens
--------------------------------------------------------------------
local z = load_file(srcfl)
if plugin and plugin.post_load then -- plugin post-load
z = plugin.post_load(z) or z
if option.EXIT then return end
end
llex.init(z)
llex.llex()
local toklist, seminfolist, toklnlist
= llex.tok, llex.seminfo, llex.tokln
if plugin and plugin.post_lex then -- plugin post-lex
plugin.post_lex(toklist, seminfolist, toklnlist)
if option.EXIT then return end
end
--------------------------------------------------------------------
-- collect 'before' statistics
--------------------------------------------------------------------
stat_init()
for i = 1, #toklist do
local tok, seminfo = toklist[i], seminfolist[i]
stat_add(tok, seminfo)
end--for
local stat1_a = stat_calc()
local stat1_c, stat1_l = stat_c, stat_l
--------------------------------------------------------------------
-- do parser optimization here
--------------------------------------------------------------------
if option["opt-locals"] then
optparser.print = print -- hack
lparser.init(toklist, seminfolist, toklnlist)
local globalinfo, localinfo = lparser.parser()
if plugin and plugin.post_parse then -- plugin post-parse
plugin.post_parse(globalinfo, localinfo)
if option.EXIT then return end
end
optparser.optimize(option, toklist, seminfolist, globalinfo, localinfo)
if plugin and plugin.post_optparse then -- plugin post-optparse
plugin.post_optparse()
if option.EXIT then return end
end
end
--------------------------------------------------------------------
-- do lexer optimization here, save output file
--------------------------------------------------------------------
optlex.print = print -- hack: route optlex output through the quiet-aware print
toklist, seminfolist, toklnlist
= optlex.optimize(option, toklist, seminfolist, toklnlist)
if plugin and plugin.post_optlex then -- plugin post-optlex
plugin.post_optlex(toklist, seminfolist, toklnlist)
if option.EXIT then return end
end
local dat = table.concat(seminfolist)
-- depending on the options selected, embedded EOLs in long strings and
-- long comments may not have been translated to \n; if mixed line
-- endings remain, tack on a warning
if string.find(dat, "\r\n", 1, true) or
string.find(dat, "\n\r", 1, true) then
optlex.warn.mixedeol = true
end
-- save optimized source stream to output file
save_file(destfl, dat)
--------------------------------------------------------------------
-- collect 'after' statistics
--------------------------------------------------------------------
stat_init()
for i = 1, #toklist do
local tok, seminfo = toklist[i], seminfolist[i]
stat_add(tok, seminfo)
end--for
local stat_a = stat_calc()
--------------------------------------------------------------------
-- display output
--------------------------------------------------------------------
print("Statistics for: "..srcfl.." -> "..destfl.."\n")
local fmt = string.format
local function figures(tt)
return stat1_c[tt], stat1_l[tt], stat1_a[tt],
stat_c[tt], stat_l[tt], stat_a[tt]
end
local tabf1, tabf2 = "%-16s%8s%8s%10s%8s%8s%10s",
"%-16s%8d%8d%10.2f%8d%8d%10.2f"
local hl = string.rep("-", 68)
print("*** lexer-based optimizations summary ***\n"..hl)
print(fmt(tabf1, "Lexical",
"Input", "Input", "Input",
"Output", "Output", "Output"))
print(fmt(tabf1, "Elements",
"Count", "Bytes", "Average",
"Count", "Bytes", "Average"))
print(hl)
for i = 1, #TTYPES do
local ttype = TTYPES[i]
print(fmt(tabf2, ttype, figures(ttype)))
if ttype == "TK_EOS" then print(hl) end
end
print(hl)
print(fmt(tabf2, "Total Elements", figures("TOTAL_ALL")))
print(hl)
print(fmt(tabf2, "Total Tokens", figures("TOTAL_TOK")))
print(hl)
--------------------------------------------------------------------
-- report warning flags from the optimizing process
--------------------------------------------------------------------
if optlex.warn.lstring then
print("* WARNING: "..optlex.warn.lstring)
elseif optlex.warn.mixedeol then
print("* WARNING: ".."output still contains some CRLF or LFCR line endings")
end
print()
end
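------------------------------------------------------------------------
-- NOTE: the plugin.* calls above constitute the whole plugin interface.
-- Below is a minimal sketch of a conforming plugin module; the module
-- name and hook bodies are hypothetical, only the hook names, arguments
-- and the option.EXIT convention come from process_file() above.
--[==[
module "plugin.example"                -- loaded via require(PLUGIN_SUFFIX..p)
function init(option, srcfl, destfl)   -- called before any processing
  -- set option.EXIT = true here (or in any hook) to stop processing
end
function post_load(z)                  -- may return replacement source text
  return z
end
function post_lex(toklist, seminfolist, toklnlist) end
function post_parse(globalinfo, localinfo) end
function post_optparse() end
function post_optlex(toklist, seminfolist, toklnlist) end
--]==]
------------------------------------------------------------------------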
--[[--------------------------------------------------------------------
-- main functions
----------------------------------------------------------------------]]
local arg = {...} -- program arguments
local fspec = {}
set_options(DEFAULT_CONFIG) -- set to default options at beginning
------------------------------------------------------------------------
-- per-file handling, ship off to tasks
------------------------------------------------------------------------
local function do_files(fspec)
for _, srcfl in ipairs(fspec) do
local destfl
------------------------------------------------------------------
-- find and replace extension for filenames
------------------------------------------------------------------
local extb, exte = string.find(srcfl, "%.[^%.%\\%/]*$") -- final ".ext" with no dots or path separators inside
local basename, extension = srcfl, ""
if extb and extb > 1 then
basename = sub(srcfl, 1, extb - 1)
extension = sub(srcfl, extb, exte)
end
destfl = basename..suffix..extension
if #fspec == 1 and option.OUTPUT_FILE then
destfl = option.OUTPUT_FILE
end
if srcfl == destfl then
die("output filename identical to input filename")
end
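-- e.g. with suffix "_" (settable via -s), "turbo.lua" becomes
-- "turbo_.lua", and extensionless "turbo" becomes "turbo_"
-- (example filenames are illustrative only)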
------------------------------------------------------------------
-- perform requested operations
------------------------------------------------------------------
if option.DUMP_LEXER then
dump_tokens(srcfl)
elseif option.DUMP_PARSER then
dump_parser(srcfl)
elseif option.READ_ONLY then
read_only(srcfl)
else
process_file(srcfl, destfl)
end
end--for
end
------------------------------------------------------------------------
-- main function (entry point is after this definition)
------------------------------------------------------------------------
local function main()
local argn, i = #arg, 1
if argn == 0 then
option.HELP = true
end
--------------------------------------------------------------------
-- handle arguments
--------------------------------------------------------------------
while i <= argn do
local o, p = arg[i], arg[i + 1]
local dash = string.match(o, "^%-%-?")
if dash == "-" then -- single-dash options
if o == "-h" then
option.HELP = true; break
elseif o == "-v" then
option.VERSION = true; break
elseif o == "-s" then
if not p then die("-s option needs suffix specification") end
suffix = p
i = i + 1
elseif o == "-o" then
if not p then die("-o option needs a file name") end
option.OUTPUT_FILE = p
i = i + 1
elseif o == "-" then
break -- ignore rest of args
else
die("unrecognized option "..o)
end
elseif dash == "--" then -- double-dash options
if o == "--help" then
option.HELP = true; break
elseif o == "--version" then
option.VERSION = true; break
elseif o == "--keep" then
if not p then die("--keep option needs a string to match for") end
option.KEEP = p
i = i + 1
elseif o == "--plugin" then
if not p then die("--plugin option needs a module name") end
if option.PLUGIN then die("only one plugin can be specified") end
option.PLUGIN = p
plugin = require(PLUGIN_SUFFIX..p)
i = i + 1
elseif o == "--quiet" then
option.QUIET = true
elseif o == "--read-only" then
option.READ_ONLY = true
elseif o == "--basic" then
set_options(BASIC_CONFIG)
elseif o == "--maximum" then
set_options(MAXIMUM_CONFIG)
elseif o == "--none" then
set_options(NONE_CONFIG)
elseif o == "--dump-lexer" then
option.DUMP_LEXER = true
elseif o == "--dump-parser" then
option.DUMP_PARSER = true
elseif o == "--details" then
option.DETAILS = true
elseif OPTION[o] then -- lookup optimization options
set_options(o)
else
die("unrecognized option "..o)
end
else
fspec[#fspec + 1] = o -- potential filename
end
i = i + 1
end--while
if option.HELP then
print(MSG_TITLE..MSG_USAGE); return true
elseif option.VERSION then
print(MSG_TITLE); return true
end
if #fspec > 0 then
if #fspec > 1 and option.OUTPUT_FILE then
die("with -o, only one source file can be specified")
end
do_files(fspec)
return true
else
die("nothing to do!")
end
end
-- entry point -> main() -> do_files()
if not main() then
die("Please run with option -h or --help for usage information")
end
-- end of script
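------------------------------------------------------------------------
-- example invocations (filenames are illustrative; the exact effect of
-- each optimization preset is defined by the *_CONFIG tables above):
--   lua luasrcdiet.lua myscript.lua              -- default settings
--   lua luasrcdiet.lua --maximum -o min.lua myscript.lua
--   lua luasrcdiet.lua --read-only myscript.lua  -- statistics only
------------------------------------------------------------------------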