From e4741cd571b2b0cce6ef673ef0711ce1f4ea5ca3 Mon Sep 17 00:00:00 2001
From: Sam Roxanne
Date: Fri, 25 Jun 2021 04:31:33 -0500
Subject: [PATCH] Complete refactor of the backend.

---
Review notes (this section is ignored when the patch is applied):

This patch was recovered from a copy whose line breaks had been collapsed.
Indentation was rebuilt with tabs, which is an assumption; if the context
lines do not match the tree, apply with `git apply --ignore-whitespace`.
The blob ids on the index lines still describe the pre-review content.

Changes made to the added lines during review:
 * ast2.lua get_yx: a cursor on the final, newline-less line was reported
   as "next line, column 0".
 * ast2.lua remove_escapes: the loop stopped one character early.
 * ast2.lua back_escape_count: called the undefined ast.error.
 * ast2.lua parse_quote/parse_dblquote: the escape test looked at the quote
   itself, so an escaped quote always ended the string, and the fallback
   called the undefined ast.back_unescape.
 * ast2.lua parse_span/parse_block: "unclosed span" message, plain search.
 * ast2.lua parse_directive: names stop at a newline and the argument scan
   no longer runs past the end of the directive line.
 * ast2.lua parse: the whitespace test uses a plain find, so pattern magic
   characters in front of "--#" neither raise nor count as whitespace.
 * generator2.lua parse_ast: zero-argument directives used to emit
   `call_directive("name",)`.
 * generator2.lua shell_write: checks io.open/io.popen, falls back to
   /bin/sh when SHELL is unset, removes the temporary script (+1 line).
 * libluacomp.lua process_file: a failed io.open now reports its reason.

 src/application.lua        |  15 +-
 src/ast2.lua               | 449 +++++++++++++++++++++++++++++++++++++
 src/directives/include.lua |   5 +-
 src/generator2.lua         |  73 ++++++
 src/libluacomp.lua         |  28 +++
 src/luacomp_vars.lua       |   6 +-
 src/shell_var.lua          |  18 +-
 src/test.lua               |  10 +
 8 files changed, 579 insertions(+), 25 deletions(-)
 create mode 100644 src/ast2.lua
 create mode 100644 src/generator2.lua
 create mode 100644 src/libluacomp.lua
 create mode 100644 src/test.lua

diff --git a/src/application.lua b/src/application.lua
index 5ec863c..a545b09 100644
--- a/src/application.lua
+++ b/src/application.lua
@@ -3,19 +3,18 @@ local function dprint(...)
 	for i=1, #args do
 		args[i] = tostring(args[i])
 	end
-	if (VERBOSE) then
+	if (false) then
 		io.stderr:write("DEBUG\t"..table.concat(args,"\t"),"\n")
 	end
 end
 
 --#include "src/shell_var.lua"
 --#include "src/luacomp_vars.lua"
---#include "src/ast.lua"
---#include "src/generator.lua"
+--#include "src/libluacomp.lua"
 --#include "src/directive_provider.lua"
 --#include "src/cfg/minifier_providers.lua"
 
-local parser = argparse(arg[0], "LuaComp v"..LUACOMP_VERSION.."\nA preprocessor+postprocessor written in Lua.")
+local parser = argparse(arg[0]:match("[^/]+$"), "LuaComp v"..LUACOMP_VERSION.."\nA preprocessor+postprocessor written in Lua.")
 parser:argument("input", "Input file (- for STDIN)")
 parser:option("-O --output", "Output file. (- for STDOUT)", "-")
 parser:option("-m --minifier", "Sets the postprocessor", "none")
@@ -60,11 +59,7 @@ if (file ~= "-") then
 else
 	f = io.stdin
 end
-dprint("Generating AST...")
-local ast = mkast(f, file)
-ast.file = file
-dprint("Generating code...")
-local ocode = generate(ast, args.generator_code)
+local ocode = luacomp.process_file(f, (file == "-") and "stdin" or file, args.generator_code)
 
 local minifier = providers[args.minifier]
 dprint("Minifier: "..args.minifier, minifier)
@@ -96,4 +91,4 @@ if (args.executable) then
 end
 of:write(rcode)
 of:close()
-f:close()
\ No newline at end of file
+--f:close()
\ No newline at end of file
diff --git a/src/ast2.lua b/src/ast2.lua
new file mode 100644
index 0000000..7baeaaa
--- /dev/null
+++ b/src/ast2.lua
@@ -0,0 +1,449 @@
+-- AST Generator v2: Belkan Boogaloo
+-- Hopefully faster than v1
+local ast = {}
+do
+	local ws = "\t "
+
+	function ast.str_to_stream(str, file)
+		local s = {
+			str = str,
+			pos = 1,
+			file = file or "(unknown)"
+		}
+		function s:next(c)
+			c = c or 1
+			--dprint(c)
+			local d = self.str:sub(self.pos, self.pos+c-1)
+			self.pos = self.pos + c
+			return d
+		end
+
+		function s:peek(c)
+			c = c or 1
+			if (c < 0) then
+				return self.str:sub(self.pos+c, self.pos-1)
+			end
+			return self.str:sub(self.pos, self.pos+c-1)
+		end
+
+		function s:rewind(c)
+			c = c or 1
+			self.pos = self.pos - c
+			return self.pos
+		end
+
+		function s:skip(c)
+			c = c or 1
+			self.pos = self.pos + c
+			return self.pos
+		end
+
+		function s:set(c)
+			--dprint(c)
+			self.pos = c or self.pos
+			return self.pos
+		end
+
+		function s:tell()
+			return self.pos
+		end
+
+		function s:size()
+			return #self.str
+		end
+
+		function s:next_instance(pat, raw)
+			local st, en = self.str:find(pat, self.pos, raw)
+			if not st then return nil, "not found" end
+			self.pos = en+1
+			return self.str:sub(st, en)
+		end
+
+		function s:get_yx() -- it *is* yx
+			local pos = 0
+			local line = 1
+			while pos < self.pos do
+				local newpos = self.str:find("\n", pos+1)
+				if not newpos then return line, self.pos-pos end -- cursor sits on the last line
+				if newpos > self.pos then
+					return line, self.pos-pos
+				end
+				line = line + 1
+				pos = newpos
+			end
+			return line, 1
+		end
+
+		return s
+	end
+
+	local esct = {
+		["t"] = "\t",
+		["n"] = "\n",
+		["r"] = "\r",
+		--["\\"] = "\\\\"
+	}
+
+	function ast.parser_error(str, err)
+		local y, x = str:get_yx()
+		--print(y, x)
+		io.stderr:write(string.format("%s(%d:%d): %s\n", str.file, y or 0, x or 0, err))
+		os.exit(1)
+	end
+
+	function ast.unescape(escaped_string)
+		local i = 1
+		local out_string = ""
+		while i <= #escaped_string do
+			local c = escaped_string:sub(i,i)
+			if (c == "\\") then
+				i = i + 1
+				local nc = escaped_string:sub(i,i)
+				if esct[nc] then
+					out_string = out_string .. esct[nc]
+				else
+					out_string = out_string .. nc
+				end
+			else
+				out_string = out_string .. c
+			end
+			i = i + 1
+		end
+		return out_string
+	end
+
+	function ast.remove_escapes(escaped_string)
+		local i = 1
+		local out_string = ""
+		while i <= #escaped_string do -- "<" used to drop the final character
+			local c = escaped_string:sub(i,i)
+			if (c == "\\") then
+				i = i + 1
+			else
+				out_string = out_string .. c
+			end
+			i = i + 1
+		end
+		return out_string
+	end
+
+	function ast.back_escape_count(str, start)
+		local i=1
+		while str:peek(-i):sub(1,1) == "\\" do
+			i = i + 1
+			if (str:tell()-i < start) then
+				ast.parser_error(str, "internal error")
+			end
+		end
+		return str:peek(1-i)
+	end
+
+	function ast.parse_quote(str)
+		local spos = str:tell()
+		while true do
+			if not str:next_instance("\'") then
+				ast.parser_error(str, "unclosed string")
+			end
+			local rpos = str:tell()
+			str:set(spos)
+			if str:next_instance("\n") then
+				if rpos > str:tell() then
+					ast.parser_error(str, "unclosed string")
+				end
+			end
+			str:set(rpos)
+			-- peek(-1) is the quote itself, so look at what precedes it:
+			-- an even run of backslashes leaves the quote unescaped,
+			-- an odd run escapes it and the scan has to continue.
+			local body = str.str:sub(spos, rpos-2)
+			local slashes = body:match("\\*$")
+			if #slashes % 2 == 0 then
+				goto found_end
+			end
+		end
+		::found_end::
+		local epos = str:tell()
+		local amt = epos-spos-1
+		str:set(spos)
+		local esc = str:next(amt)
+		str:skip(1)
+		return ast.unescape(esc)
+	end
+
+	function ast.parse_dblquote(str)
+		local spos = str:tell()
+		while true do
+			if not str:next_instance("\"") then
+				ast.parser_error(str, "unclosed string")
+			end
+			local rpos = str:tell()
+			str:set(spos)
+			if str:next_instance("\n") then
+				if rpos > str:tell() then
+					ast.parser_error(str, "unclosed string")
+				end
+			end
+			str:set(rpos)
+			-- peek(-1) is the quote itself, so look at what precedes it:
+			-- an even run of backslashes leaves the quote unescaped,
+			-- an odd run escapes it and the scan has to continue.
+			local body = str.str:sub(spos, rpos-2)
+			local slashes = body:match("\\*$")
+			if #slashes % 2 == 0 then
+				goto found_end
+			end
+		end
+		::found_end::
+		local epos = str:tell()
+		local amt = epos-spos-1
+		str:set(spos)
+		--dprint(spos, amt)
+		local esc = str:next(amt)
+		--print(esc)
+		str:skip(1)
+		return ast.unescape(esc)
+	end
+
+	function ast.parse_hex(str)
+		local hex = str:next_instance("%x+")
+		if not hex then
+			ast.parser_error(str, "internal error")
+		end
+		return tonumber(hex, 16)
+	end
+
+	function ast.parse_number(str)
+		local num = str:next_instance("%d+")
+		if not num then
+			ast.parser_error(str, "internal error")
+		end
+		return tonumber(num, 10)
+	end
+
+	function ast.parse_envvar(str)
+		local name = str:next_instance("[^)]+")
+		if not name then
+			ast.parser_error(str, "unclosed shell var")
+		end
+		str:skip(1)
+		return name
+	end
+
+	-- [{...}]
+	function ast.parse_span(str)
+		local spos = str:tell()
+		if not str:next_instance("}]", true) then
+			ast.parser_error(str, "unclosed span")
+		else
+			local rpos = str:tell()
+			str:set(spos)
+			if str:next_instance("\n") then
+				if str:tell() < rpos then
+					str:set(spos)
+					ast.parser_error(str, "unclosed span")
+				end
+			end
+			str:set(rpos)
+		end
+		local epos = str:tell()
+		str:set(spos)
+		local data = str:next(epos-spos-2)
+		str:skip(2)
+		return data
+	end
+
+	-- [[...]]
+	function ast.parse_block(str)
+		local spos = str:tell()
+		if not str:next_instance("]]", true) then
+			ast.parser_error(str, "unclosed block")
+		end
+		local epos = str:tell()
+		str:set(spos)
+		local data = str:next(epos-spos-2)
+		str:skip(2)
+		return data
+	end
+
+	function ast.parse_directive(str) -- And now we start getting more complex.
+		local name = str:next_instance("[^ \n]+") -- a name never spans lines
+		local args = {}
+		while true do
+			local spos = str:tell()
+			if not str:next_instance(" +") then
+				break
+			else
+				local rpos = str:tell()
+				str:set(spos) -- look for the newline from where this argument scan began
+				local nl = str:next_instance("\n")
+				if nl and str:tell() < rpos then -- the line ended before the next argument
+					str:set(spos)
+					break
+				end
+				str:set(rpos)
+			end
+			local apos = str:tell()
+			if str:peek(2) == "0x" then
+				str:skip(2)
+				local n = ast.parse_hex(str)
+				local c = str:peek()
+				if c ~= " " and c ~= "\n" and c ~= "" then
+					str:set(apos)
+					ast.parser_error(str, "malformed hex")
+				end
+				table.insert(args, n)
+			elseif str:peek():find("%d") then
+				local n = ast.parse_number(str)
+				local c = str:peek()
+				if c ~= " " and c ~= "\n" and c ~= "" then
+					str:set(apos)
+					ast.parser_error(str, "malformed number")
+				end
+				table.insert(args, n)
+			elseif str:peek() == "\"" then
+				str:skip(1)
+				local sval = ast.parse_dblquote(str)
+				local c = str:peek()
+				if c ~= " " and c ~= "\n" and c ~= "" then
+					str:set(apos)
+					ast.parser_error(str, "malformed string, got "..c)
+				end
+				table.insert(args, sval)
+			elseif str:peek() == "\'" then
+				str:skip(1)
+				local sval = ast.parse_quote(str)
+				local c = str:peek()
+				if c ~= " " and c ~= "\n" and c ~= "" then
+					str:set(apos)
+					ast.parser_error(str, "malformed string")
+				end
+				table.insert(args, sval)
+			elseif str:peek(2) == "$".."(" then -- i have to avoid the funny
+				str:skip(2)
+				local sval = ast.parse_envvar(str)
+				local c = str:peek()
+				if c ~= " " and c ~= "\n" and c ~= "" then
+					str:set(apos)
+					ast.parser_error(str, "malformed argument")
+				end
+				table.insert(args, {type="evar", val=sval})
+			elseif str:peek(3) == "@".."[{" then
+				str:skip(3)
+				local sval = ast.parse_span(str)
+				local c = str:peek()
+				if c ~= " " and c ~= "\n" and c ~= "" then
+					str:set(apos)
+					ast.parser_error(str, "malformed code block, got "..c)
+				end
+				table.insert(args, {type="lua_span", val=sval})
+			elseif str:peek() == "\n" then
+				break
+			else
+				ast.parser_error(str, "unknown arg type")
+			end
+			if str:peek() == "\n" then
+				break
+			end
+		end
+		return {
+			type="directive",
+			name = name,
+			args = args
+		}
+	end
+
+	function ast.find_first(str, onfind, ...)
+		local t = table.pack(...)
+		local spos = str:tell()
+		local epos = math.huge
+		local ematch
+		for i=1, t.n do
+			str:set(spos)
+			local m = str:next_instance(t[i], true)
+			if m then
+				if str:tell() < epos then
+					if onfind then
+						if not onfind(str, m) then goto continue end
+					end
+					epos = str:tell()
+					ematch = m
+				end
+			end
+			::continue::
+		end
+		if ematch then
+			str:set(epos)
+		else
+			str:set(spos)
+		end
+		return ematch
+	end
+
+	-- And now we parse
+	function ast.parse(str)
+		local cast = {}
+		while true do
+			local spos = str:tell()
+			--dprint("searching")
+			local match = ast.find_first(str, function(str, submatch)
+				if (submatch == "--#") then
+					--dprint("directive?")
+					local i=4
+					while true do
+						if str:peek(-i):sub(1,1) == "\n" or str:peek(-i):sub(1,1) == "" or str:tell() == 4 then
+							--dprint("found newline, we're cool")
+							return true
+						elseif not ws:find(str:peek(-i):sub(1,1), 1, true) then -- plain find: the char is data, not a pattern
+							--dprint("found non-whitespace character "..string.byte(str:peek(-i):sub(1,1))..str:peek(-i):sub(1,1))
+							return false
+						end
+						i = i + 1
+					end
+				end
+				return true
+			end, "--#", "$".."[[", "@".."[[", "$".."[{", "@".."[{", "$".."(") -- trust me, this was needed
+			--dprint("searched")
+			if not match then
+				--dprint("not found")
+				table.insert(cast, {type="content", val=str:next(str:size())})
+				break
+			end
+			local epos = str:tell()
+			local size = (epos-#match)-spos
+			if size > 0 then
+				str:set(spos)
+				local chunk = str:next(size)
+				if not chunk:match("^%s+$") then
+					table.insert(cast, {type="content", val=chunk})
+				end
+				str:skip(#match)
+			end
+			--dprint("match: "..match)
+			if match == "--#" then
+				--str:skip(3)
+				table.insert(cast, ast.parse_directive(str))
+			elseif match == "$".."[[" then
+				local blk = ast.parse_block(str)
+				table.insert(cast, {type="shell_block", val=blk})
+			elseif match == "@".."[[" then
+				local blk = ast.parse_block(str)
+				table.insert(cast, {type="lua_block", val=blk})
+			elseif match == "$".."[{" then
+				local span = ast.parse_span(str)
+				table.insert(cast, {type="shell_span", val=span})
+			elseif match == "@".."[{" then
+				local span = ast.parse_span(str)
+				--print(span)
+				table.insert(cast, {type="lua_span", val=span})
+			elseif match == "$".."(" then
+				local var = ast.parse_envvar(str)
+				table.insert(cast, {type="evar", val=var})
+			else
+				ast.parser_error(str, "what")
+			end
+			--dprint("Parsed")
+		end
+
+		return cast
+	end
+end
diff --git a/src/directives/include.lua b/src/directives/include.lua
index e4f8e0b..a98bc82 100644
--- a/src/directives/include.lua
+++ b/src/directives/include.lua
@@ -2,10 +2,11 @@ function directives.include(env, file)
 	if (not os.execute("stat "..file..">/dev/null")) then
 		return false, "File `"..file.."' does not exist!"
 	end
-	local f = io.open(file, "r")
+	--[[local f = io.open(file, "r")
 	local fast = mkast(f, file)
 	fast.file = file
 	local code = generate(fast)
-	env.code = env.code .. code .. "\n"
+	env.code = env.code .. code .. "\n"]]
+	env.code = env.code .. luacomp.process_file(file, file) .. "\n"
 	return true
 end
\ No newline at end of file
diff --git a/src/generator2.lua b/src/generator2.lua
new file mode 100644
index 0000000..9481a5c
--- /dev/null
+++ b/src/generator2.lua
@@ -0,0 +1,73 @@
+-- Generator v2: Borderless Edition
+
+local generator = {}
+
+do
+	function generator.parse_ast(file, ast)
+		local gcode = ""
+		for i=1, #ast do
+			local leaf = ast[i]
+			if leaf.type == "directive" then
+				-- The name leads the list so a directive without arguments still emits valid Lua.
+				local pargs = {string.format("%q", leaf.name)}
+				for i=1, #leaf.args do
+					if type(leaf.args[i]) ~= "table" then
+						table.insert(pargs, string.format("%q", leaf.args[i]))
+					elseif leaf.args[i].type == "lua_span" then
+						table.insert(pargs, leaf.args[i].val)
+					elseif leaf.args[i].type == "evar" then
+						table.insert(pargs, string.format("svar.get(%q)", leaf.args[i].val))
+					end
+				end
+				gcode = gcode .. "call_directive("..table.concat(pargs, ",")..")\n"
+			elseif leaf.type == "lua_block" then
+				gcode = gcode .. leaf.val .. "\n"
+			elseif leaf.type == "shell_block" then
+				gcode = gcode .. string.format("shell_write(%q)\n", leaf.val)
+			elseif leaf.type == "content" then
+				gcode = gcode .. string.format("write_out(%q)\n", leaf.val)
+			elseif leaf.type == "lua_span" then
+				gcode = gcode .. "write_out("..leaf.val..")\n"
+			elseif leaf.type == "shell_span" then
+				gcode = gcode .. string.format("write_out(svar.get(%q))\n", leaf.val)
+			elseif leaf.type == "evar" then
+				gcode = gcode .. string.format("write_out(string.format(\"%%q\", svar.get(%q)))\n", leaf.val)
+			end
+		end
+		return gcode
+	end
+
+	function generator.run_gcode(fname, gcode)
+		local env = {code = ""}
+		local fenv = {}
+		for k, v in pairs(_G) do
+			fenv[k] = v
+		end
+		fenv._G = fenv
+		fenv._GENERATOR = env
+		function fenv.call_directive(dname, ...)
+			if not directives[dname] then error("invalid directive "..dname) end
+			local r, er = directives[dname](env, ...)
+			assert(r, er)
+		end
+
+		function fenv.write_out(code)
+			env.code = env.code .. code
+		end
+
+		function fenv.shell_write(cmd)
+			local tmpname = os.tmpname()
+			local f = assert(io.open(tmpname, "w"))
+			f:write(cmd)
+			f:close()
+			local h = assert(io.popen((os.getenv("SHELL") or "/bin/sh").." "..tmpname, "r")) -- SHELL may be unset
+			env.code = env.code .. h:read("*a")
+			h:close()
+			os.remove(tmpname) -- the script is only needed for this one run
+		end
+
+		assert(load(gcode, "="..fname, "t", fenv))()
+
+		return env.code
+	end
+end
\ No newline at end of file
diff --git a/src/libluacomp.lua b/src/libluacomp.lua
new file mode 100644
index 0000000..7e1f97c
--- /dev/null
+++ b/src/libluacomp.lua
@@ -0,0 +1,28 @@
+-- A LuaComp library
+local luacomp = {}
+
+local directives = {}
+
+--#include "src/ast2.lua"
+--#include "src/generator2.lua"
+
+function luacomp.process_file(file, fname, dry)
+	io.stderr:write("PROC\t", fname, "\n")
+	if type(file) == "string" then
+		file = assert(io.open(file, "r")) -- surface the open error instead of indexing nil
+	end
+	local d = file:read("*a"):gsub("\r\n", "\n"):gsub("\r", "\n")
+	file:close()
+	return luacomp.process_string(d, fname or file, dry)
+end
+
+function luacomp.process_string(str, name, dry)
+	local str = ast.str_to_stream(str, name)
+	local cast = ast.parse(str)
+	local gcode = generator.parse_ast(name, cast)
+	if dry then
+		return gcode
+	end
+	--error("TODO: implement generation")
+	return generator.run_gcode(name, gcode)
+end
\ No newline at end of file
diff --git a/src/luacomp_vars.lua b/src/luacomp_vars.lua
index 125b306..f472960 100644
--- a/src/luacomp_vars.lua
+++ b/src/luacomp_vars.lua
@@ -4,8 +4,8 @@ local function _sv(k, v)
 	--os.setenv(k, tostring(v))
 end
 
-_sv("LUACOMP_V_MAJ", 1)
-_sv("LUACOMP_V_MIN", 2)
-_sv("LUACOMP_V_PAT", 2)
+_sv("LUACOMP_V_MAJ", 2)
+_sv("LUACOMP_V_MIN", 0)
+_sv("LUACOMP_V_PAT", 0)
 _sv("LUACOMP_VERSION", LUACOMP_V_MAJ.."."..LUACOMP_V_MIN.."."..LUACOMP_V_PAT)
 _sv("LUACOMP_NAME", "LuaComp")
\ No newline at end of file
diff --git a/src/shell_var.lua b/src/shell_var.lua
index 328be7a..20cdd87 100644
--- a/src/shell_var.lua
+++ b/src/shell_var.lua
@@ -1,15 +1,13 @@
 svar = {}
 
-local svars = {}
+do
+	local stdlib = require("posix.stdlib")
 
-function svar.get(var)
-	return svars[var] or os.getenv(var)
-end
+	function svar.get(var)
+		return os.getenv(var)
+	end
 
-function svar.set(var, val)
-	svars[var] = tostring(val)
-end
-
-function svar.get_all()
-	return svars
+	function svar.set(var, val)
+		return stdlib.setenv(var, val and tostring(val) or nil)
+	end
 end
\ No newline at end of file
diff --git a/src/test.lua b/src/test.lua
new file mode 100644
index 0000000..384f2c8
--- /dev/null
+++ b/src/test.lua
@@ -0,0 +1,10 @@
+--#include "test"
+--#fuck @[{test}] $(test) 1245 0xFF
+$(test)
+$[[test]]
+$[{test}]
+@[{test}]
+@[[if test then test() end]]
+@[[for i=1, 10 do]]
+	print("hi @[{i}]")
+@[[end]]
\ No newline at end of file