#!/usr/bin/luajit -- A microscopic translator from Lua-like code to a stack bytecode. -- Here we implement a fairly minimal PEG parser. utra = {} -- Literal. function utra.lit(s) local n = string.len(s) return { lit = s, -- for debugging parse = function(text, offset) if string.sub(text, offset, offset+n-1) == s then return {offset=offset+n, val=s} end end, } end -- Alternation. function utra.alt(a, b) return { alta = a, altb = b, parse = function(text, offset) local ar = a.parse(text, offset) if ar then return ar end return b.parse(text, offset) end, } end -- Repetition. function utra.rep(n) return { rep = n, parse = function(text, offset) local results = {} repeat local result = n.parse(text, offset) if not result then return {offset=offset, val={items=results}} end table.insert(results, result.val) offset = result.offset until false end, } end -- Indirection to form cycles. function utra.thunk(f) return { thunk = f, parse = function(text, offset) return f().parse(text, offset) end, } end -- Transform a value, typically into a table. If you transform it -- into a non-table you may regret it. function utra.map(kid, f) return { map = kid, f = f, parse = function(text, offset) local v = kid.parse(text, offset) if not v then return v end return {offset=v.offset, val=f(v.val)} end, } end -- What I did in -- was to -- have labels: -- -- labeled <- label: name _ ':'_ value: term -- -> ([value, ' if (state) var ', label, ' = state.val;\n'].join('')) -- -- and result expressions at the ends of sequences: -- -- sequence <- foo: term bar: sequence -- -> ([foo, ' if (state) {\n', bar, ' }\n'].join('')) -- / result_expression / -> (''). -- -- result_expression <- '->'_ result: expr _ -- -> ([' if (state) state.val = ', result, ';\n'].join('')) -- -- I wonder if in Lua it might be easier to compile parsers at runtime -- with an approach like that... but for now the interpretive approach -- is: function utra.tag(tag, kid) return utra.map(kid, function(v) return {[tag]=v} end) end -- Concatenation. function utra.cat(a, b) if b == nil then error("can't concatenate "..tostring(a).." to nil, bozo") end return { cata = a, catb = b, parse = function(text, offset) local ar = a.parse(text, offset) if not ar then return ar end local br = b.parse(text, ar.offset) if not br then return br end -- Success; now we must merge the values. if type(ar.val) == 'table' then -- If either value is a table, it “wins”. if type(br.val) == 'string' then return {offset=br.offset, val=ar.val} end for k, v in pairs(ar.val) do br.val[k] = v end return br else if type(br.val) == 'table' then return br end return {offset=br.offset, val=ar.val..br.val} end end, } end -- A more efficient way to do that might be to pass a table to each -- parse function into which it could jam crap. Maybe later. -- character classes: pass in a table with keys that indicate the acceptable chars function utra.class(class) return { class = class, parse = function(text, offset) local s = string.sub(text, offset, offset) if class[s] then return {offset=offset+1, val=s} else return nil end end } end local function is_ident(s) if type(s) ~= 'string' then return false end local i, j = string.find(s, '%a+') return i == 1 and j == #s end -- Recursively print out an object, using `prefix` for lines after the -- first. For now handles only acyclic structures. function utra.dump(obj, output, prefix) if prefix == nil then prefix = '' end if output == nil then output = io.write end local t = type(obj) if t == 'table' then output('{') local nprefix = prefix..' ' for k, v in pairs(obj) do if is_ident(k) then output('\n', nprefix, k, ' = ') else output('\n', nprefix, '[') utra.dump(k, output, nprefix..' ') output('] = ') end utra.dump(v, output, nprefix) output(',') end output('\n', prefix, '}') elseif t == 'string' then output(string.format('%q', obj)) else -- e.g., functions and numbers output(string.format('%s', obj)) end end function utra.range(start, stop) local class = {} for i = string.byte(start), string.byte(stop) do class[string.char(i)] = 1 end return utra.class(class) end function utra.joinrep(kid) return utra.map(utra.rep(kid), function(v) return table.concat(v.items) end) end function utra.discard(kid) return utra.map(kid, function(v) return '' end) end -- Test parser. local num = utra.range('0', '9') local letter = utra.alt(utra.alt(utra.range('a', 'z'), utra.range('A', 'Z')), utra.lit('_')) local whitespace = utra.discard(utra.rep(utra.lit(' '))) local ident = utra.tag('ident', utra.cat(utra.cat(letter, utra.joinrep(utra.alt(letter, num))), whitespace)) local int = utra.tag('int', utra.cat(num, utra.cat(utra.joinrep(num), whitespace))) local tail, atom local sum = utra.thunk(function() return utra.tag('sum', utra.cat(utra.tag('first', atom), tail)) end) local parenthesized = utra.cat(utra.lit('('), utra.cat(whitespace, utra.cat(sum, utra.cat(utra.lit(')'), whitespace)))) local negation = utra.thunk(function() return utra.tag('negate', utra.cat(utra.lit('-'), utra.cat(whitespace, atom))) end) atom = utra.alt(negation, utra.alt(int, utra.alt(ident, parenthesized))) local addition = utra.cat(utra.lit('+'), utra.cat(whitespace, atom)) local subtraction = utra.cat(utra.lit('-'), utra.tag('negate', utra.cat(whitespace, atom))) tail = utra.rep(utra.alt(addition, subtraction)) local assign = utra.tag('assign', utra.cat(utra.tag('dest', ident), utra.cat(utra.lit('='), utra.cat(whitespace, utra.tag('src', sum))))) local parsed = assign.parse('x = 53 - -1 + This - 1 + -3 + -(-x) - now - (x+y-z)+2a phrase of 3 words.', 1) utra.dump(parsed) function compiler() return { names = {}, counter = 0, outcode = {}, emit = function(self, ...) for _, val in pairs({...}) do table.insert(self.outcode, val) end end, id_index = function(self, id) local n = self.names[id] if n == nil then n = self.counter self.names[id] = n self.counter = self.counter + 1 end return n end, compile = function(self, ast) if ast.sum then self:compile(ast.sum.first) for _, item in pairs(ast.sum.items) do if item.negate then self:compile(item.negate) self:emit('-') else self:compile(item) self:emit('+') end end elseif ast.int then local n = tonumber(ast.int) if n > -8 and n < 7 then self:emit('lit4.'..n) elseif n > -128 and n < 128 then self:emit('lit8', n) else self:emit('lit32', n) end elseif ast.negate then if ast.negate.int then self:compile({int=-ast.negate.int}) else self:compile(ast.negate) self:emit('neg') end elseif ast.ident then local n = self:id_index(ast.ident) local id = ast.ident if n < 15 then self:emit('load.'..n) else self:emit('load', n) end elseif ast.assign then local n = self:id_index(ast.assign.dest.ident) self:compile(ast.assign.src) if n < 15 then self:emit('store.'..n) else self:emit('store', n) end end end, } end local c = compiler() c:compile(parsed.val) utra.dump(c.names) utra.dump(c.outcode) -- Local Variables: -- compile-command: "luajit -e 'f, err = loadfile(\"microtrans.lua\") if err then print(err) else print(f()) end'" -- End: