#!/usr/bin/python3
"""A dumb compiler.

I want to compile this into working amd64 assembly::

    n = 0;
    for (i = 0; i < 10; i++) {
        n += i;
    }
    print(n);

The parsing libraries I have installed are, uh, python3-pyparsing,
which I think is because calibre depends on it.  I don't have its docs,
but its pydoc seems fairly good.  It looks pretty reasonable; it's an
embedded DSL for PEG grammars with an optional Packrat implementation.

This produces stack code: every grammar rule's parse action returns the
flat sequence of instructions for the construct it matched.  An
instruction is either a tuple such as ``('push', 3)`` or a bare string
such as ``'lessthan'``.

Evidently if your parse action returns a list, but not a tuple,
pyparsing flattens it (one level!) into the parent sequence of tokens.
The parse actions below rely on that: a sub-expression may contribute
any number of instructions to its parent's token list, so a parent must
splice in *all* of the child's tokens rather than indexing a single one.
Where a rule has several variable-length children (the ``for``
statement), each child is wrapped in ``Group`` to keep them apart.

Also toks is not really a list, and the same is true of the return type
of parseString, which is actually a ParseResult, which displays
misleadingly as a tuple of a list and a dict.  In particular::

    Word(alphas)('name').parseString('foo')['name']

evaluates to 'foo', as does indexing with [0].  ``__call__`` is
overridden to .setResultsName, which I guess sticks something into the
``_ParseResults__tokdict``.  Note that a results name on an ungrouped
expression only yields that expression's *first* token.

This is sort of documented in the docstrings for setParseAction and
ParseResults.
"""

from pyparsing import (
    Forward,
    Group,
    Literal,
    Suppress,
    Word,
    alphas,
    nums,
)


def _push(toks):
    """Integer literal -> a single ``('push', value)`` instruction."""
    return ('push', int(toks[0]))


def _load(toks):
    """Bare variable reference -> a single ``('load', name)`` instruction."""
    return ('load', toks[0])


def _dupstore(toks):
    """``name = expr`` -> code for expr, then ``('dupstore', name)``."""
    target, _operator, *value_code = toks
    return [*value_code, ('dupstore', target)]


def _addstore(toks):
    """``name += expr`` -> code for expr, then ``('addstore', name)``."""
    target, _operator, *value_code = toks
    return [*value_code, ('addstore', target)]


def _lessthan(toks):
    """``name < expr`` -> load name, code for expr, then ``'lessthan'``."""
    left, _operator, *right_code = toks
    return [('load', left), *right_code, 'lessthan']


def _incr(toks):
    """``name++`` -> a single ``('incr', name)`` instruction."""
    return ('incr', toks[0])


def _call(toks):
    """``name(args)`` -> code for every argument, the name, then ``'call'``."""
    callee, *arg_code = toks
    return [*arg_code, callee, 'call']


def _for_loop(toks):
    """Lay out a ``for`` statement from its four grouped clauses."""
    init, cond, step, body = toks
    return [*init, 'loop', *cond, 'while', *body, *step, 'repeat']


def _expression_statement(toks):
    """``expr;`` -> code for expr followed by ``'pop'``."""
    return [*toks, 'pop']


name = Word(alphas)
integer = Word(nums).setParseAction(_push)

expr = Forward()

assignment = name + '=' + expr
assignment.setParseAction(_dupstore)

augment = name + '+=' + expr
augment.setParseAction(_addstore)

comparison = name + '<' + expr  # XXX refactor to fake left recursion
comparison.setParseAction(_lessthan)

post_increment = name + '++'
post_increment.setParseAction(_incr)

# Copy ``name`` first so the load action is not attached to the shared
# ``name`` element used by the other rules.
varfetch = name.copy().setParseAction(_load)

# Suppress the separators so only instructions reach funcall's action.
args = expr + (Suppress(',') + expr)[...]

funcall = name + Suppress('(') + args + Suppress(')')
funcall.setParseAction(_call)

expr <<= (
    assignment
    | integer
    | comparison
    | post_increment
    | augment
    | funcall
    | varfetch
)

# Import-time sanity checks on the grammar.
assert args.matches('x')
assert args.matches('x, y')
assert args.matches('n')
assert name.matches('print')
assert funcall.matches('print(n)')
assert funcall.matches(' print(n)')
assert expr.matches(' print(n)')

stmt = Forward()

# Each clause is wrapped in Group so that it reaches the parse action as
# one token holding all of its instructions, however many there are.
forstmt = (
    Suppress(Literal('for'))
    + Suppress('(')
    + Group(expr)
    + Suppress(';')
    + Group(expr)
    + Suppress(';')
    + Group(expr)
    + Suppress(')')
    + Group(stmt)
)
forstmt.setParseAction(_for_loop)

exprstmt = expr + Suppress(';')
exprstmt.setParseAction(_expression_statement)

stmt <<= (
    exprstmt
    | forstmt
    | Suppress("{") + stmt[...] + Suppress("}")
)

assert stmt.matches(' print(n);')

program = stmt[1, ...]

example_program = '''
n = 0;
for (i = 0; i < 10; i++) {
    n += i;
}
print(n);
print(n < 3, i);
'''


def compile_program(source):
    """Parse *source* and return its stack instructions as a list.

    The whole input must parse; trailing text that is not a statement
    raises pyparsing's ParseException instead of being ignored.
    """
    return list(program.parseString(source, parseAll=True))


def main():
    """Compile the built-in example program and print one instruction per line."""
    for instruction in compile_program(example_program):
        if isinstance(instruction, tuple):
            print(*instruction)
        else:
            print(instruction)


if __name__ == '__main__':
    main()