#!/usr/bin/python3 """Recursive-descent expression compiler emitting pseudo-assembly. Perhaps of some didactic use. Starting from some rather flawed GPT-4 output, I hacked it up based on Darius Bacon’s eg_calc_compile , which is an improvement on its model , and based on some remarks in Jack Crenshaw’s “Let’s Build a Compiler”. Given no input, it produces this output from its example expression ``3 + 5 * (x - 20)``:: loadconst r0, 3 loadconst r1, 5 loadvar r2, x loadconst r3, 20 sub r2, r2, r3 mul r1, r1, r2 add r0, r0, r1 To the extent possible under law, Kragen Javier Sitaker has waived all copyright and related or neighboring rights to Didcomp. This work is published from Argentina. See also didcomp.c. """ import re import sys def tokenize(text: str) -> list[str]: "Return a stack of tokens representing the text, reversed." # Note that this implicitly skips over whitespace, but catches any # leftover non-whitespace characters with the \S case, so they can # produce parse errors. return re.findall(r'(?x) \d+ | [+*/()-] | \w+ | \S', text)[::-1] class ParseError(Exception): pass def parse_expression(dep: int, tokens: list[str]) -> None: "Translate the expression in token-stack tokens; initial stack depth is dep" parse_mul_div(dep, tokens) while tokens[-1:] in (['+'], ['-']): op = tokens.pop() parse_mul_div(dep+1, tokens) print(f'\t{"add" if op == "+" else "sub"} r{dep}, r{dep}, r{dep+1}') def parse_mul_div(dep: int, tokens: list[str]) -> None: parse_primary(dep, tokens) while tokens[-1:] in (['*'], ['/']): op = tokens.pop() parse_primary(dep+1, tokens) print(f'\t{"mul" if op == "*" else "div"} r{dep}, r{dep}, r{dep+1}') def parse_primary(dep: int, tokens: list[str]) -> None: token = tokens.pop() if tokens else None if token == '-': # Unary minus operator parse_primary(dep, tokens) print(f'\tneg r{dep}') elif token and token.isdigit(): # Literal numeric constant print(f'\tloadconst r{dep}, {token}') elif token and token[0].isalpha(): # Variable reference print(f'\tloadvar r{dep}, {token}') elif token == '(': # Nested parenthesized expression parse_expression(dep, tokens) token = tokens.pop() if tokens else None if token != ')': raise ParseError(token) else: raise ParseError(token) if __name__ == '__main__': tokenizer_input = "3 + 5 * (x - 20)" if len(sys.argv) == 1 else sys.argv[1] tokens = tokenize(tokenizer_input) parse_expression(0, tokens) if tokens: # Was there trailing garbage after the expression? raise ParseError(tokens[-1])