"""TACK: a smaller version of a PEG, trimmed down from peg.py. This PEG embedded DSL is 27 lines of code. It’s kind of untested, though, and the parse trees it produces are kind of ugly, and it doesn’t report parsing errors usefully (not even a position). See tacktest.py for a minimal smoke test. These parsing expression nodes return None if they fail to match. """ from collections import namedtuple class PE: """Parsing expression base class.""" __add__ = lambda s, o: Seq(s, o if isinstance(o, PE) else Lit(o)) __or__ = lambda s, o: Alt(s, o if isinstance(o, PE) else Lit(o)) __invert__ = lambda self: Neg(self) class Lit(PE, namedtuple('Lit', ['text'])): """Literal — a parsing expression matching a literal string.""" def parse(self, text, position): if self.text == text[position:position + len(self.text)]: return self.text, position + len(self.text) class Seq(PE, namedtuple('Seq', ['a', 'b'])): """Sequence — a parsing expression matching a sequence of two PEs.""" def parse(self, text, position): a = self.a.parse(text, position) if a: b = self.b.parse(text, a[1]) return ([a[0], b[0]], b[1]) if b else None class Alt(PE, namedtuple('Alt', ['a', 'b'])): """Alternation — a parsing expression that falls back to plan B on failure. """ def parse(self, text, position): return self.a.parse(text, position) or self.b.parse(text, position) class Neg(PE, namedtuple('Neg', ['negated'])): """Negation — a zero-width negative lookahead assertion parsing expression. This consumes no text, but matches at a position only if its argument would have failed to match there. """ def parse(self, text, position): return None if self.negated.parse(text, position) else (None, position) class Nonterminal(PE): """Nonterminal — a parsing expression with a name. This serves two purposes. First, it tags the parse tree being returned with the nonterminal name, which can serve as a variant tag if you organize your grammar that way. Second, it’s mutable, unlike the other parsing expression node types, thus permitting circular references to be formed. Assign to its .rule attribute after creating it in order to provide its definition — otherwise its .parse method will raise an exception. """ def __init__(self, name): self.name = name # .rule is added later in order to support recursion. def parse(self, text, position=0): r = self.rule.parse(text, position) return ((self.name, r[0]), r[1]) if r else None