#!/usr/bin/python3 """Small PEG EDSL. See spinfix.py for an example. parse(g, s, p) parses an input string s starting at position p according to a Straightpin Grammar g, returning either (True, p, m) where p is the position where parsing ended and m is the value of the match, or (False, p, None) where p is the position where parsing failed. A Straightpin Grammar is one of the following: - A regular expression. - A string, matching that literal string. - A list of Straightpin Grammars, matching their concatenation. - A function which returns a Straightpin Grammar when invoked with no arguments. (This permits recursion.) - A tuple whose first element is 'or' and whose later elements are Straightpin Grammars; this tries them in order and, when one matches, the entire combination matches. - A 2-tuple whose first element is 'any' and whose other element is a Straightpin Grammar; this matches the second element zero or more times. - A 2-tuple whose first element is 'not' and whose other element is a Straightpin Grammar. This matches the empty string, but only if its argument fails to match starting there. - A tuple whose first element is a function and whose other elements are Straightpin Grammars. This matches the concatenation of the grammars, then if successful passes the values they matched to the function as arguments, and the function’s return value is the match value. I was hoping to make this smaller than my 'tack.py' but actually it’s bigger, but it’s still only 45 lines. This is significantly inspired by Darius Bacon’s Parson and Peglet . His crusoeparse is even more similar and worth looking at. """ import re function = type(lambda: None) def parse(g, s, p=0): if type(g) is re.Pattern: m = g.match(s, p) return (True, m.end(), m) if m else (False, p, None) elif type(g) is str: return (True, p + len(g), g) if s[p:p+len(g)] == g else (False, p, None) elif type(g) is list: m = [] for gi in g: ok, pi, mi = parse(gi, s, p) if not ok: return False, p, None m.append(mi) p = pi return True, p, m elif type(g) is function: return parse(g(), s, p) elif g[0] == 'or': end = p for gi in g[1:]: ok, pi, mi = parse(gi, s, p) if ok: return ok, pi, mi end = max(end, pi) return False, end, None elif g[0] == 'any': m = [] while True: ok, pi, mi = parse(g[1], s, p) if not ok: return True, p, m m.append(mi) assert pi > p, "Kleene closure over ε" p = pi elif type(g[0]) is function: ok, p, m = parse(list(g[1:]), s, p) return (True, p, g[0](*m)) if ok else (ok, p, m) elif g[0] == 'not': ok, pi, mi = parse(g[1], s, p) return (False, pi, None) if ok else (True, p, None) else: raise ValueError('Invalid grammar', g)