#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Parsing expression grammars.

The wee hours of the morning have come, and I’m full of Mongolian
barbecue and Coca-Cola, with no desire to sleep.  Last night’s quick
hack, μSQL, was successful insofar as it produced a more or less
working SQL engine capable of evaluating simple SELECT and INSERT
statements, but suffered greatly from having to use regular
expressions to parse them.  And it turned out that actually there is
nothing in the Python standard library to help you write parsers for
things like that.

So I thought I’d see how much of a parsing engine I could hack
together in a couple of hours.  It wouldn’t have to be super efficient
to have helped a huge amount with the μSQL hack.  It wouldn’t have to
generate code, for example, or to allow you to attach arbitrary
semantic actions — it could just generate a syntax tree
interpretively.  And it wouldn’t have to include a parser for its own
language, as we could use an embedded DSL in Python.  This would save
us from having to explicitly implement grouping, for example, or
operator precedence, and it would allow us to smoothly integrate
things like Python regular expressions.

I wrote a PEG parser generator once before, called peg-bootstrap.  It
was written in itself, and it compiled to JS.  It supported negation
`!`, alternation `/`, terminal strings, and nonterminals, but not
repetition `*` or `+`, although that’s very useful in practice.  And
it supported recursive rules, which is an important consideration for
doing this as an embedded DSL in Python — you can’t just use Python
variables for your nonterminals.

The ideal thing to write in an embedded Python DSL for something like

    sentence        <- _ rule sentence / _ rule.

would be something like

    sentence = _, rule, sentence / _, rule

but there are three major problems with making that work.

First, the comma and / operators have the wrong relative
precedence — comma’s precedence is super low — and second, comma isn't
overrideable, so what you get out of it is a tuple, whose operators
aren’t overridden.  If you want something that binds tighter than /,
you need something wild like **.  You could improve the situation a
bit by using |, which binds looser than everything except boolean and
relational operators.  So you could hope for something like

    sentence = _ + rule + sentence | _ + rule

But then there’s the third problem, which is that you can’t overload
plain name lookup in Python like you can in Ruby.  You can overload it
on objects, though.

Except maybe with metaclasses and __prepare__ we can?  Holy hell, we
can.  holyshitnamespaces.py is the capsule summary, but I'm doing it
below, too.

The result is that I have spent an hour and a half getting that Python
syntactic sugar to work, but now it mostly does, except that sometimes
you have to stick in an explicit `Lit` or Python says, “TypeError:
Can't convert 'Nonterminal' object to str implicitly”, and you can’t
refer to any variables defined elsewhere because the magic namespace
intercepts all such attempts.

And now I’m kind of sleepy, but I do seem to be generating reasonable
parse trees for arithmetic expressions from a PEG.  And it’s only been
two hours!

"""


class PE:
    "Parsing expression."
    def __add__(self, other):
        return Sequence(self, peify(other))

    def __or__(self, other):
        return Alternation(self, peify(other))

    def __invert__(self):
        return Negation(self)


def peify(obj):
    return obj if isinstance(obj, PE) else Lit(obj)


class Sequence(PE):
    def __init__(self, a, b):
        self.items = a, b

    def __repr__(self):
        return '({!r}) + ({!r})'.format(*self.items)

    def parse(self, text, position):
        a, b = self.items
        result_a, pos = a.parse(text, position)
        result_b, pos = b.parse(text, pos)
        return [result_a, result_b], pos


class Alternation(PE):
    def __init__(self, a, b):
        self.items = a, b

    def __repr__(self):
        return '({!r}) | ({!r})'.format(*self.items)

    def parse(self, text, position):
        a, b = self.items
        try:
            return a.parse(text, position)
        except ParseFailure:
            pass
        return b.parse(text, position)


class Negation(PE):
    def __init__(self, negated):
        self.negated = negated

    def __repr__(self):
        return '~{!r}'.format(self.negated)

    def parse(self, text, position):
        try:
            val = self.negated.parse(text, position)
        except ParseFailure:
            return None, position
        else:
            raise ParseFailure(val, position)


class Nonterminal(PE):
    def __init__(self, name):
        self.name = name
        # There’s also a .definition attribute which may be populated
        # later.

    def __repr__(self):
        return '${}'.format(self.name)
    
    def parse(self, text, position=0):
        result, pos = self.definition.parse(text, position)
        return (self.name, result), pos


class Lit(PE):
    def __init__(self, text):
        self.text = text

    def __repr__(self):
        return '#{!r}'.format(self.text)

    def parse(self, text, position):
        end = position + len(self.text)
        if text[position:end] == self.text:
            return self.text, end
        raise ParseFailure(self.text, position)


class ParseFailure(Exception): pass


# Uh-oh, the fact that I have to include Lit here to make it visible
# betokens major trouble to come.
magic_names = set(['__name__', '__module__', '__qualname__', 'Lit'])


class GrammarNamespace(dict):
    def __getitem__(self, name):
        if name in magic_names:
            return dict.__getitem__(self, name)

        if not dict.__contains__(self, name):
            val = Nonterminal(name)
            dict.__setitem__(self, name, val)
            return val
        else:
            return dict.__getitem__(self, name)

    def __setitem__(self, name, val):
        if name in magic_names:
            return dict.__setitem__(self, name, val)

        if dict.__contains__(self, name):
            wrapper = dict.__getitem__(self, name)
        else:
            wrapper = Nonterminal(name)
            dict.__setitem__(self, name, wrapper)
        wrapper.definition = val


class GrammarMetaclass(type):
    @classmethod
    def __prepare__(cls, name, bases, **kwds):
        return GrammarNamespace()


class Grammar(metaclass=GrammarMetaclass):
    pass


class Example(Grammar):
    sentence = _ + rule + sentence | _ + rule
    sp = Lit(' ') | '\n' | '\t'
    _ = sp + _ | ''
    rule = name + _ + '<-' + _ + choice + '.' + _
    choice = sequence + '/' + _ + choice | sequence
    sequence = term + sequence | Lit('->') + _ + expr | ''
    expr = Lit('(') + _ + exprcontents + ')' + _
    exprcontents = (~Lit('(') + ~Lit(')') + char | expr) + exprcontents | ''
    term = (name + _ + ':' + term | Lit('!') + _ + term | string | name + _
            | Lit('(') + _ + choice + ')' + _)
    string = Lit("'") + stringcontents + "'" + _
    stringcontents = (~Lit('\\') + ~Lit("'") + char + stringcontents
                      | Lit('\\') + char + stringcontents
                      | '')
    meta = Lit('!') | "'" | '<-' | '/' | '.' | '(' | ')' | ':' | '->'
    name = namechar + name | namechar
    namechar = ~meta + ~sp + char

    yes = Lit('yes') | 'Yes'
    no = Lit('no') | 'No'
    response = (yes | no) + '.'
    

class Arithmetic(Grammar):
    sp = Lit(' ') | '\n' | '\t'
    _ = sp + _ | ''
    digit = Lit('0') | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'
    digits = digit + digits | digit
    number = (digits + '.' + digits | digits + '.' | Lit('.') + digits | digits) + _
    exponentiation = number + '**' + _ + number | number
    # Note that this generates the parse trees for division and
    # subtraction with the wrong associativity.
    multiplicative = (exponentiation + '/' + _ + multiplicative
                      | exponentiation + '*' + _ + multiplicative
                      | exponentiation)
    additive = (multiplicative + '+' + _ + additive
                | multiplicative + '-' + _ + additive
                | multiplicative)


if __name__ == '__main__':
    print(Example.sentence.definition)
    print(Example.sp.definition)
    print(Example._.definition)
    print(Example.stringcontents.definition)
    print(Example.meta.definition)

    print(Example.response.parse('Yes.'))
    print(Example.response.parse('no.'))

    print(Arithmetic.additive.parse('13 + 2 * 3 ** 4 + 5'))