#!/usr/bin/python3
"""Generate words using a FSM model, or really a first-order Markov chain.

Words should be on stdin, one per line.  Characters α and ω are reserved.
"""

import argparse
import random
import sys


def model(words):
    """Build the transition table of a first-order Markov chain.

    Each word is stripped of surrounding whitespace and wrapped in the
    reserved start ('α') and end ('ω') markers.  For every adjacent pair
    of characters (a, b), b is appended to the successor list of a.
    Successors are stored with repetition, so a uniform choice from the
    list reproduces the observed transition frequencies.

    A blank line contributes a direct 'α' -> 'ω' transition.

    Returns a plain dict mapping each character to its list of successors.
    """
    transitions = {}
    for word in words:
        for a, b in pairs('α' + word.strip() + 'ω'):
            transitions.setdefault(a, []).append(b)
    return transitions


def pairs(seq):
    """Yield each pair of adjacent items of *seq*, in order.

    Sequences with fewer than two items yield nothing.
    """
    iterator = iter(seq)
    try:
        last = next(iterator)
    except StopIteration:
        # A StopIteration escaping a generator body becomes RuntimeError
        # (PEP 479), so an empty input must end the generator explicitly.
        return
    for item in iterator:
        yield last, item
        last = item


def generate(model):
    """Yield the characters of one random word drawn from *model*.

    Starts in the 'α' state and repeatedly picks a uniformly random
    successor until the 'ω' state is reached; neither marker is yielded.
    Raises KeyError if the current state has no entry in *model* (for
    example, when the model is empty).
    """
    state = 'α'
    while True:
        state = random.choice(model[state])
        if state == 'ω':
            break
        yield state


def _dot_escape(text):
    """Escape backslashes and double quotes for a DOT quoted string."""
    return text.replace('\\', '\\\\').replace('"', '\\"')


def graphviz(model):
    """Yield the lines of a Graphviz digraph describing *model*.

    Every character that appears as a source or a successor becomes a
    node; each distinct transition becomes one edge.  Nodes and edges
    are emitted in sorted order so the output is reproducible from run
    to run regardless of string hash randomisation.
    """
    letters = set(model)
    for successors in model.values():
        letters.update(successors)
    ordered = sorted(letters)
    nodes = {letter: i for i, letter in enumerate(ordered)}

    yield 'digraph grammar {\n'
    for letter in ordered:
        i = nodes[letter]
        yield ' %d [label="%s"];\n' % (i, _dot_escape(letter))
        # set() collapses repeated transitions into a single edge.
        for dest in sorted(set(model.get(letter, ()))):
            yield ' %d -> %d;\n' % (i, nodes[dest])
    yield '}\n'


def main():
    """Read words from stdin; emit a DOT graph or random words.

    Without --words the model is written to stdout as a Graphviz graph.
    With --words N, N generated words are printed, one per line.
    """
    p = argparse.ArgumentParser(description=__doc__)
    p.add_argument('-w', '--words', type=int,
                   help='generate some number of words')
    args = p.parse_args()

    m = model(sys.stdin)
    if args.words is None:
        sys.stdout.writelines(graphviz(m))
        return

    if not m:
        # With no input there is no start state; report it as a usage
        # error instead of letting generate() die with a KeyError.
        p.error('no words were read from stdin')
    for _ in range(args.words):
        print(''.join(generate(m)))


if __name__ == '__main__':
    main()