#!/usr/bin/python3
"""Generate random sentences.

This uses a grammar of a very limited subset of English.  Among the
restrictions that make it work:

- No adjectives or nouns begin with vowels.
- All subjects are in third-person singular.

"""
import argparse
import random
import textwrap

ns = 'car lid dog man kid bat sea hat cup war way zoo jar toe web'.split()
vs = 'rots eats gets hits cuts fits revs uses wets jogs digs hugs'.split()
adjs = 'red big sad hot bad lit sly wan wee fat raw rum'.split()


# As a warmup, a top-down construction approach.

def noun():
    """Return a random noun from ``ns``."""
    return random.choice(ns)


def verb():
    """Return a random verb from ``vs``."""
    return random.choice(vs)


def adj():
    """Return a random adjective from ``adjs``."""
    return random.choice(adjs)


def sentence(clause):
    """Turn a clause into a sentence.

    ``clause`` is a zero-argument callable returning the clause text; the
    result is that text capitalized and terminated with a period.
    """
    return '{}.'.format(clause().capitalize())


def tdclause():
    """Build a random clause top-down, by recursive expansion.

    With probability .6 a simple "the SUBJ VERB the OBJ" clause is
    produced; otherwise two recursively generated clauses are joined with
    'and', 'if', or 'when'.
    """
    # This must be ≥.5 to avoid stack overflow: the other branch makes
    # two recursive calls, so it has to be taken less than half the time
    # for the expansion to die out.
    if random.random() < .6:
        subj = random.choice(['{} {}'.format(adj(), noun()), noun()])
        obj = random.choice(['{} {}'.format(adj(), noun()), noun()])
        return 'the {} {} the {}'.format(subj, verb(), obj)
    else:
        a = tdclause()
        b = tdclause()
        return random.choice(['{} and {}'.format(a, b),
                              '{} if {}'.format(a, b),
                              '{} when {}'.format(a, b)])


# Now, a bottom-up approach.

# Terminal words for each grammar symbol.
# Note that main() modifies these.  The 'n', 'v', and 'a' entries are the
# very same list objects as ns, vs, and adjs above, so in-place extension
# here is visible there too.
leaves = dict(n=ns,
              v=vs,
              a=adjs,
              d='the a my his her'.split(),
              p='of at by in on to'.split(),
              j='and or if when but as before after'.split(),
              nj=['or'])

# Productions, written as "head = symbol symbol ...".
rules = ['c = s vp',      # clause can be subject verbphrase
         'c = c j c',     # or clause conjunction clause
         's = np',        # a subject can be a noun phrase (or a leaf, see above)
         'np = d n',      # a noun phrase can be a determiner and a noun
         'np = np nj np',  # or two nps with a noun-conjunction
         'vp = v o',      # a verbphrase is a verb and an object
         'n = a n',       # a noun can be an adjective and a noun
         'o = np',        # an object can be a noun phrase
         'n = n p o']     # and a noun can be a noun, preposition, object sequence


def buclause(min_steps, max_steps):
    """Build a random clause bottom-up by applying random productions.

    Each step picks a random rule and tries to fill every symbol on its
    right-hand side, either from previously built phrases or from
    ``leaves``.  A successfully filled rule adds a new phrase to the pool
    for its head symbol.

    Once more than ``min_steps`` steps have been taken and at least one
    clause ('c') exists, the most recently built clause is returned as a
    space-separated string.  If ``max_steps`` steps pass without that
    happening, the string 'oof' is returned.
    """
    flat_rules = [rule.split() for rule in rules]
    # One pool of already-built phrases per symbol (leaf symbols included).
    items = {k: []
             for k in set(leaves).union(rule[0] for rule in flat_rules)}

    for i in range(max_steps):
        if items['c'] and i > min_steps:
            return ' '.join(flatten(items['c'][-1]))

        rule = random.choice(flat_rules)
        choices = []
        taken = []  # (symbol, index, phrase) for everything popped so far
        for nt in rule[2:]:
            # .get() because a symbol may be in neither items nor leaves
            # (e.g. 'a' after main() has removed the adjectives).
            pool = items.get(nt)
            # Built phrases are always preferred for non-leaf symbols; for
            # leaf symbols it is a coin flip between a built phrase and a
            # bare word.
            if pool and (nt not in leaves or random.random() > .5):
                ci = random.randrange(len(pool))
                phrase = pool.pop(ci)
                taken.append((nt, ci, phrase))
                choices.append(phrase)
            elif nt in leaves:
                choices.append(random.choice(leaves[nt]))
            else:
                # Missing prerequisite: abandon this production, but put
                # back whatever was already popped (in reverse order, so
                # every pool ends up exactly as it was) rather than
                # throwing away work we couldn't use.
                for sym, idx, phrase in reversed(taken):
                    items[sym].insert(idx, phrase)
                break
        else:
            items[rule[0]].append(choices)

    return 'oof'


def flatten(things):
    """Yield the non-list elements of an arbitrarily nested list, in order."""
    for thing in things:
        if isinstance(thing, list):
            yield from flatten(thing)
        else:
            yield thing


def main(argv=None):
    """Parse command-line options and print wrapped random sentences.

    ``argv`` is the list of argument strings to parse; when None,
    argparse falls back to ``sys.argv[1:]``.

    Modifies the module-level ``leaves`` according to the options.

    Raises ValueError if no nouns or no verbs remain after processing
    the options.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--use-pronouns', action='store_true')
    parser.add_argument('-n', '--number-of-sentences', type=int, default=8)
    parser.add_argument('--max-steps', type=int, default=1024,
                        help='Max number of productions to try per sentence'
                        + ' (default %(default)s)')
    parser.add_argument('--min-steps', type=int, default=128,
                        help='Min number of productions to try per sentence'
                        + ' (default %(default)s)')
    parser.add_argument('--noun', action='append', default=[],
                        help='Supply additional nouns')
    parser.add_argument('--verb', action='append', default=[],
                        help='Supply additional verbs')
    parser.add_argument('--adj', '--adjective', action='append', default=[],
                        help='Supply additional adjectives')
    parser.add_argument('--only', action='store_true',
                        help='Use no default nouns, verbs, and adjectives.')
    args = parser.parse_args(argv)

    if args.use_pronouns:
        leaves['s'] = 'it he she'.split()
        leaves['o'] = 'it him her'.split()

    if args.only:
        leaves['n'] = []
        leaves['v'] = []
        leaves['a'] = []

    leaves['n'].extend(args.noun)
    leaves['v'].extend(args.verb)
    leaves['a'].extend(args.adj)

    if not leaves['n']:
        raise ValueError('No nouns supplied')
    if not leaves['v']:
        raise ValueError('No verbs supplied')
    if not leaves['a']:
        # With no adjectives the 'n = a n' rule can simply never fire.
        del leaves['a']

    count = args.number_of_sentences
    min_steps = args.min_steps
    max_steps = args.max_steps
    text = ' '.join(sentence(lambda: buclause(min_steps, max_steps))
                    for _ in range(count))
    print('\n'.join(textwrap.wrap(text)))


if __name__ == '__main__':
    main()