#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Make a mnemonic word dictionary.

Input should be a wordlist with frequencies such as
<http://canonical.org/~kragen/sw/wordlist>.

The resulting list of words is useful for encoding random numbers in
base 2048, for example SHA-2 hashes or passphrases.

"""

import sys
import difflib


def main():
    """Read a frequency wordlist from stdin and print the chosen words.

    Each non-blank input line must hold exactly two whitespace-separated
    fields, a frequency and a word; the frequency itself is ignored.
    Words are considered in input order.  A word is accepted when its
    length (per ``len``) is between 4 and 8 inclusive and its
    ``distance`` to every previously accepted word is at least 2.
    Processing stops once 2048 words have been accepted or the input
    is exhausted.

    For every candidate a tab-indented diagnostic line giving the
    verdict is written to stdout; each accepted word is additionally
    written on its own line as ``<count> <word>``.

    Raises:
        ValueError: if a non-blank line does not have exactly two fields.
    """
    # sys.stdout.write behaves the same under Python 2 and Python 3,
    # unlike the print statement, and lets the candidate prefix and its
    # verdict share a single output line.
    write = sys.stdout.write
    words = []
    for line in sys.stdin:
        fields = line.split()
        if not fields:
            # Tolerate blank lines (e.g. a trailing one) instead of
            # failing on the tuple unpacking below.
            continue
        _freq, word = fields
        write("\t%s? " % word)
        if not 4 <= len(word) <= 8:
            write("unreasonable length\n")
            continue

        # Omit words without at least a difference of 2 from every
        # previous word.
        too_similar = False
        closest = None  # (word, distance) of the nearest accepted word
        for already_chosen_word in words:
            d = distance(word, already_chosen_word)
            if closest is None or d < closest[1]:
                closest = already_chosen_word, d
            if d < 2:
                write("too close to %s\n" % already_chosen_word)
                too_similar = True
                break

        if too_similar:
            continue

        if closest is None:
            write("first viable word\n")
        else:
            write("unique enough, closest was %s (%d)\n" % closest)

        words.append(word)
        write("%d %s\n" % (len(words), word))
        if len(words) >= 2048:
            break

def distance(a, b):
    """Return a rough edit distance between sequences a and b.

    The two sequences are aligned with difflib.SequenceMatcher.  Every
    opcode other than 'equal' contributes the length of the longer of
    its two spans (the span in a or the span in b); the result is the
    total of those contributions.
    """
    total = 0
    opcodes = difflib.SequenceMatcher(None, a, b).get_opcodes()
    for tag, a_start, a_end, b_start, b_end in opcodes:
        if tag == 'equal':
            continue
        a_span = a_end - a_start
        b_span = b_end - b_start
        total += max(a_span, b_span)
    return total
        
# Script entry point: call main() only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()