#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Make a mnemonic word dictionary.

Input (read from stdin) should be a wordlist with frequencies, one
whitespace-separated ``<frequency> <word>`` pair per line.

The idea is that this list of words is useful for encoding random
numbers in base 2048.  For example, SHA-2 hashes or passphrases.
"""

import sys
import difflib

# Number of words to collect (one word per base-2048 digit value).
MAX_WORDS = 2048
# Inclusive bounds on the length of an acceptable word.
MIN_LENGTH = 4
MAX_LENGTH = 8
# A candidate must differ from every chosen word by at least this much.
MIN_DISTANCE = 2


def main(lines=None, max_words=MAX_WORDS):
    """Pick mutually dissimilar words from a frequency list.

    Each input line is expected to hold exactly two whitespace-separated
    fields, ``<frequency> <word>``; the frequency is ignored.  Words are
    considered in input order.  Blank lines are skipped silently and
    other malformed lines are skipped with a note on stderr instead of
    aborting the run.

    For every candidate a tab-indented verdict line is printed to
    stdout; every accepted word is additionally printed as
    ``<index> <word>`` with a 1-based index.

    Args:
        lines: iterable of input lines; defaults to ``sys.stdin``.
        max_words: stop as soon as this many words have been accepted.

    Returns:
        The list of accepted words, in the order they were accepted.
    """
    if lines is None:
        lines = sys.stdin

    words = []
    if max_words <= 0:
        return words

    for line in lines:
        fields = line.split()
        if not fields:
            continue  # blank line
        if len(fields) != 2:
            sys.stderr.write("skipping malformed line: %r\n" % (line,))
            continue
        word = fields[1]

        accepted, verdict = _assess(word, words)
        # Single-argument print(...) yields identical output whether it is
        # parsed as a Python 2 print statement or a Python 3 function call.
        print("\t%s? %s" % (word, verdict))
        if not accepted:
            continue

        words.append(word)
        print('%d %s' % (len(words), word))
        if len(words) >= max_words:
            break

    return words


def _assess(word, chosen):
    """Decide whether *word* may join *chosen*; return (accepted, verdict)."""
    if not MIN_LENGTH <= len(word) <= MAX_LENGTH:
        return False, "unreasonable length"

    # Omit words without at least a difference of MIN_DISTANCE from
    # every previous word, remembering the nearest one for the report.
    closest = None
    for other in chosen:
        d = distance(word, other)
        if d < MIN_DISTANCE:
            return False, "too close to %s" % other
        if closest is None or d < closest[1]:
            closest = other, d

    if closest is None:
        return True, "first viable word"
    return True, "unique enough, closest was %s (%d)" % closest


def distance(a, b):
    """Return a rough edit distance between sequences *a* and *b*.

    The two sequences are aligned with ``difflib.SequenceMatcher``; every
    non-equal opcode contributes the length of the longer of the two
    spans it covers.  Identical inputs give 0.
    """
    matcher = difflib.SequenceMatcher(None, a, b)
    return sum(max(i2 - i1, j2 - j1)
               for op, i1, i2, j1, j2 in matcher.get_opcodes()
               if op != 'equal')


if __name__ == '__main__':
    main()