#!/usr/bin/python3
"""Generate text using Φαιστός disk characters for fictional alien plants.

This currently only generates characters with a zero-order Zipf
distribution; it doesn’t have even a first-order Markov model of
letter associations.  However, the unreasonable word lengths were
bugging me too much, so I special-cased word separators.

Then I realized it was going to generate all possible one- and
two-letter words sooner or later, and that isn’t okay.  English, by
contrast, only has three one-letter words and about 30 two-letter
words, out of the roughly 25 and 625 possible, so we want something
like 1/20 to 1/30 probability of each possible word being valid.  The
obvious way of doing this with a hash function works but is pretty
slow.

After poking the hash functions for a while, this generates 66
distinct two-letter words (with a roughly Zipfian distribution), 2
distinct one-letter words, and almost 2000 distinct three-letter words
(out of 32768 possible).  This is not quite a natural-language
distribution but it's close enough to pass casual inspection.

It generates very Φαιστός-like strings:

    𐇞𐇤𐇤𐇚𐇢𐇟𐇚𐇶𐇴𐇺𐇼|𐇳𐇚𐇤𐇳𐇟|𐇳𐇠𐇱𐇳|𐇘𐇦𐇹𐇴𐇟|𐇚𐇟𐇧𐇚|𐇺𐇺𐇚𐇚𐇚𐇳
    |𐇴𐇡𐇚|𐇟𐇤𐇺𐇺𐇴|𐇨𐇷𐇚|𐇴𐇚|𐇫𐇟𐇚𐇤|𐇳𐇚𐇺𐇠𐇺|𐇚𐇚𐇺𐇸|𐇟𐇙𐇺𐇚
    |𐇚𐇚𐇚|𐇖𐇥|𐇟𐇚𐇶𐇚𐇟𐇟𐇚𐇳|𐇚𐇶𐇤𐇘𐇩|𐇤𐇺|𐇨𐇚𐇠𐇚𐇘𐇷|𐇟𐇟𐇚𐇷𐇺
    |𐇴𐇷𐇨𐇟𐇷𐇤𐇵𐇟𐇥𐇚|𐇤𐇚|𐇼𐇙𐇚𐇟𐇳𐇜𐇤|𐇷𐇫𐇙𐇟𐇚𐇴|𐇙𐇻𐇴|𐇲𐇚𐇺𐇺
    |𐇴𐇚|𐇜𐇚𐇘𐇤|𐇚𐇘𐇚𐇫

"""
import math
import random
import sys
import textwrap

# The alphabet, ordered from most to least frequent (rank 0 first).
letters = '𐇚𐇟𐇺𐇤𐇳𐇘𐇴𐇫𐇥𐇙𐇞𐇩𐇶𐇷𐇼𐇜𐇠𐇵𐇱𐇹𐇗𐇲𐇸𐇡𐇣𐇦𐇧𐇛𐇢𐇻𐇖𐇨'

# With a perfect Zipf distribution, the rank is exactly inversely
# proportional to the frequency.  Here we have 32 characters, so the
# 32nd will be about 32× less common than the 1st, so about 10'000
# items is probably good enough to provide an adequate approximation.
#
# Each letter is repeated 2500 // (1 + rank) times, so a uniform
# random.choice() over this list yields the Zipf-weighted letter.
freq_letters = [letter
                for rank, letter in enumerate(letters)
                for _ in range(2500 // (1 + rank))]


def word_length() -> int:
    """Return a random word length of at least 1.

    The length is a log-normal sample (mu = log 4, sigma = log 1.5)
    rounded to the nearest integer.  A sample below 0.5 would round to
    zero; it is clamped to 1 because no zero-letter word can pass the
    hash filter in word(), which would otherwise retry forever.
    """
    sample = random.lognormvariate(math.log(4), math.log(1.5))
    return max(1, int(round(sample)))


def word_hash(w: str) -> int:
    "Specify the hash algorithm for filtering so it’s reproducible."
    # Polynomial hash over the code points of w, starting from 6, with
    # multiplier 0xaaa, reduced modulo 65521 at every step.
    n = 6
    for c in w:
        n = (n * 0xaaa + ord(c)) % 65521
    return n


def word_candidate(length: int) -> str:
    "Returns length Zipf-distributed random letters, not yet filtered."
    return ''.join(random.choice(freq_letters) for _ in range(length))


def word(length: int) -> str:
    """Return a random valid word of the given length.

    Candidates are drawn repeatedly until one passes the filter: a
    word is valid only when the low four bits of its word_hash() are
    all zero.

    Raises ValueError if length is less than 1: the only candidate
    would be the empty string, whose hash (6) never passes the filter,
    so the retry loop could not terminate.
    """
    if length < 1:
        raise ValueError('word length must be at least 1, got %r' % (length,))
    while True:
        w = word_candidate(length)
        if (word_hash(w) & 0xf) == 0:
            return w


def text(n: int) -> str:
    "Returns n words of text."
    return '|'.join(word(word_length()) for _ in range(n))


def main(n: int) -> None:
    "Prints n words of text, wrapped."
    # textwrap only breaks lines at whitespace, so swap the '|'
    # separators for spaces while wrapping and swap them back after.
    for line in textwrap.wrap(text(n).replace('|', ' '), width=35):
        print(line.replace(' ', '|'))


if __name__ == '__main__':
    if len(sys.argv) != 2:
        sys.exit('usage: %s NWORDS' % sys.argv[0])
    main(int(sys.argv[1]))