// Count occurrences of each word — an example program for a simple
// trie implementation.

// Applied to the 4,452,069-byte bible-pg10.txt from Project
// Gutenberg, compiled with GCC 5.4.0-6ubuntu1~16.04.4 for i386 with
// -O5, it generates the correct 579,687-byte output file in about
// 330 ms, allocating 7,468,608 bytes according to valgrind (about
// 12.9× the size of the output) and running 553,117,037 instructions
// (about 124 per input byte).  Compiled for amd64 it executes 16%
// less instructions but takes 42% longer to run because it allocates
// like twice as much memory and consequently has worse cache miss
// rates.

// To be somewhat more concrete, there were 34,057 distinct words
// listed in the output file, stored in 109,714 allocated trie nodes
// (14,929,024 bytes in the amd64 version).  This works out to 3.22
// nodes per word.

// (Hmm, maybe a Patricia or Patricia-ish version of this code, using
// line endings, could beat GNU sort.  It would use one node per word
// instead of 3+.  And it could mmap, avoiding copying under many
// conditions.)

// The Python version of this program takes 845 milliseconds, about
// twice as long as this version.

#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define LENGTH(x) ((sizeof(x))/(sizeof(*x)))

typedef struct trie {
  struct trie *kids[16];
  int freq;
} trie;

static trie *new_trie()
{
  trie *t = malloc(sizeof(*t));
  if (!t) abort();

  for (int i = 0; i < LENGTH(t->kids); i++) {
    t->kids[i] = NULL;
  }
  
  t->freq = 0;

  return t;
}

static trie *kid(trie *t, int i)
{
  if (!t->kids[i]) t->kids[i] = new_trie();
  return t->kids[i];
}

static trie *blaze(trie *t, char *s, int n)
{
  return n ? blaze(kid(kid(t, s[0] >> 4 & 0xf), s[0] & 0xf), s+1, n-1) : t;
}

static void increment(trie *t, char *word)
{
  blaze(t, word, strlen(word))->freq++;
}

static void iterate_depth(trie *t,
                          int depth,
                          char *buf,
                          void (*f)(void *userdata, char *word, int freq),
                          void *userdata)
{
  if (!t) return;

  if (t->freq) {
    buf[depth/2] = '\0';
    f(userdata, buf, t->freq);
  }
  
  for (int i = 0; i < LENGTH(t->kids); i++) {
    buf[depth/2] = depth & 1 ? (buf[depth/2] & 0xf0) | i : i << 4;
    iterate_depth(t->kids[i], depth+1, buf, f, userdata);
  }
}

static void iterate(trie *t,
                    void (*f)(void *userdata, char *word, int freq),
                    void *userdata)
{
  char buf[256] = {0};
  iterate_depth(t, 0, buf, f, userdata);
}

static void print_freq(void *userdata, char *word, int freq)
{
  printf("%8d %s\n", freq, word);
}

static void delete(trie *t)
{
  if (!t) return;

  for (int i = 0; i < LENGTH(t->kids); i++) {
    delete(t->kids[i]);
    t->kids[i] = NULL;
  }

  free(t);
}

int main()
{
  trie *t = new_trie();
  char word[256];
  int wordlen = 0, c;

  while ((c = getchar()) != EOF) {
    if (isspace(c)) {
      if (wordlen) {
        word[wordlen] = '\0';
        increment(t, word);
        wordlen = 0;
      }
    } else {
      word[wordlen++] = c;
      assert(wordlen < sizeof(word));
    }
  }

  iterate(t, print_freq, NULL);

  delete(t);

  return 0;
}