// written in ed because cellphone keyboards suck at vi // linear probing version to see if // better locality improves things #include #include #include char *xstrdup(char *s) { char *p = strdup(s); if (!p) abort(); return p; } typedef struct node { char *s; int count; } node; enum { hash_size = 65536 }; typedef struct table { node a[hash_size]; } table; table words; uint32_t hash(char *s) { uint32_t h = 0; while (*s) h = h * 17903 + *s++; return h; } node *upsert(table *h, char *s) { int i = hash(s) & (hash_size - 1); for (; h->a[i].s; i = (i + 1) & (hash_size - 1)) { if (0 == strcmp(s, h->a[i].s)) return &h->a[i]; } h->a[i] = (node) { xstrdup(s) }; return &h->a[i]; } void print_words(table *t) { for (size_t i = 0; i != hash_size; i++) { node *n = &t->a[i]; if (n->s) printf("%8d %s\n", n->count, n->s); } } int max_chain(table *t) { int max = 0, len = 0; for (size_t i = 0; i != hash_size; i++) { len = t->a[i].s ? len + 1 : 0; if (len > max) max = len; // XXX handle wraparound } return max; } #ifdef NO_HASHING #define upsert(...) (&dummy) #endif node dummy; int main(int argc, char **argv) { char buf[128]; for (;;) { if (!fgets(buf, sizeof buf, stdin)) break; char *p = buf; for (char *q = buf; *q; q++) { char c = *q; if ('a' <= c && c <= 'z' || 'A' <= c && c <= 'Z') { if (!*p) p = q; continue; } *q = 0; if (strlen(p)) upsert(&words, p)->count++; p = q; } if (strlen(p)) upsert(&words, p)->count++; } print_words(&words); printf("dummy count %d", dummy.count); fprintf(stderr, "max chain %d\n", max_chain(&words)); return 0; }