/* Tiny Lisp: how small can you make Lisp? * * Mostly from * https://www.mail-archive.com/kragen-hacks@canonical.org/msg00164.html * in 02007. Somewhat modified in 02022. * * The Lisp 1.5 manual's definition of Lisp in itself is 1222 * characters by my count; it doesn't include read, print, atom * interning, memory management, or activation record management. How * small could I make a C version? This version is 6.5K, 7.5K * including the GC skeleton. */ #define _GNU_SOURCE #include #include #include #include #include /* Start with cons, print, and atoms: ============================== */ #define MAXATOMS 1024 typedef struct pair { struct pair *car; struct pair *cdr; } pair; pair *cons(pair *car, pair *cdr) { pair *rv = malloc(sizeof(*rv)); rv->car = car; rv->cdr = cdr; return rv; } int streq(char *string, int len, char *ref) { if (strlen(ref) != len) return 0; return memcmp(string, ref, len) == 0; } typedef char **atom; /* considered putting the actual strings in here instead of strdupping; it made things slightly more complex */ char *atomtable[MAXATOMS] = {0}; atom intern(char *string, int len) { atom i; for (i = atomtable; *i; i++) if (streq(string, len, *i)) return i; if (i == &atomtable[MAXATOMS-1]) abort(); *i = strndup(string, len); return i; } atom as_atom(pair *x) { atom i = (atom)x; if (i >= atomtable && i < &atomtable[MAXATOMS]) return i; return 0; } void print(pair *expr) { atom x = as_atom(expr); if (x) { printf("%s", *x); return; } printf("("); while (expr) { print(expr->car); expr = expr->cdr; if (expr) printf(" "); } printf(")"); } /* now "read": =============================================== */ int is_atom_char(char c) { return (c && c != ')' && c != '(' && c != ' '); } pair *lread_atom(char **in) { char *org = *in; do (*in)++; /* do-while: avoid empty atoms */ while (is_atom_char(**in)); return (pair*)intern(org, *in-org); } pair *lread(char **in); pair *lread_list_tail(char **in) { pair *car; if (!**in) abort(); if (**in == ')') { (*in)++; return 0; } if (**in == ' ') { (*in)++; return lread_list_tail(in); } car = lread(in); return cons(car, lread_list_tail(in)); } pair *lread(char **in) { if (!**in) abort(); if (**in == '(') { (*in)++; return lread_list_tail(in); } if (**in == ' ') { (*in)++; return lread(in); } return lread_atom(in); } /* evaluation ===================================================== */ /* so now we have cons (7 loc), atoms (21 loc), print (14 loc), and read (24 loc); we can ignore memory management (for now) and activation records; what about evaluation? */ /* evalquote[fn;x] = apply[fn;x;NIL] */ pair *apply(pair *fn, pair *x, pair *a); pair *evalquote(pair *fn, pair *x) { return apply(fn, x, 0); } atom car_atom, cdr_atom, cons_atom, atom_atom, eq_atom, lambda_atom, label_atom, t_atom, quote_atom, cond_atom; pair *basic_env; void init() { car_atom = intern("car", 3); cdr_atom = intern("cdr", 3); cons_atom = intern("cons", 4); atom_atom = intern("atom", 4); eq_atom = intern("eq", 2); lambda_atom = intern("lambda", 6); label_atom = intern("label", 5); t_atom = intern("t", 1); quote_atom = intern("quote", 5); cond_atom = intern("cond", 4); basic_env = cons(cons((pair*)t_atom, (pair*)t_atom), 0); } pair *truth_value_of(int x) { return x ? (pair*)t_atom : 0; } /* apply[fn;x;a] = */ /* [atom[fn] -> [eq[fn;CAR] -> caar[x]; */ /* eq[fn;CDR] -> cdar[x]; */ /* eq[fn;CONS] -> cons[car[x];cadr[x]]; */ /* eq[fn;ATOM] -> atom[car[x]]; */ /* eq[fn;EQ] -> eq[car[x];cadr[x]]; */ /* T -> apply[eval[fn;a];x;a]]; */ /* eq[car[fn];LAMBDA] -> eval[caddr[fn];pairlis[cadr[fn];x;a]]; */ /* eq[car[fn];LABEL] -> apply[caddr[fn];x;cons[cons[cadr[fn]; */ /* caddr[fn]];a]]] */ pair *eval(pair *fn, pair *a); pair *pairlis(pair *x, pair *y, pair *a); pair *apply(pair *fn, pair *x, pair *a) { atom fn_atom = as_atom(fn); if (fn_atom == car_atom) return x->car->car; if (fn_atom == cdr_atom) return x->car->cdr; if (fn_atom == cons_atom) return cons(x->car, x->cdr->car); if (fn_atom == atom_atom) return truth_value_of(!!as_atom(x->car)); if (fn_atom == eq_atom) return truth_value_of(x->car == x->cdr->car); if (fn_atom) return apply(eval(fn, a), x, a); if (as_atom(fn->car) == lambda_atom) return eval(fn->cdr->cdr->car, pairlis(fn->cdr->car, x, a)); if (as_atom(fn->car) == label_atom) return apply(fn->cdr->cdr->car, x, cons(cons(fn->cdr->car, fn->cdr->cdr->car), a)); return 0; } /* eval[e;a] = [atom[e] -> cdr[assoc[e;a]]; */ /* atom[car[e]] -> */ /* [eq[car[e],QUOTE] -> cadr[e]; */ /* eq[car[e];COND] -> evcon[cdr[e];a]; */ /* T -> apply[car[e];evlis[cdr[e];a];a]]; */ /* T -> apply[car[e];evlis[cdr[e];a];a]] */ /* (note i am using assq rather than assoc so i don't need equal) */ pair *assq(pair *e, pair *a); pair *evcon(pair *c, pair *a); pair *evlis(pair *m, pair *a); pair *eval(pair *e, pair *a) { atom ea; if (as_atom(e)) return assq(e, a)->cdr; ea = as_atom(e->car); if (ea) { if (ea == quote_atom) return e->cdr->car; else if (ea == cond_atom) return evcon(e->cdr, a); } /* duplicated case condensed */ return apply(e->car, evlis(e->cdr, a), a); } /* evcon[c;a] = [eval[caar[c];a] -> eval[cadar[c];a]; */ /* T -> evcon[cdr[c];a]] */ pair *evcon(pair *c, pair *a) { if (eval(c->car->car, a)) return eval(c->car->cdr->car, a); else return evcon(c->cdr, a); } /* evlis[m;a] = [null[m] -> NIL; */ /* T -> cons[eval[car[m];a];evlis[cdr[m];a]]] */ pair *evlis(pair *m, pair *a) { if (!m) return 0; return cons(eval(m->car, a), evlis(m->cdr, a)); } /* pairlis[x;y;a] = [null[x]->a;T->cons[cons[car[x];car[y]]; */ /* pairlis[cdr[x];cdr[y];a]]] */ pair *pairlis(pair *x, pair *y, pair *a) { if (!x) return a; return cons(cons(x->car, y->car), pairlis(x->cdr, y->cdr, a)); } /* assoc[x;a] = [equal[caar[a];x]->car[a];T->assoc[x;cdr[a]]] */ pair *assq(pair *x, pair *a) { if (a->car->car == x) return a->car; else return assq(x, a->cdr); } int main(int argc, char **argv) { pair *expr; init(); for (;;) { char buf[1024]; char *c; printf("?? "); fflush(stdout); if (!fgets(buf, sizeof(buf), stdin)) break; c = buf; expr = lread(&c); print(expr); printf("\n"); print(eval(expr, basic_env)); printf("\n"); } return 0; } // GC skeleton. This is definitely not correct in any way, but it // should be about the right size for the Cheney two-finger GC. enum { BROKEN_HEART = -1 }; int tospacep; pair *tospace, *fromspace; void get_roots() { /* XXX */ } static inline void swap(pair **a, pair **b) { pair **c = a; a = b; b = c; } void gc_visit(pair **cell) { if (!as_atom(*cell)) { /* if it is a pair, not an atom */ if ((intptr_t)(*cell)->car == BROKEN_HEART) { *cell = (*cell)->cdr; } else { size_t dest = tospacep++; tospace[dest] = **cell; (*cell)->car = (pair*)BROKEN_HEART; (*cell)->cdr = (pair*)dest; *cell = tospace + dest; } } } void gc() { tospacep = 0; int tovisit = 0; get_roots(); while (tovisit < tospacep) { gc_visit(&tospace[tovisit].car); gc_visit(&tospace[tovisit].cdr); tovisit++; } swap(&fromspace, &tospace); }