(* Is a binary search tree simpler than a hash table?

   This polymorphic unbalanced binary search tree takes 8 lines of
   code in OCaml and appears to work; moreover, it’s FP-persistent,
   like the standard OCaml library Map type (which isn’t unbalanced).
   I wrote a simple monomorphic, non-resizing hash table in C earlier
   today, and it was about 20 lines of code.

   This code also worked correctly as soon as I could get it to
   type-check, while the hash table code had a couple of subtle bugs.

   So I think the balance favors the binary tree, at least if you
   already have garbage collection.  Although I probably could have
   factored the hash table better.

*)

type ('a, 'b) dict = Node of 'a * 'b * ('a, 'b) dict option * ('a, 'b) dict option

let rec get k = function None -> None
  | Some (Node(k1, v1, _, _)) when k1 = k -> Some v1
  | Some (Node(k1, _, x, y)) -> if k < k1 then get k x else get k y
and put k v = function None -> Some (Node(k, v, None, None))
  | Some (Node(k1, v1, x, y)) when k1 = k -> Some (Node(k, v, x, y))
  | Some (Node(k1, v1, x, y)) -> if k < k1 then Some(Node(k1, v1, put k v x, y))
                                           else Some(Node(k1, v1, x, put k v y))
   
(* That’s the 8 lines of code above.  The below is arguably convenience functions.  *)

and dictof = function [] -> None | (k, v)::xs -> put k v (dictof xs)
and items d = let rec iter d tl = match d with None -> tl
  | Some(Node(k, v, x, y)) -> iter x ((k, v) :: iter y tl)
              in iter d []
and invert d = dictof (List.map (fun (k, v) -> (v, k)) (items d))

let digits = dictof ["one", 1; "two", 2; "three", 3; "four", 4; "five", 5;
                     "six", 6; "seven", 7; "eight", 8; "nine", 9; "zero", 0] ;;

print_endline (List.fold_right (^) (List.map (fun d -> match get d digits with Some n -> string_of_int n) ["one"; "seven"; "two"; "three"]) "")


(* Maybe a better definition would be

type ('a, 'b) dict = Node of 'a * 'b * ('a, 'b) dict * ('a, 'b) dict | Empty

let rec get k = function Empty -> None
  | Node(k1, v1, _, _) when k1 = k -> Some v1
  | Node(k1, _, x, y) -> if k < k1 then get k x else get k y
and put k v = function Empty -> Node(k, v, Empty, Empty)
  | Node(k1, v1, x, y) when k1 = k -> Node(k, v, x, y)
  | Node(k1, v1, x, y) -> if k < k1 then Node(k1, v1, put k v x, y)
                                    else Node(k1, v1, x, put k v y)

Yeah, I think that’s better.

 *)