(* Let’s statically type a blood test. *)

(* John Cowan posed the following example problem to solve:

> Suppose you have a stream of lab test results, where for each blood
> sample you have in addition to time drawn, time tested, and other
> stuff, you need an object representing the tests performed and their
> values, which could be numbers, or strings.

> However, perhaps the lab does 500 different tests of which only a few
> will be in this blood sample.

> To statically type that, you have to have a structure with 500
> elements, most of which at any time are null.

> Messing with that is ridiculously clunky.

Well, a structure with 500 elements most of which are null is certainly
one possibility, but there are other possible alternatives.
Let’s try some ways of doing this with alists.  The first thing
that occurred to me was a custom alist type for these blood tests:

*)

type test_item = Hematocrit of int | Creatinine of float | Glucose of int
type test_items = EmptyTest | TestCons of test_item * test_items
type test = Test of (int * float * test_items)
let rec hematocrit = function TestCons (Hematocrit x, _) -> Some x
  | EmptyTest -> None
  | TestCons (_, cdr) -> hematocrit cdr

(* But that requires repetitively defining not only a type but also an
   accessor function for each blood test type, which is indeed
   ridiculously clunky.  Instead we could pull the argument out of the
   variant type for the blood test, leaving it as just a simple enum, and
   define a couple of other variant tags to wrap int values and float
   values: *)

type int_tag = HematocritT | GlucoseT
type float_tag = CreatinineT
type by_type_tag = EmptyBTT
                 | BTTConsInt of (int_tag * int * by_type_tag)
                 | BTTConsFloat of (float_tag * float * by_type_tag)
let rec get_int tag = function EmptyBTT -> None
  | BTTConsFloat (_, _, cdr) -> get_int tag cdr
  | BTTConsInt (tag2, v, _) when tag2 == tag -> Some v
  | BTTConsInt (_, _, cdr) -> get_int tag cdr

(* Moreover we don’t really need to define our own list type; we can
   use the standard list type, although we can’t use List.assoc,
   since our keys are wrapped in an item that associates them with a
   value of the right type: *)

type int_item = HematocritI | GlucoseI
type float_item = CreatinineI
type item = IntItem of int_item * int | FloatItem of float_item * float
let rec get_int_item tag = function [] -> None
  | IntItem (tag2, v) :: _ when tag == tag2 -> Some v
  | _ :: cdr -> get_int_item tag cdr
let rec get_float_item tag = function [] -> None
  | FloatItem (tag2, v) :: _ when tag == tag2 -> Some v
  | _ :: cdr -> get_float_item tag cdr

let h_a = get_int_item HematocritI [FloatItem(CreatinineI, 32.1); IntItem(HematocritI, 22)]

(* OCaml’s type system is capable of factoring out the loop in
 common between get_int_item and get_float_item above.  In fact, I
 think this function might be in the standard library.  But it doesn’t
 simplify the code significantly over using explicit recursion: *)

let rec find_if f = function [] -> None
  | car :: cdr -> match f car with
    | None -> get_item f cdr
    | result -> result

let get_int_item_2 tag = find_if (function
  | IntItem (tag2, v) when tag == tag2 -> Some v
  | _ -> None)

(* I think get_int_item above is close to the sweet spot.  We can cut
 it down to less code by using polymorphic variants, but we somewhat
 lose the association between blood tests and primitive types; I don't
 know how to restore that association in a compile-time-checked way
 when using polymorphic variants, although the commented-out code
 below demonstrates how we can detect it at run time.  Also note that
 this version has an easy path to using it with the standard library
 map type; to do that with the above approach, you'd need separate
 maps for ints and floats. *)
exception WrongTagType
let rec iget tag = function
  | [] -> None
  | (tag2, `Int v) :: _ when tag == tag2 -> Some v
(* | (tag2, _) :: _ when tag == tag2 -> raise WrongTagType *) (* this detects the two errors mentioned below, but at run time *)
  | _ :: cdr -> iget tag cdr

let rec fget tag = function
  | [] -> None
  | (tag2, `Float v) :: _ when tag == tag2 -> Some v
  | _ :: cdr -> fget tag cdr

let h1 = iget `Hematocrit [`Creatinine, `Float 2.4; `Hematocrit, `Int 32]
and h2 = iget `Hematocrit [`Creatinine, `Float 2.4]
and h3 = iget `Hematocrit [`Creatinine, `Float 2.4; `Hematocrit, `Float 32.21] (* erroneous, incorrectly None *)
and h4 = iget `BloodPressure [`Hematocrit, `Int 32]
and h5 = iget `Hematocrit [`Hematocrit, `Float 2.4; `Hematocrit, `Int 35] (* erroneous, invalid data structure not detected *)
and c3 = fget `Creatinine [`Creatinine, `Float 2.4; `Hematocrit, `Int 32]

(* That structure *does* permit us to use List.assoc, which raises
   Not_found if not found: *)

let iget2 tag lst = match List.assoc tag lst with `Int v -> v | _ -> raise WrongTagType
let fget2 tag lst = match List.assoc tag lst with `Float v -> v | _ -> raise WrongTagType
let h1p = iget2 `Hematocrit [`Creatinine, `Float 2.4; `Hematocrit, `Int 32]

(* And how about that separate-maps approach?  I did manage to get it
   to work, but it does seem ridiculously clunky.  Jane Street’s OCaml
   Core library simplifies this considerably, but I’m not using it here. 
*)
module IntKey = struct type t = Hematocrit | Glucose  let compare = compare end
module FloatKey = struct type t = Creatinine          let compare = compare end
module IntMap = Map.Make(IntKey)
module FloatMap = Map.Make(FloatKey)
type maps_test = MTest of (int IntMap.t * float FloatMap.t)
let miget tag (MTest(im, _)) = IntMap.find tag im
and mfget tag (MTest(_, fm)) = FloatMap.find tag fm

let hm1 = miget IntKey.Hematocrit (MTest(IntMap.add IntKey.Hematocrit 29 IntMap.empty, FloatMap.empty))

(* Suppose we have several different types of thing similar to the
   blood test, but with different sets of field names, and we’d like to
   avoid rewriting get_int_item and get_float_item for each of them.  In
   OCaml we can do this with a functor similar to Map.Make.  But, without
   resorting to macro processing, I don’t think there’s a way to
   abstract it over the set of field value types, so that you can, for
   example, add a field of type (int * int) later on: *)

module type Key = sig
  type int_key
  type float_key
end

module StaticallyTypedAlist(K: Key) = struct
  type item = Int of K.int_key * int
            | Float of K.float_key * float
  let rec get_int tag = function [] -> None
    | Int (tag2, v) :: _ when tag == tag2 -> Some v
    | _ :: cdr -> get_int tag cdr
  let rec get_float tag = function [] -> None
    | Float (tag2, v) :: _ when tag == tag2 -> Some v
    | _ :: cdr -> get_float tag cdr
end

module Blood = struct
  type int_key = Hematocrit | Glucose
  type float_key = Creatinine
end

module Test = StaticallyTypedAlist(Blood)
let mh1 = Test.get_int Blood.Hematocrit [Test.Float(Blood.Creatinine, 22.5);
                                         Test.Int(Blood.Hematocrit, 18)]

(* OCaml is also surprisingly good at handling a structure with 500
   elements, most of which at any time are null; you do
   unfortunately have to declare all 500 fields twice, as far as I
   know, but after that you only have to specify the ones you’re
   setting to non-None values.  Here 500 is 3: *)

type bloodtest = {
  hematocrit: int option;
  creatinine: float option;
  glucose: int option; 
}
let test = { hematocrit = None; creatinine = None; glucose = None }

(* Using it looks like this: *)

let mytest = { test with glucose = Some 115 }
let g = mytest.glucose
and c = mytest.creatinine
   
(* There is, however, no equivalent of C’s offsetof macro in OCaml; there
   is no way to reify such a field selector as a value. *)

(* You could instead use OCaml’s optional arguments in a case like this to
   reduce syntactic clutter where a bloodtest is instantiated, though
   the definition of the constructing function is still rather repetitive: *)

let bt ?hematocrit ?creatinine ?glucose () = {
  hematocrit = hematocrit;
  creatinine = creatinine;
  glucose = glucose;
}

let mytest2 = bt ~glucose:115 ()
let g = mytest2.glucose
and c = mytest2.creatinine

(* However, doing that in such a way as to permit the incremental
   construction of bloodtest records would seem to be much more
   awkward. *)