(* Let’s statically type a blood test. *) (* John Cowan posed the following example problem to solve: > Suppose you have a stream of lab test results, where for each blood > sample you have in addition to time drawn, time tested, and other > stuff, you need an object representing the tests performed and their > values, which could be numbers, or strings. > However, perhaps the lab does 500 different tests of which only a few > will be in this blood sample. > To statically type that, you have to have a structure with 500 > elements, most of which at any time are null. > Messing with that is ridiculously clunky. Well, a structure with 500 elements most of which are null is certainly one possibility, but there are other possible alternatives. Let’s try some ways of doing this with alists. The first thing that occurred to me was a custom alist type for these blood tests: *) type test_item = Hematocrit of int | Creatinine of float | Glucose of int type test_items = EmptyTest | TestCons of test_item * test_items type test = Test of (int * float * test_items) let rec hematocrit = function TestCons (Hematocrit x, _) -> Some x | EmptyTest -> None | TestCons (_, cdr) -> hematocrit cdr (* But that requires repetitively defining not only a type but also an accessor function for each blood test type, which is indeed ridiculously clunky. Instead we could pull the argument out of the variant type for the blood test, leaving it as just a simple enum, and define a couple of other variant tags to wrap int values and float values: *) type int_tag = HematocritT | GlucoseT type float_tag = CreatinineT type by_type_tag = EmptyBTT | BTTConsInt of (int_tag * int * by_type_tag) | BTTConsFloat of (float_tag * float * by_type_tag) let rec get_int tag = function EmptyBTT -> None | BTTConsFloat (_, _, cdr) -> get_int tag cdr | BTTConsInt (tag2, v, _) when tag2 == tag -> Some v | BTTConsInt (_, _, cdr) -> get_int tag cdr (* Moreover we don’t really need to define our own list type; we can use the standard list type, although we can’t use List.assoc, since our keys are wrapped in an item that associates them with a value of the right type: *) type int_item = HematocritI | GlucoseI type float_item = CreatinineI type item = IntItem of int_item * int | FloatItem of float_item * float let rec get_int_item tag = function [] -> None | IntItem (tag2, v) :: _ when tag == tag2 -> Some v | _ :: cdr -> get_int_item tag cdr let rec get_float_item tag = function [] -> None | FloatItem (tag2, v) :: _ when tag == tag2 -> Some v | _ :: cdr -> get_float_item tag cdr let h_a = get_int_item HematocritI [FloatItem(CreatinineI, 32.1); IntItem(HematocritI, 22)] (* OCaml’s type system is capable of factoring out the loop in common between get_int_item and get_float_item above. In fact, I think this function might be in the standard library. But it doesn’t simplify the code significantly over using explicit recursion: *) let rec find_if f = function [] -> None | car :: cdr -> match f car with | None -> get_item f cdr | result -> result let get_int_item_2 tag = find_if (function | IntItem (tag2, v) when tag == tag2 -> Some v | _ -> None) (* I think get_int_item above is close to the sweet spot. We can cut it down to less code by using polymorphic variants, but we somewhat lose the association between blood tests and primitive types; I don't know how to restore that association in a compile-time-checked way when using polymorphic variants, although the commented-out code below demonstrates how we can detect it at run time. Also note that this version has an easy path to using it with the standard library map type; to do that with the above approach, you'd need separate maps for ints and floats. *) exception WrongTagType let rec iget tag = function | [] -> None | (tag2, `Int v) :: _ when tag == tag2 -> Some v (* | (tag2, _) :: _ when tag == tag2 -> raise WrongTagType *) (* this detects the two errors mentioned below, but at run time *) | _ :: cdr -> iget tag cdr let rec fget tag = function | [] -> None | (tag2, `Float v) :: _ when tag == tag2 -> Some v | _ :: cdr -> fget tag cdr let h1 = iget `Hematocrit [`Creatinine, `Float 2.4; `Hematocrit, `Int 32] and h2 = iget `Hematocrit [`Creatinine, `Float 2.4] and h3 = iget `Hematocrit [`Creatinine, `Float 2.4; `Hematocrit, `Float 32.21] (* erroneous, incorrectly None *) and h4 = iget `BloodPressure [`Hematocrit, `Int 32] and h5 = iget `Hematocrit [`Hematocrit, `Float 2.4; `Hematocrit, `Int 35] (* erroneous, invalid data structure not detected *) and c3 = fget `Creatinine [`Creatinine, `Float 2.4; `Hematocrit, `Int 32] (* That structure *does* permit us to use List.assoc, which raises Not_found if not found: *) let iget2 tag lst = match List.assoc tag lst with `Int v -> v | _ -> raise WrongTagType let fget2 tag lst = match List.assoc tag lst with `Float v -> v | _ -> raise WrongTagType let h1p = iget2 `Hematocrit [`Creatinine, `Float 2.4; `Hematocrit, `Int 32] (* And how about that separate-maps approach? I did manage to get it to work, but it does seem ridiculously clunky. Jane Street’s OCaml Core library simplifies this considerably, but I’m not using it here. *) module IntKey = struct type t = Hematocrit | Glucose let compare = compare end module FloatKey = struct type t = Creatinine let compare = compare end module IntMap = Map.Make(IntKey) module FloatMap = Map.Make(FloatKey) type maps_test = MTest of (int IntMap.t * float FloatMap.t) let miget tag (MTest(im, _)) = IntMap.find tag im and mfget tag (MTest(_, fm)) = FloatMap.find tag fm let hm1 = miget IntKey.Hematocrit (MTest(IntMap.add IntKey.Hematocrit 29 IntMap.empty, FloatMap.empty)) (* Suppose we have several different types of thing similar to the blood test, but with different sets of field names, and we’d like to avoid rewriting get_int_item and get_float_item for each of them. In OCaml we can do this with a functor similar to Map.Make. But, without resorting to macro processing, I don’t think there’s a way to abstract it over the set of field value types, so that you can, for example, add a field of type (int * int) later on: *) module type Key = sig type int_key type float_key end module StaticallyTypedAlist(K: Key) = struct type item = Int of K.int_key * int | Float of K.float_key * float let rec get_int tag = function [] -> None | Int (tag2, v) :: _ when tag == tag2 -> Some v | _ :: cdr -> get_int tag cdr let rec get_float tag = function [] -> None | Float (tag2, v) :: _ when tag == tag2 -> Some v | _ :: cdr -> get_float tag cdr end module Blood = struct type int_key = Hematocrit | Glucose type float_key = Creatinine end module Test = StaticallyTypedAlist(Blood) let mh1 = Test.get_int Blood.Hematocrit [Test.Float(Blood.Creatinine, 22.5); Test.Int(Blood.Hematocrit, 18)] (* OCaml is also surprisingly good at handling a structure with 500 elements, most of which at any time are null; you do unfortunately have to declare all 500 fields twice, as far as I know, but after that you only have to specify the ones you’re setting to non-None values. Here 500 is 3: *) type bloodtest = { hematocrit: int option; creatinine: float option; glucose: int option; } let test = { hematocrit = None; creatinine = None; glucose = None } (* Using it looks like this: *) let mytest = { test with glucose = Some 115 } let g = mytest.glucose and c = mytest.creatinine (* There is, however, no equivalent of C’s offsetof macro in OCaml; there is no way to reify such a field selector as a value. *) (* You could instead use OCaml’s optional arguments in a case like this to reduce syntactic clutter where a bloodtest is instantiated, though the definition of the constructing function is still rather repetitive: *) let bt ?hematocrit ?creatinine ?glucose () = { hematocrit = hematocrit; creatinine = creatinine; glucose = glucose; } let mytest2 = bt ~glucose:115 () let g = mytest2.glucose and c = mytest2.creatinine (* However, doing that in such a way as to permit the incremental construction of bloodtest records would seem to be much more awkward. *)