(* A full backtracking regexp matcher in 11 LoC. Prompted by a remark of Dave Long. This is aping the lazy matcher in redl.py, but it implements laziness manually using η-expansion and polymorphic variants using `C and `N, instead of depending on Python generator expressions. This has the somewhat surprising result that although the implementation of the regexp matcher itself is only five lines of code, rather than the ten required in Python, the total amount of code is the same. As an example, this match against /wo*w/ evaluates to true: matches "woooow" (`Cat (`Lit "w", `Cat (`Star (`Lit "o"), `Lit "w"))) The inferred types are amusing to read, but demonstrate that polymorphic variants can make your compilation errors difficult to understand. In fact, I still don’t understand the “> `Cat `Star” part. val any : ([< `C of bool * (unit -> 'a) | `N ] as 'a) -> bool = val map : ('a -> 'b) -> ([< `C of 'a * (unit -> 'c) | `N ] as 'c) -> ([> `C of 'b * (unit -> 'd) | `N ] as 'd) = val iota : int -> int -> ([> `C of int * (unit -> 'a) | `N ] as 'a) = val splits : string -> ([ `C of (string * string) * (unit -> 'a) | `N ] as 'a) = val matches : string -> ([< `Alt of 'a * 'a | `Cat of 'a * 'a | `Lit of string | `Star of 'a > `Cat `Star ] as 'a) -> bool = *) let rec any = function `N -> false | `C (h, t) -> h || any (t ()) and map f = function `N -> `N | `C (a, b) -> `C (f a, fun () -> map f (b ())) and iota m n = if m = n then `N else `C (m, fun () -> iota (m+1) n) let rec splits s = let n = String.length s in map (fun i -> String.sub s 0 i, String.sub s i (n-i)) (iota 0 (n+1)) and matches s = function `Lit t -> s = t | `Cat (h, t) -> any (map (fun (a, b) -> matches a h && matches b t) (splits s)) | `Alt (a, b) -> matches s a || matches s b | `Star r -> s = "" || matches s (`Cat (r, `Star r))