(** opam switch 4.02.0+modular-implicits *)
(** Many programming languages support some form of overloading.
These notes introduce "modular implicits", an extension to OCaml
that supports a parametric form of overloading based on the module
system.
Many common functions operate on of a large variety of values.
For example, functions for equality, comparison and hashing can be
defined for any first-order data types. OCaml provides built-in
polymorphic versions of these functions that operate on the
structure of values:
*)
let () = begin
(* '=' works on integers *)
assert (3 = 3);
(* '=' works on structured values *)
assert ([1;2;3] = [1;2;3]);
(* '=' breaks abstraction boundaries *)
let module CharSet = Set.Make(Char) in
assert (not (CharSet.of_list ['a'; 'b'] = CharSet.of_list ['b'; 'a']));
(* '=' compares values by their runtime representation, ignoring type *)
let module E = struct
(* an existential type *)
type t = E : _ -> t
end in
assert
(* the integer zero and the empty list have the same
representation at runtime *)
(E.E 0 = E.E [])
end
(** Similarly, arithmetic operations can be defined for a variety of
types. In OCaml the arithmetic operations for each type have
distinct names:
*)
(* Sum integers using '+' *)
let sum_integers l = List.fold_left (+) 0 l
(* Sum floats using '+.' *)
let sum_floats l = List.fold_left (+.) 0.0 l
(** Modular implicits address both these use cases, without either
violating parametricity and breaking abstraction (like polymorphic
equality) or requiring distinct names for each type-specific
variant of a function (like OCaml's arithmetic functions).
As we shall see, with modular implicits we can write a single
function that sums a list of numbers, regardless of whether they
are ints, floats, or some other (perhaps user-defined) type.
Furthermore, with modular implicits the compiler will take on the
work of assembling suitable functions from smaller pieces. For
example, a common way to write pretty-printing functions is to
define a family of functions, one for each type, with
parameterised functions for parameterised types:
*)
let show_int : int -> string =
string_of_int
let show_list : 'a. ('a -> string) -> 'a list -> string =
fun f l -> "["^ String.concat ", " (List.map f l) ^"]"
(** In addition to defining these functions the user must also combine
them -- for example, to define a printer for lists of ints:
*)
let show_int_list = show_list show_int
(** With modular implicits this combining work, where the application
of functions (like show_list show_int) typically parallels the
application of type constructors (like int list) can be left to
the compiler.
*)
(** It is useful to distinguish two forms of polymorphism:
* *parametric* polymorphism as found in System F (∀, Λ)
and in Hindley-Milner type systems like ML's and Haskell's.
With parametric polymorphism the behaviour of a function
is the same at every type: for example, 'map' always
applies its first argument to every element of its second
argument, regardless of the element type of the list.
In a system with type inference (like OCaml's) the inference
algorithm analyses a program to determine where to add types,
and which types to add:
λx.x (unannotated program)
↝ Λα.λx:α.x (type inference inserts a Λ-abstraction)
f 3 (unannotated program)
↝ f [Int] 3 (type inference inserts a type application)
* *ad-hoc* polymorphism, like modular implicits or Haskell's
type classes.
With ad-hoc polymorphism the behaviour of a function varies
according to the type: for example, the output of an ad-hoc
polymorphic 'show' function is different for integers and
for lists.
A system for ad-hoc polymorphism in the style of modular
implicits or type classes also involves inference, and the
inference algorithm analyses a program to determine where
to add terms, and which terms to add.
show 3 (unannotated program)
↝ show {Show_int} 3 (inference inserts an implicit argument)
*)
(** We start with a simple example: numbers with addition.
We will define a signature (interface), some implementations, and
some overloaded functions, and illustrate how the implicit
inference mechanism selects and inserts an appropriate implementation
each time an overloaded function is used.
Since implicits are based on OCaml's module system, the interface
is a straightforward OCaml module type:
*)
module type NUM = sig
type t
val zero : t
val add : t -> t -> t
end
(** It is convenient to define top-level functions that simply
project out each argument from the signature.
The syntax {N:NUM} denotes an implicit parameter N of type NUM.
*)
(* val add : {N:NUM} -> N.t -> N.t -> N.t *)
let add {N:NUM} x y = N.add x y
(** Implicit implementations are standard OCaml module definitions
preceded by the keyword 'implicit'. As we shall see, some care
is needed when deciding which modules to make implicit to avoid
ambiguity. *)
(* An implicit NUM implementation for integers *)
implicit module Num_int : NUM with type t = int = struct
type t = int
let zero = 0
let add x y = x + y
end
(* An implicit NUM implementation for floats *)
implicit module Num_float : NUM with type t = float = struct
type t = float
let zero = 0.0
let add x y = x +. y
end
(** We can define an overloaded 'sum' function that works
for any type of numbers (or, more precisely, for any
type that has an implicit NUM implementation).
As with 'add', {N:NUM} denotes an implicit parameter N
of type NUM.
*)
let sum: {N:NUM} -> N.t list -> N.t =
fun {N:NUM} (l : N.t list) ->
List.fold_left N.add N.zero l
(** Then we can use 'sum' with lists of integers or lists of floats.
The compiler inserts the appropriate implicit argument.
*)
let x1 = sum [1; 2; 3]
let x2 = sum [1.0; 2.0; 3.0]
(** Alternatively, we can provide the implicit arguments explicitly.
This facility is sometimes useful when implicit resolution would
encounter an ambiguity *)
let x3 = sum {Num_int} [1;2;3]
let x4 = sum {Num_float} [1.0;2.0;3.0]
(** It is also possible to define impilcit implementations defined by
other implicit implementations -- i.e. implicit functors.
Here is an implicit module Num_pair that builds NUM implementations
for pairs from NUM implementations for each elememt.
Since the functor arguments {A:NUM} and {B:NUM} are implicit they
can be instantiated automatically by the implicit resolution
mechanism.
*)
implicit module Num_pair{A:NUM}{B:NUM} = struct
type t = A.t * B.t
let zero = (A.zero, B.zero)
let add (a1, b1) (a2, b2) =
(A.add a1 a2, B.add b1 b2)
end
(** With Num_pair defined we can apply 'sum' to pairs of numbers: *)
let x5 = sum [(10, 1.0); (20, 2.0)]
(** ... or to pairs of pairs of numbers *)
let x6 = sum [(100, (10, 1.0)); (200, (20, 2.0))]
(** The syntax for explicitly supplying implicit functors as implicit
arguments is as follows: *)
let x7 = sum {Num_pair{Num_int}{Num_float}}
[(10, 1.0); (20, 2.0)]
(** A number of common patterns can be conveniently expressed with
modular implicits.
For example, inheritance can be straightforwardly expressed
by including "super-interfaces" as members.
Here is an interface FRACTIONAL that extends NUM with
an additional 'div' function:
*)
module type FRACTIONAL = sig
type t
(* Include a NUM implementation as a member, with a constraint
to ensure that the types are the same *)
module Num : NUM with type t = t
val div : t -> t -> t
end
(** A top-level function to project 'div' *)
let div {F:FRACTIONAL} x y = F.div x y
(** A top-level functor to project 'Num'.
With this functor defined, the implicit resolution mechanism
can build an implicit NUM argument from any implicit FRACTIONAL
implementation. *)
implicit module Num_fractional{F:FRACTIONAL} = F.Num
(** Here is an implementation of FRACTIONAL for integers
that builds on the NUM implementation for integers *)
implicit module Fractional_int =
struct
type t = int
module Num = Num_int
let div n d = n / d
end
(** The Num_fractional implementation makes it possible to
freely mix 'add' and 'div' in expressions.
In the following function, the compiler inserts the the implicit
argument {F} in the call to 'div' and the implicit argument
{Num_fractional{F}} in the call to 'add': *)
let x8 {F:FRACTIONAL} (x:F.t) (y:F.t) = div (add x y) x
(** Implicit resolution: finding a match
How does the implicit resolution mechanism work? For the full
details, see the paper:
Modular implicits (2015)
Leo White, Frédéric Bour and Jeremy Yallop
https://www.cl.cam.ac.uk/~jdy22/papers/modular-implicits.pdf
The following is a brief high-level overview.
Consider the following call to an overloaded function:
add 3.0 4.0
From the type of 'add' it is evident that an implicit argument of
type NUM is needed. Initially both the argument and the instantiation
of the 't' type member are unknown. However, the type of 'add' shows
that the type 't' must be the same as the types of the other two
(non-implicit) arguments to 'add', so the compiler can insert a
unification variable:
add {?:NUM with type t = 'a} (3.0 : 'a) (4.0 : 'a)
Both '3.0' and '4.0' have type 'float', so the instantiation of 't'
is also constrained to 'float':
add {?:NUM with type t = float} (3.0 : float) (4.0 : float)
Now that the type of the implicit argument is fully known,
all that remains is to locate an argument of that type.
There are three candidates: Num_int, Num_float and Num_pair.
It is clear that only Num_float.t is compatible with float,
and so the search concludes, inserting the implicit argument:
add {Num_float} 3.0 4.0
*)
(** Implicit resolution: determining whether a module matches.
In the NUM example all the implicit modules clearly matched
the required signature of the implicit argument.
In larger examples the question of whether an implicit module is a
suitable match may be more involved.
In general, a module M is a suitable match for S (i.e. an
"instance of S") if M has type T and T is a subtype of S. The
subtyping (inclusion) relationship between modules is already part
of the standard OCaml language; modular implicits put simply put
the existing subtyping relationship to a new use.
Here is a simple way to test whether a module type T is a subtype
of another module type S. If the functor definition
module F(X: T) : S = X
is accepted then T is a subtype of S.
There follow some illustrations of various facets of module
subtyping in OCaml.
*)
(** A module subtype can add elements --- types, values, modules, etc.
In the following example the module subtype adds a type member 'b'
and a value member 'y':
*)
module F1
(X: sig
type a and b
val x : a
val y : b
end) :
sig
type a
val x : a
end
= X
(** A module subtype can instantiate abstract types in the supertype.
In the following example the module subtype instantiates the
abstract type 'a' to 'int':
*)
module F2
(X: sig
type a = int
end) :
sig
type a
end
= X
(** Dually, a module subtype can add polymorphism.
In the following example the module subtype makes the value member
'x' more polymorphic than the corresponding member in the
supertype:
*)
module F3
(X: sig
val x : 'a list
end) :
sig
val x : int list
end =
X
(** For functor types, the argument signature behaves contravariantly
with respect to subtyping.
In the following example the type
functor (Y:sig end) -> sig end)
is a subtype of the type
functor (Y:sig type t end) -> sig end
because
sig end
is a *supertype* of
sig type t end
*)
module F4
(X: functor (Y:sig end) -> sig end)
: functor (Y:sig type t end) -> sig end
= X
(** Ambiguity
The job of implicit resolution is to locate the instance that
satisfies the type constraints. What if there is more than one
suitable instance?
For example, here is a signature SHOW for types whose values can
be converted to strings:
*)
module type SHOW =
sig
type t
val show : t -> string
end
(** And here are instance of SHOW for 'bool' and 'int': *)
implicit module Show_bool =
struct
type t = bool
let show = string_of_bool
end
implicit module Show_int =
struct
type t = int
let show = string_of_int
end
(** ... and a top-level projection function for the 'show' member: *)
let show {S:SHOW} (x: S.t) = S.show x
(** We might like to define a function 'print' that uses an implicit
SHOW instance to write a value to stdout
let print x = show x
However, compiling this definition produces an error message:
Error: Ambiguous implicit S: Show_int and Show_bool
are both solutions.
The problem is as follows: after inserting the initial constraint
let print x = show {?:SHOW with type t = 'a} (x: 'a}
there are no further constraints on the type 'a, and so
either Show_bool or Show_int would be a suitable choice
for the implicit argument.
There are two ways to resolve the ambiguity: we can either
generalize 'print' to work on all types with SHOW instances, or we
can specialize it to work with some particular instance.
*)
(** A generalized version of 'print'. Now that 'x' is constrained
to have the type S.t the generated constraint identifies a single
implementation, namely 'S':
let print {S:SHOW} x = show {?:SHOW with type t = S.t} (x: S.t}
*)
let print {S:SHOW} (x:S.t) = show x
(** A specialized version of 'print'. Now that 'x' is constrained
to have the type int the generated constraint identifies a single
implementation, namely Show_int:
let print {S:SHOW} x = show {?:SHOW with type t = int} (x: int}
*)
let print_int (x:int) = show x
(** Not all cases of ambiguity can be resolved so easily.
For example, here is a second implicit instance of SHOW
for booleans:
*)
implicit module Show_boolean = struct
type t = bool
let show s = Printf.sprintf "%b" s
end
(** With both Show_bool and Show_boolean visible, any implicit resolution
problem that generates the constraint
{?:SHOW with type t = bool}
will have two solutions. How should resolution choose the
implementation to insert?
One possibility is to pick the most recent, in a similar manner
to variable shadowing:
let x = 3 in
let x = 4 in
x
Another possibility is to choose the 'best' match (although
determining which match is 'best' is likely to be complicated,
given the complexity of module subtyping).
The modular implicits design takes the following simple approach:
ambiguity at the point of definition is permitted
(so there is no problem with *defining* Show_bool and
Show_boolean), but
ambiguity at the point of use is prohibited
(so it is an error if both Show_bool and Show_boolean are solutions
to an implicit resolution problem)
*)
(** Elaboration (part I)
Modular implicits are closely integrated into the existing OCaml
language. We have seen above that the search for an implicit
match is based on the existing module subtyping relation. As a
second point of integration, programs that use implicits can be
translated into implicit free programs in the existing OCaml
language.
Then the treatment of implicits has two steps:
1. Resolve and instantiate implicit arguments
2. Translate implicits to existing OCaml constructs
The translation of implicits involves packages (sometimes called
"first-class modules"), which we now briefly introduce.
*)
(** Packages ("First-class modules")
Like most of the language constructs we have seen so far, packages
involve three components: a family of types, an introduction form
and an elimination form.
*)
(** Here is a sample signature S, and an implementation M of S *)
module type S = sig type s type t val x: int end
module M = struct type s type t = int let x = 0 end
(** A package type corresponding to S can be defined as follows *)
type s = (module S)
(** The introduction form is similar. The rhs of the following
definition packages M as a package value *)
let p = (module M:S)
(** The elimination form uses the keyword 'val' to turn a package
value back into a regular module *)
module M = (val p)
(** There is also some convenient syntactic sugar that allows
module unpacking in patterns: *)
let g = fun (module M : S) -> M.x + 1
(** The definition above is syntactic sugar for the following more
explicit code: *)
let g = fun (m : (module S)) -> let module M = (val m) in M.x + 1
(** Finally, package types can be given constraints.
The constraint language is more restricted than the constraint
language for regular modules; for example, only unparameterised
types are supported *)
type 'a s2 = (module S with type s = 'a
and type t = int)
(** Elaboration (part II)
The modular implicits elaboration turns each program with
implicits into a program with packages.
More precisely, each function with an implicit argument becomes a
functor package (first-class functor), and each application of
such a function becomes an application of a functor.
For example, the 'sum' function above
let sum: {N:NUM} -> N.t list -> N.t =
fun {N:NUM} l -> fold_left N.add N.zero l
is elaborated into a package value whose type (SUM_TYPE) is a
packaged functor type:
*)
module type SUM_TYPE =
functor(N:NUM) -> sig val v : N.t list -> N.t end
let sum : (module SUM_TYPE) =
(module functor (N:NUM) ->
struct
let v = fun l -> List.fold_left N.add N.zero l
end)
(** ... and elaborating an application of sum involves
first inserting implicit arguments:
sum [1;2;3]
↝ sum {Num_int} [1;2;3]
and then turning the implicit application into a functor application
*)
let x11 =
let module Sum = (val sum)(Num_int) in
Sum.v [1;2;3]
(** Although modular implicits don't add new expressive
power to the language (since every program with implicits
can be translated into an implicit-free program), they
make some kinds of program significantly more convenient.
For example, programs involving higher kinds are often cumbersome
to express without implicits, but easy to express using implicits.
Here is an example: suppose we would like to define a function
'fmap' that generalizes the 'map' for lists to arbitrary container
types, including lists and options:
fmap succ [1; 2; 3] ↝ [2; 3; 4]
fmap succ (Some 6) ↝ Some 7
We might then use 'fmap' to write other functions, like 'replace'
that replaces every element of a container with a particular value:
replace () [1; 2; 3] ↝ [(); (); ()]
replace () (Some "a") ↝ (Some ())
What should the type of fmap be? It should generalize the types
of map_list and map_option
val map_list : ('a -> 'b) -> 'a list -> 'b list
val map_option : ('a -> 'b) -> 'a option -> 'b option
which suggests that it needs higher kinds, since only a type
variable of kind * ⇒ * (not available in OCaml) can stand in
for both 'list' and 'option'.
Here is a solution using modular implicits. First, we define a
signature FUNCTOR with
a parameterised type 't' that can be instantiated to either 'list'
or 'option, and
a value member 'fmap' that turns a function on elements into
a function on containers:
*)
module type FUNCTOR = sig
type +'a t
val fmap : ('a -> 'b) -> 'a t -> 'b t
end
(** Then we can define the standard fmap projection function,
and an overloaded function 'replace' parameterised by an
implicit FUNCTOR argument: *)
let fmap {F:FUNCTOR} f x = F.fmap f x
let replace {F:FUNCTOR} x (c : _ F.t) = fmap (fun _ -> x) c
(** It is straightforward to define implicit FUNCTOR instances
for both list and option: *)
implicit module Functor_list = struct
type 'a t = 'a list
let fmap = List.map
end
implicit module Functor_option = struct
type 'a t = 'a option
let fmap f = function
| None -> None
| Some v -> Some (f v)
end
(** Finally, here are 'fmap' and 'replace' in action: *)
let x12 = fmap succ [1; 2; 3]
let x13 = fmap succ (Some 6)
let x14 = replace () [1; 2; 3]
let x15 = replace () (Some "a")