I'm working on an implementation of the HMF type system described in this paper by Daan Leijen.
The inference algorithm for HMF is defined in section 6.3. Here is the rule for let expressions (pseudocode):
infer(Γ, let x = e1 in e2) =
let (θ1, σ1) = infer(Γ, e1)
let (θ2, σ2) = infer((θ1Γ, x : σ1), e2)
return (θ2 ◦ θ1, σ2)
This rule clearly doesn't work for recursive definitions, because x is not added to the environment when e1 is inferred. I tried to extend the algorithm so that it could handle recursion:
infer(Γ, let x = e1 in e2) =
let (θ1, σ1) = infer((Γ, x : α), e1)
let θ2 = unify(θ1 α, σ1)
let σ1′ = generalize(θ2Γ, σ1)
let (θ3, σ2) = infer((θ2Γ, x : σ1′), e2)
return (θ3 ◦ θ2, σ2)
Is this new algorithm sound? My Haskell implementation isn't functioning properly for some recursive definitions, so I suspect I did something wrong (although the mistake might be in another part of my code). Type inference algorithms are extremely difficult to get right, because forgetting to apply a single substitution can make the entire thing fall apart.
My code is below, except for the definitions of some helper functions, which I've omitted for brevity. Note that θ and σ are written as s
and t
, respectively.
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE BlockArguments #-}
{-# LANGUAGE PatternSynonyms #-}
import qualified Data.Bifunctor as Bf
import qualified Control.Monad as Monad
import Control.Arrow ((>>>))
import qualified Data.Map as Map
import qualified Data.Set as Set
import Data.Map (Map)
import Data.Set (Set, (\\))
import qualified Control.Monad.Except as Except
import qualified Control.Monad.Reader as Reader
import qualified Control.Monad.State as State
import Control.Monad.Except (ExceptT)
import Control.Monad.Reader (ReaderT)
import Control.Monad.State (State)
type Name = String
data Type
= TCon Name --type constructor
| TVar Name --type variable
| TCall Type Type --application (e.g. Maybe Int)
| TAny Name Type --universal quantification
deriving (Eq)
--function type
pattern TArr :: Type -> Type -> Type
pattern TArr t1 t2 = TCall (TCall (TCon "->") t1) t2
--check if type is polymorphic
polymorphic :: Type -> Bool
polymorphic = \case
TCon _ -> False
TVar _ -> False
TCall t1 t2 -> polymorphic t1 || polymorphic t2
TAny _ _ -> True
data Expr
= Int Int --integer literal
| Var Name --variable
| Let Name Expr Expr --let expression
| Lambda Name (Maybe Type) Expr --lambda (possibly annotated)
| Call Expr Expr --function application
| Annotate Expr Type --type annotation
--check if expression is annotated
annotated :: Expr -> Bool
annotated = \case
Annotate _ _ -> True
_ -> False
--typing environment
type Env = Map Name Type
--type variable substitution
type Subst = Map Name Type
nullSubst :: Subst
nullSubst = Map.empty
--compose two substitutions
infixr 9 `compose`
compose :: Subst -> Subst -> Subst
compose s2 s1 = s2 <> apply s2 s1
class Substitutable a where
apply :: Subst -> a -> a
freeVars :: a -> Set Name
instance Substitutable Type where
apply s t = undefined --apply a substitution to a type
freeVars t = undefined --get the free variables of a type
instance Substitutable a => Substitutable (Map k a) where
apply s = fmap (apply s)
freeVars = foldMap freeVars
--infinite list of fresh variables
data Fresh = Cons Name Fresh
--type inference monad
type Infer = ExceptT String (ReaderT Env (State Fresh))
--add a variable to the environment
extendEnv :: Name -> Type -> Infer a -> Infer a
extendEnv n t = Reader.local (Map.insert n t)
--apply a substitution to the environment
applyEnv :: Subst -> Infer a -> Infer a
applyEnv s = Reader.local (apply s)
freshName :: Infer Name
freshName = undefined
--get a fresh type variable
freshVar :: Infer Type
freshVar = TVar <$> freshName
--replace the free variables in a type with fresh ones
substFresh :: Type -> Infer Type
substFresh t = undefined
--replace the outermost quantified variables with fresh free variables
--return the new variables and the new type
unforallFresh :: Type -> Infer ([Name], Type)
unforallFresh = undefined
instantiate :: Type -> Infer Type
instantiate t = snd <$> unforallFresh t
--quantify all free variables that are not free in the environment
generalise :: Type -> Infer Type
generalise t = undefined
--infer the type of an expression and return a substitution
inferType :: Expr -> Infer (Subst, Type)
inferType = \case
Int _ -> return (nullSubst, TCon "Int")
Var n -> do
env <- Reader.ask
case Map.lookup n env of
Nothing -> Except.throwError "variable is not defined"
Just t -> return (nullSubst, t)
Let n x1 x2 -> do
var <- freshVar
(s1, t1) <- extendEnv n var (inferType x1)
s2 <- unify (apply s1 var) t1
(s3, t2) <- applyEnv s2 do
t1' <- generalise t1
extendEnv n t1' (inferType x2)
return (s3 `compose` s2, t2)
Lambda n Nothing x -> do
var <- freshVar
(s, t) <- extendEnv n var (inferType x)
Monad.when (polymorphic $ apply s var)
(Except.throwError "polymorphic argument type inferred for lambda")
t' <- if annotated x then return t else instantiate t
applyEnv s do
tf <- generalise $ apply s (TArr var t')
return (s, tf)
Lambda n (Just anno) x -> do
anno' <- substFresh anno
(s, t) <- extendEnv n anno' (inferType x)
t' <- if annotated x then return t else instantiate t
applyEnv s do
tf <- generalise $ apply s (TArr anno' t')
return (s, tf)
Call x1 x2 -> inferCall False x1 x2
Annotate x t -> do
let fun = Lambda "_" (Just t) (Var "_")
inferCall True fun x
--infer the type of a function call
--if the first argument is True, no subsumption will occur
inferCall :: Bool -> Expr -> Expr -> Infer (Subst, Type)
inferCall rigid x1 x2 = do
(s0, t1) <- inferType x1
(s1, t1a, t1b) <- unifyArr =<< instantiate t1
(s2, t2) <- applyEnv (s1 `compose` s0) (inferType x2)
let uni = if rigid then unify else subsume
s3 <- uni (apply s2 t1a) t2
let s4 = s3 `compose` s2 `compose` s1 `compose` s0
applyEnv s4 do
t <- generalise (apply s4 t1b)
return (s4, t)
--get the argument type and return type of a function type
unifyArr :: Type -> Infer (Subst, Type, Type)
unifyArr = \case
TArr t1 t2 -> return (nullSubst, t1, t2)
TVar v -> do
var1 <- freshVar
var2 <- freshVar
let s = Map.singleton v (TArr var1 var2)
return (s, var1, var2)
_ -> Except.throwError "not a function type"
--attempt to unify two types with a substitution
unify :: Type -> Type -> Infer Subst
unify = curry \case
(t1, t2) | t1 == t2 -> return nullSubst
(TVar v, t) -> bind v t
(t, TVar v) -> bind v t
(TCall t1 t2, TCall t3 t4) -> do
s1 <- unify t1 t3
s2 <- unify (apply s1 t2) (apply s1 t4)
return (s2 `compose` s1)
(TAny v1 t1, TAny v2 t2) -> do
sk <- freshName
let t1' = apply (Map.singleton v1 $ TVar sk) t1
let t2' = apply (Map.singleton v2 $ TVar sk) t2
s <- unify t1' t2'
if Set.member sk (freeVars s)
then Except.throwError "skolem contant escaped"
else return s
_ -> Except.throwError "failed to unify types"
--create a substitution that binds a variable to a type
--(unless the variable occurs free in said type)
bind :: Name -> Type -> Infer Subst
bind v t
| Set.member v (freeVars t) = Except.throwError "infinite type"
| otherwise = return (Map.singleton v t)
--not totally sure what this does
subsume :: Type -> Type -> Infer Subst
subsume t1 t2 = do
(vars1, t1') <- unforallFresh t1
(vars2, t2') <- unforallFresh t2
s <- unify t1' t2'
let s' = foldr Map.delete s vars2
if Set.disjoint (Set.fromList vars1) (freeVars s')
then return s'
else Except.throwError "skolem constant escaped"
I'm still not sure if my inference of recursive definitions is correct, but I found solutions to two unrelated issues:
- Previously, the
inferCall
function always usedsubsume
instead ofunify
. I changed it so that it usesunify
when the argument is annotated. - The paper states that unification only works properly when the argument types are in "normal form," meaning the variables are quantified in order. (So
∀b. ∀a. a -> b
is not in normal form.) I haven't updated the code yet to meet this requirement, but doing so will definitely fix some of the errors I'm getting.