4

I'm working on an implementation of the HMF type system described in this paper by Daan Leijen.

The inference algorithm for HMF is defined in section 6.3. Here is the rule for let expressions (pseudocode):

infer(Γ, let x = e1 in e2) =
    let (θ1, σ1) = infer(Γ, e1)
    let (θ2, σ2) = infer((θ1Γ, x : σ1), e2)
    return (θ2θ1, σ2)

This rule clearly doesn't work for recursive definitions, because x is not added to the environment when e1 is inferred. I tried to extend the algorithm so that it could handle recursion:

infer(Γ, let x = e1 in e2) =
    let (θ1, σ1) = infer((Γ, x : α), e1)
    let θ2 = unify(θ1 α, σ1)
    let σ1′ = generalize(θ2Γ, σ1)
    let (θ3, σ2) = infer((θ2Γ, x : σ1′), e2)
    return (θ3θ2, σ2)

Is this new algorithm sound? My Haskell implementation isn't functioning properly for some recursive definitions, so I suspect I did something wrong (although the mistake might be in another part of my code). Type inference algorithms are extremely difficult to get right, because forgetting to apply a single substitution can make the entire thing fall apart.

My code is below, except for the definitions of some helper functions, which I've omitted for brevity. Note that θ and σ are written as s and t, respectively.

{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE BlockArguments #-}
{-# LANGUAGE PatternSynonyms #-}

import qualified Data.Bifunctor as Bf
import qualified Control.Monad as Monad
import Control.Arrow ((>>>))

import qualified Data.Map as Map
import qualified Data.Set as Set
import Data.Map (Map)
import Data.Set (Set, (\\))

import qualified Control.Monad.Except as Except
import qualified Control.Monad.Reader as Reader
import qualified Control.Monad.State as State
import Control.Monad.Except (ExceptT)
import Control.Monad.Reader (ReaderT)
import Control.Monad.State (State)

type Name = String

data Type
  = TCon Name       --type constructor
  | TVar Name       --type variable
  | TCall Type Type --application (e.g. Maybe Int)
  | TAny Name Type  --universal quantification
  deriving (Eq)

--function type
pattern TArr :: Type -> Type -> Type
pattern TArr t1 t2 = TCall (TCall (TCon "->") t1) t2

--check if type is polymorphic
polymorphic :: Type -> Bool
polymorphic = \case
  TCon _ -> False
  TVar _ -> False
  TCall t1 t2 -> polymorphic t1 || polymorphic t2
  TAny _ _ -> True

data Expr
  = Int Int                       --integer literal
  | Var Name                      --variable
  | Let Name Expr Expr            --let expression
  | Lambda Name (Maybe Type) Expr --lambda (possibly annotated)
  | Call Expr Expr                --function application
  | Annotate Expr Type            --type annotation

--check if expression is annotated
annotated :: Expr -> Bool
annotated = \case
  Annotate _ _ -> True
  _ -> False

--typing environment
type Env = Map Name Type

--type variable substitution
type Subst = Map Name Type

nullSubst :: Subst
nullSubst = Map.empty

--compose two substitutions
infixr 9 `compose`
compose :: Subst -> Subst -> Subst
compose s2 s1 = s2 <> apply s2 s1

class Substitutable a where
  apply :: Subst -> a -> a
  freeVars :: a -> Set Name

instance Substitutable Type where
  apply s t = undefined  --apply a substitution to a type
  freeVars t = undefined --get the free variables of a type

instance Substitutable a => Substitutable (Map k a) where
  apply s = fmap (apply s)
  freeVars = foldMap freeVars

--infinite list of fresh variables
data Fresh = Cons Name Fresh

--type inference monad
type Infer = ExceptT String (ReaderT Env (State Fresh))

--add a variable to the environment
extendEnv :: Name -> Type -> Infer a -> Infer a
extendEnv n t = Reader.local (Map.insert n t)

--apply a substitution to the environment
applyEnv :: Subst -> Infer a -> Infer a
applyEnv s = Reader.local (apply s)

freshName :: Infer Name
freshName = undefined

--get a fresh type variable
freshVar :: Infer Type
freshVar = TVar <$> freshName

--replace the free variables in a type with fresh ones
substFresh :: Type -> Infer Type
substFresh t = undefined

--replace the outermost quantified variables with fresh free variables
--return the new variables and the new type
unforallFresh :: Type -> Infer ([Name], Type)
unforallFresh = undefined

instantiate :: Type -> Infer Type
instantiate t = snd <$> unforallFresh t

--quantify all free variables that are not free in the environment
generalise :: Type -> Infer Type
generalise t = undefined

--infer the type of an expression and return a substitution
inferType :: Expr -> Infer (Subst, Type)
inferType = \case
  Int _ -> return (nullSubst, TCon "Int")

  Var n -> do
    env <- Reader.ask
    case Map.lookup n env of
      Nothing -> Except.throwError "variable is not defined"
      Just t -> return (nullSubst, t)

  Let n x1 x2 -> do
    var <- freshVar
    (s1, t1) <- extendEnv n var (inferType x1)
    s2 <- unify (apply s1 var) t1
    (s3, t2) <- applyEnv s2 do
      t1' <- generalise t1
      extendEnv n t1' (inferType x2)
    return (s3 `compose` s2, t2)

  Lambda n Nothing x -> do
    var <- freshVar
    (s, t) <- extendEnv n var (inferType x)
    Monad.when (polymorphic $ apply s var)
      (Except.throwError "polymorphic argument type inferred for lambda")
    t' <- if annotated x then return t else instantiate t
    applyEnv s do
      tf <- generalise $ apply s (TArr var t')
      return (s, tf)

  Lambda n (Just anno) x -> do
    anno' <- substFresh anno
    (s, t) <- extendEnv n anno' (inferType x)
    t' <- if annotated x then return t else instantiate t
    applyEnv s do
      tf <- generalise $ apply s (TArr anno' t')
      return (s, tf)

  Call x1 x2 -> inferCall False x1 x2

  Annotate x t -> do
    let fun = Lambda "_" (Just t) (Var "_")
    inferCall True fun x

--infer the type of a function call
--if the first argument is True, no subsumption will occur
inferCall :: Bool -> Expr -> Expr -> Infer (Subst, Type)
inferCall rigid x1 x2 = do
  (s0, t1) <- inferType x1
  (s1, t1a, t1b) <- unifyArr =<< instantiate t1
  (s2, t2) <- applyEnv (s1 `compose` s0) (inferType x2)
  let uni = if rigid then unify else subsume
  s3 <- uni (apply s2 t1a) t2
  let s4 = s3 `compose` s2 `compose` s1 `compose` s0
  applyEnv s4 do
    t <- generalise (apply s4 t1b)
    return (s4, t)

--get the argument type and return type of a function type
unifyArr :: Type -> Infer (Subst, Type, Type)
unifyArr = \case
  TArr t1 t2 -> return (nullSubst, t1, t2)

  TVar v -> do
    var1 <- freshVar
    var2 <- freshVar
    let s = Map.singleton v (TArr var1 var2)
    return (s, var1, var2)

  _ -> Except.throwError "not a function type"

--attempt to unify two types with a substitution
unify :: Type -> Type -> Infer Subst
unify = curry \case
  (t1, t2) | t1 == t2 -> return nullSubst

  (TVar v, t) -> bind v t
  (t, TVar v) -> bind v t

  (TCall t1 t2, TCall t3 t4) -> do
    s1 <- unify t1 t3
    s2 <- unify (apply s1 t2) (apply s1 t4)
    return (s2 `compose` s1)

  (TAny v1 t1, TAny v2 t2) -> do
    sk <- freshName
    let t1' = apply (Map.singleton v1 $ TVar sk) t1
    let t2' = apply (Map.singleton v2 $ TVar sk) t2
    s <- unify t1' t2'
    if Set.member sk (freeVars s)
      then Except.throwError "skolem contant escaped"
      else return s

  _ -> Except.throwError "failed to unify types"

--create a substitution that binds a variable to a type
--(unless the variable occurs free in said type)
bind :: Name -> Type -> Infer Subst
bind v t
  | Set.member v (freeVars t) = Except.throwError "infinite type"
  | otherwise = return (Map.singleton v t)

--not totally sure what this does
subsume :: Type -> Type -> Infer Subst
subsume t1 t2 = do
  (vars1, t1') <- unforallFresh t1
  (vars2, t2') <- unforallFresh t2
  s <- unify t1' t2'
  let s' = foldr Map.delete s vars2
  if Set.disjoint (Set.fromList vars1) (freeVars s')
    then return s'
    else Except.throwError "skolem constant escaped"

I'm still not sure if my inference of recursive definitions is correct, but I found solutions to two unrelated issues:

  1. Previously, the inferCall function always used subsume instead of unify. I changed it so that it uses unify when the argument is annotated.
  2. The paper states that unification only works properly when the argument types are in "normal form," meaning the variables are quantified in order. (So ∀b. ∀a. a -> b is not in normal form.) I haven't updated the code yet to meet this requirement, but doing so will definitely fix some of the errors I'm getting.
ruakh
  • 175,680
  • 26
  • 273
  • 307
Owen Bechtel
  • 138
  • 5
  • The question about subsumption is not related to the main one, so you should make it a separate post. – radrow Jan 19 '22 at 10:26
  • Could you also show your Haskell implementation? Your logic seems fine, it is hard to debug code that we don't have access to. Also, could you provide some examples where it goes wrong? – radrow Jan 19 '22 at 10:38

0 Answers0