0

I'm writing a tool that generates ml stubs from mli files. I have the meaningful mappings complete (val f : int -> float to let f _ = 0.), but I'm having a little trouble reasoning about the AST nodes for classes and modules, specifically the Pmty_* and Pcty_* nodes. These two types of nodes (given the type hierarchy) seem to be most associated with mli files. Some of the trivial members--which appear in -dparsetree dumps of mli files (like Pmty_ident and Pcty_constr)--have obvious mappings to nodes most associated with ml files (again by the hierarchy of the signature and structure types, in the aforementioned example to Pmod_ident and Pcl_constr). However, some of the nodes don't have an obvious parallel. Specifically, I'm having trouble reasoning about:

  • Pmty_with and Pmty_typeof - it seems like these two can never be parsed from a valid mli file; they only occur in a structure context that has a module_type as one of its members (I've checked my suspicions against all of OCaml's parser test files)
  • Pcty_signature - it seems like the only case this can occur is within a Psig_class_type (which I already directly map to Pstr_class_type); I checked against the OCaml test files and found a Pcty_signature only in this location, but are there other valid locations for it in an mli that I am missing?
  • Pcty_arrow - there are no test files containing this, so I can't be sure where it is valid, but my intuition says this also goes in a class_type context within a structure only (or within one of the other Pcty_*, in which case its conversion would be handled as a child by that node's mapping from Pcty_* to Pcl_*); is this incorrect?

I'm fairly deep into this and don't completely understand what's going on with all of these advanced language features and the AST nodes representing them, so here's an attempt at a simpler explanation of my questions:

Relevant types extracted from Parsetree:

type module_type_desc =
  (* .. snip .. *)
  | Pmty_with of module_type * with_constraint list
        (* MT with ... *)
  | Pmty_typeof of module_expr
        (* module type of ME *)

type class_type_desc =
  (* .. snip .. *)
  | Pcty_signature of class_signature
        (* object ... end *)
  | Pcty_arrow of arg_label * core_type * class_type
        (* T -> CT       Simple
           ~l:T -> CT    Labelled l
           ?l:T -> CT    Optional l
         *)
  • I believe that Pmty_with and Pmty_typeof can only occur in ml files (and not mli files). Is this assumption correct?
  • Can Pcty_signature occur as a node that isn't the child of a Psig_class_type?
  • Can a Pcty_arrow occur in a valid mli file? Where? As a child of what?

As I mentioned before, I'm fairly confident of my handling of everything besides these two (modules and classes). In case the above isn't clear, here's an annotated snippet of the code that transforms Parsetree.signature -> Parsetree.structure with all of the non-module/class stuff removed for brevity:

(* Parsetree.signature -> Parsetree.structure *)
(** [stub signature_items] turns the items of a parsed interface into the
    items of a stub implementation, producing exactly one structure item per
    signature item (the result is a [List.map] over the input).

    Only the module and class cases are shown; the other [Psig_*] cases are
    clipped. [Pmty_with], [Pmty_typeof] and [Pcty_arrow] have no case yet
    (see the XXX notes below).

    @raise Failure if the type of a [Psig_class] description is a
    [Pcty_signature]. *)
let rec stub signature_items =
  (* Module type -> module expression; the attributes are carried across
     unchanged. *)
  let rec stub_module_type { pmty_desc; pmty_attributes; _ } =
    let module_desc =
      match pmty_desc with
      | Pmty_ident lid -> Pmod_ident lid
      | Pmty_signature items -> Pmod_structure (stub items)
      | Pmty_functor (param, param_type, body) ->
          Pmod_functor (param, param_type, stub_module_type body)
      (* XXX: unclear if these two can occur in an mli *)
      (* | Pmty_with (type_, constraints) -> _ TODO *)
      (* | Pmty_typeof type_ -> _ TODO *)
      | Pmty_extension extension -> Pmod_extension extension
      | Pmty_alias lid -> Pmod_ident lid
    in
    make_module_expr module_desc pmty_attributes
  in
  (* Module declaration -> module binding, shared by the plain and the
     recursive module cases. *)
  let stub_module_decl { pmd_name; pmd_type; pmd_attributes; _ } =
    make_module_binding pmd_name (stub_module_type pmd_type) pmd_attributes
  in
  let stub_module decl = Pstr_module (stub_module_decl decl) in
  let stub_modules decls =
    Pstr_recmodule (List.map stub_module_decl decls)
  in
  let stub_include { pincl_mod; pincl_attributes; _ } =
    Pstr_include
      (make_include_decl (stub_module_type pincl_mod) pincl_attributes)
  in
  (* Class descriptions ([Psig_class]) -> class declarations. *)
  let stub_classes classes =
    (* Class type -> class expression; recursive because [Pcty_open] wraps
       another class type. *)
    let rec stub_class { pcty_desc; pcty_attributes; _ } =
      let class_desc =
        match pcty_desc with
        | Pcty_constr (lid, type_args) -> Pcl_constr (lid, type_args)
        | Pcty_signature _ ->
            (* XXX: Is my below assumption true? *)
            failwith "should be covered by Psig_class_type -> Pstr_class_type"
        (* XXX: do we ever need to handle Pcty_arrow for mli files? *)
        (* | Pcty_arrow (label, a, b) -> _ *)
        | Pcty_extension extension -> Pcl_extension extension
        | Pcty_open (override, lid, body) ->
            Pcl_open (override, lid, stub_class body)
      in
      make_class_expr class_desc pcty_attributes
    in
    let stub_class_descr
        { pci_virt; pci_params; pci_name; pci_expr; pci_attributes; _ } =
      make_class_decl pci_virt pci_params pci_name (stub_class pci_expr)
        pci_attributes
    in
    Pstr_class (List.map stub_class_descr classes)
  in
  (* Dispatches on the kind of signature item and wraps the resulting
     description with [make_str]. *)
  let transform_signature { psig_desc; _ } =
    let structure_desc =
      match psig_desc with
      (* ... clip non-module/class stuff ... *)
      | Psig_module decl -> stub_module decl
      | Psig_recmodule decls -> stub_modules decls
      | Psig_include incl -> stub_include incl
      | Psig_class descrs -> stub_classes descrs
      | Psig_class_type decls -> Pstr_class_type decls
    in
    make_str structure_desc
  in
  List.map transform_signature signature_items

Unfortunately the module/class stuff is rather complex logic, so even trimmed down there's still a lot. There are a ton of helpers for creating the *_desc wrappers that encapsulate location in the file, attributes, etc., but those shouldn't be key to understanding how I'm handling modules and classes. But just for clarity, here are the types of all of the helpers:

(** Wraps a structure item description into a full structure item. [stub]
    applies it to every converted signature item.
    NOTE(review): how the location field is filled in is not shown here. *)
val make_str : Parsetree.structure_item_desc -> Parsetree.structure_item

(** Builds a module expression from its description and the attributes to
    attach. [stub] passes the attributes of the originating module type. *)
val make_module_expr :
  Parsetree.module_expr_desc -> Parsetree.attributes -> Parsetree.module_expr

(** Builds a module binding from a located name, the module expression bound
    to it, and attributes. [stub] uses it for both [Psig_module] and
    [Psig_recmodule] declarations. *)
val make_module_binding :
  string Asttypes.loc ->
  Parsetree.module_expr -> Parsetree.attributes -> Parsetree.module_binding

(** Builds an include record around the included item (['a]) and attributes.
    [stub] instantiates ['a] with a module expression. *)
val make_include_decl :
  'a -> Parsetree.attributes -> 'a Parsetree.include_infos

(** Builds a class info record. In [stub] the arguments are, in order, the
    [pci_virt], [pci_params] and [pci_name] fields of the class description,
    the class body (['a], instantiated with a class expression) and the
    [pci_attributes] field. *)
val make_class_decl :
  Asttypes.virtual_flag ->
  (Parsetree.core_type * Asttypes.variance) list ->
  string Asttypes.loc ->
  'a -> Parsetree.attributes -> 'a Parsetree.class_infos

(** Builds a class expression from its description and attributes. [stub]
    passes the attributes of the originating class type. *)
val make_class_expr :
  Parsetree.class_expr_desc -> Parsetree.attributes -> Parsetree.class_expr

Relevant docs:


Edit: As an aside, besides reading documentation on these features (which didn't yield any AST patterns I didn't already know about), I recalled that the compiler can derive the interface from the implementation (ocamlc -i). I traced down the variable in the compiler (it's called print_types) that's linked to this flag and found all of its uses, but it was not immediately apparent to me where, at any of its uses, the code that derives the mli file is called (perhaps it is done progressively with the parse, since compiling produces a cmi?). If someone with more OCaml chops or more experience with the compiler could point me to where the mli file is derived, it may be easier to reverse engineer these module and class AST nodes.

Edit 2: I am also aware of How to auto-generate stubs from mli file?, however the answer there is "do it manually," which definitely conflicts with what I'm attempting! (The answerer also claims that such a tool would be trivial, but after poring over these AST nodes for a while, I beg to differ!)

Bailey Parker
  • 15,599
  • 5
  • 53
  • 91

2 Answers2

2

(I did not include the -dparsetree in my answer as it is heavy and not that interesting).

I believe that Pmty_with and Pmty_typeof can only occur in ml files (and not mli files). Is this assumption correct?

module M : module type of struct type t end with type t = int

As you can see from this valid mli file, this assumption isn't correct. .mli files require the same parser as .ml files do.

Can Pcty_signature occur as a node that isn't the child of a Psig_class_type?

class type c = object inherit object method x : int end end

Yes it can. Pcty_signature can occur anywhere a class type can occur. (note that there are two Pcty_signature here, one is the child of Pctf_inherit).

Can a Pcty_arrow occur in a valid mli file? Where? As a child of what?

class c' : int -> object method x : int end

Yes it can! And it can occur anywhere you'd indicate a class type.

Basically, you can consider that if a constructor can happen somewhere, then all the constructors of the same type can happen there too. Any type-related constructor can be in a .mli file (and non-type related constructors can happen too through the devious module type of).

If you have questions about where those are constructed, just take a look at parser.mly. Note that the same parser is used for the two file types.

PatJ
  • 5,996
  • 1
  • 31
  • 37
  • Thanks you! This is a fantastic answer! "If a constructor can happen somewhere, then all constructors of that type can happen there too" is pretty interesting. Although as an unfortunate side effect it means I have more work to do! I hope you don't mind if I use your nice contained examples as test cases. `mli` files can't have implementation though, correct? So, it seems like you couldn't have a `class_expr` anywhere in an `mli` (because that could contain a `Pcl_structure` with implementation). I think I'm still having trouble carving out the subset of cases I need to cover. – Bailey Parker Jan 17 '18 at 04:25
  • Also, given your bio blurb I'm wondering if you have any thoughts on how I've designed this thus far. I fear with the complexity needed to actually handle classes and modules it's going to get even more unwieldy (and some of the types of my `stub_*` might not work since it isn't always the case that types are synthesized to implementations). Sorry for being a bit of a leech as this is tangential to my question (and probably against some SO rules). But given your job description, it sounds like you're someone whose brain I could really borrow right now! – Bailey Parker Jan 17 '18 at 04:27
  • Additionally, my plan for testing this was (in addition to hand written cases) to gather all the mli files in the ocaml repo, run my script on them, and then see if they successfully compile. I could also run `ocamlc -i` on all of the ml files and try the same on them. Presumably the compiler and stdlib should make use of most of the language features, so my hope is that this covers everything. Does this sound sane? (again sorry for being bothersome!) – Bailey Parker Jan 17 '18 at 06:34
  • @BaileyParker because of `module type of`, anything can happen in a mli. – PatJ Jan 17 '18 at 14:07
  • @BaileyParker This is quite flattering. I'm not sure about what you're trying to accomplish (and why), but you should probably focus on a subset of the language first. Like, forget about classes and objects for now and have something that works when they do not appear. Also, you might want to look at `ast_invariants.ml` for the invariants that actually happen in the AST. Maybe you'd want to use `ast_iterator.mli` and `ast_helper.mli` to write your code as well. – PatJ Jan 17 '18 at 14:16
  • Oh, I'll have to look more into `module type of` then. tl;dr of what I'm trying to do is make a library/script that takes an `mli` file and produces a `ml` stub implementation (with `failwith "not implemented"`) that type checks (the ultimate reason for this is I have a OUnit test suite that I want to run `-list-test` on to get the list of tests without having a real implementation for the test to run against). `Ast_helper` looks much cleaner than the ones I've cobbled together, so I'll definitely look into using that! – Bailey Parker Jan 17 '18 at 20:17
2

This is a great game. Give me a list of AST Nodes, I'll write you a file that uses them all. :D

module K : module type of String
module M : Map.S with type key = K.t

class fakeref : K.t -> object
    method get : K.t
    method set : K.t -> unit
  end

So, to summarize: Classes can take arguments, hence Pcty_arrow. Pcty_signature can also be the child of Psig_class, as shown above. The other two are standard module constructions that can absolutely appear in .mli files.

As for how ocamlc -i works ... well, it returns the signature inferred by the typechecker. There is no single point of access to this. You can read typing/HACKING.md if you want, but beware, the rabbit hole goes very deep. That being said, I do not think this will be all that helpful to achieve your goal.

My advice would be the following: all the nodes above are fairly easy to handle, except for with_type. This one is very hard, because it basically allows to compute in signatures. Just give up on that one for now.

Also, be aware that values, modules, module types, class and class types all have different namespaces. Pmty_ident x -> Pmod_ident x is not correct.

Drup
  • 3,679
  • 13
  • 14
  • Thanks so much; this is really great! "I'll write you a file that uses them all" - I hope its ok if I use this as one of my test cases! I'll take your advice and have another go at everything but `with_type` first (although I don't fully understand why it is complicated, so I probably need to do more reading!). I think I see the problem you point out with `Pmty_ident -> Pmod_ident`. But just to be clear: a module type identifier should never under any circumstance become a module identifier then? – Bailey Parker Jan 17 '18 at 03:59
  • I was hoping there would be some nice central location in the typechecker that would do `structure -> signature` (which assuming a bijection, would be easy to reverse). But given your advice, I'll stick to more understanding the syntax than blindly reversing what the typechecker does. – Bailey Parker Jan 17 '18 at 04:01
  • Also, again for clarity, is there one clear stub `ml` corresponding to the `mli` you gave? For the class it seems like `class fakeref _ = object method get = failwith "not implemented" method set _ = failwith "not implemented" end` would work. But what about for the modules? – Bailey Parker Jan 17 '18 at 04:17
  • Digging into this a bit more, I've confused myself again about `Pmty_ident -> Pmod_ident`. I believe the following transformation valid: `Psig_include (Pmty_ident) -> Pstr_include(Pmod_ident)`. This is the equivalent of `include X` in the `mli` mapping to `include X` in the `ml` right? Basically, what I'm getting at is this mapping is fine (within `stub_module_type`) as long as `stub_module_type` is only called in contexts where you need to synethsize a module (and not where you want the actual type in the `ml` like in `module M : Map.S`). Does this sound right? – Bailey Parker Jan 17 '18 at 06:28
  • Expanding upon my question immediately above, it seems like the distinction to be made is between `module_type` and `module_type_declaration` (the latter of which wraps the former). My understanding is that the aforementioned transformation is ok in the `module_type` context, but if it were wrapped in a `module_type_declaration` then it would no longer be valid. Is that what you were getting at? – Bailey Parker Jan 17 '18 at 06:37
  • 2
    In a signature `include X` refers to the module type `X`. In a structure, it refers to the *module* X. Modules and modules types are not the same. `include List` will not work in a signature and `include Map.S` will not work in a structure. – Drup Jan 17 '18 at 11:47
  • Oh no, I see. This is shaping up to be a difficult corner case for me. `include List` in an `ml` has a `mli` with all of the list methods included, so reversing that is easy (I already stub all of the methods/functions). But I think if I encounter a `include Map.S` in a `mli` the only way to synthesize a `ml` matching that that `mli` is to actually import the sig of the referenced type, stub it, and then inject that stub at the location of the include . Do you see an easier way? Also, are there any cases at all where `Pmty_ident -> Pmod_ident` is valid? Or is it always wrong? – Bailey Parker Jan 17 '18 at 17:33
  • I think I have a solution to the include issue. `ocamlc -i some_module.i` will output the same signatures but with the includes inlined. So presumably if I use `Typemod` as a preprocess step to my input AST there will be no `Psig_include` nodes. – Bailey Parker Jan 17 '18 at 20:10
  • The problem is that, if you start to use the typechecker, you are going to need to care about load path to be able to find external libraries and so, and that opens up a whole new can of worm. :) – Drup Jan 18 '18 at 11:06