0

I am using Cap'n Proto (via the OCaml capnp-rpc library) to send messages between several nodes. Each node can both send messages to and receive messages from all the others. The relevant code looks like this:

main.ml:

(** [start_node id nodes] runs a node inside [Lwt_main.run].

    The node listens on TCP 127.0.0.1, port [9000 + id], with TLS serving
    disabled, and exposes the service built by [Hs.local id nodes] under the
    public service id [""]. Once the vat is serving, the sturdy URI is
    printed. [secret_key] is taken from the enclosing scope.

    The final promise comes from [Lwt.wait ()] whose resolver is discarded,
    so this function does not return normally. *)
let start_node id nodes =
    let module Unix_rpc = Capnp_rpc_unix in
    let module Restorer = Capnp_rpc_net.Restorer in
    Lwt_main.run (
        let port = 9000 + id in
        let config =
            Unix_rpc.Vat_config.create ~serve_tls:false ~secret_key
                (`TCP ("127.0.0.1", port))
        in
        let service_id = Restorer.Id.public "" in
        let service = Hs.local id nodes in
        let restore = Restorer.single service_id service in
        let* vat = Unix_rpc.serve config ~restore in
        let uri_text = Uri.to_string (Unix_rpc.Vat.sturdy_uri vat service_id) in
        Fmt.pr "Server ID=%s running. Connect to URI %S.@." (Int.to_string id) uri_text;
        (* Keep the resolver unused: the promise stays pending forever. *)
        let forever, _resolver = Lwt.wait () in
        forever
    )

hs.ml:

(*client*)
(** [open_conn id] imports a sturdy reference to the node listening on
    127.0.0.1, port [id + 9000], using an insecure (unauthenticated) URI.
    A new client-only vat is created on every call. *)
let open_conn id =
    let port_text = Int.to_string (id + 9000) in
    let uri = Uri.of_string ("capnp://insecure@127.0.0.1:" ^ port_text) in
    let vat = Capnp_rpc_unix.client_only_vat () in
    uri |> Capnp_rpc_unix.Vat.import_exn vat

(** [open_conns nodes] returns the sturdy references for node ids
    [0 .. nodes - 1], in increasing id order (one [open_conn] call per id). *)
let open_conns nodes =
    List.init nodes open_conn

(** [send msg service] invokes the [Hs.sendMsg] method on the live
    capability [service], with [msg] copied into the request parameters.
    The resulting promise resolves to [()] once the call has returned a
    value; the response content is discarded. Failures surface through
    [call_for_value_exn]. *)
let send msg service =
    let module SendMsg = Api.Client.Hs.SendMsg in
    let request, params = Capability.Request.create SendMsg.Params.init_pointer in
    (* The value returned by the setter is not needed. *)
    ignore (SendMsg.Params.msg_set_reader params msg);
    Capability.call_for_value_exn service SendMsg.method_id request
    |> Lwt.map (fun _response -> ())

(** [send_msg service msg] obtains a live capability from the sturdy
    reference [service] via [Sturdy_ref.with_cap_exn] and sends [msg]
    through it with [send].

    NOTE(review): the capability is obtained afresh on every call. The
    reported [select] EINVAL may indicate file descriptors piling up if each
    call opens a new connection for an insecure URI — confirm, and consider
    connecting once and reusing the capability. *)
let send_msg service msg =
    Sturdy_ref.with_cap_exn service (fun cap -> send msg cap)

(*server*)
(** [local id nodes] builds the local [Hs] service implementation for this
    node by passing an object inheriting [Hs.service] to [Hs.local].
    Parts of the body are elided in this excerpt, so the use of [id] and
    [conns] is not visible here. *)
let local id nodes =
    (*...*)
    (* Sturdy references to nodes [0 .. nodes - 1], created up front. *)
    let conns = open_conns nodes in
    let module Hs = Api.Service.Hs in
    Hs.local @@ object
        inherit Hs.service

        (* Handler for the sendMsg method: reads the message from the
           parameters, releases the parameter capabilities, and replies
           with an empty result. *)
        method send_msg_impl params release_param_caps =
            let open Hs.SendMsg in
            let msg = Params.msg_get params in
            (* Called once [msg] has been read out of [params]. *)
            release_param_caps ();
                        (* do stuff like sending messages *)
            Service.return_empty ()
    end

After sending several messages, the nodes crash with Lwt errors:

hs: internal error, uncaught exception:
    Unix.Unix_error(Unix.EINVAL, "select", "")
    Raised by primitive operation at Lwt_engine.select#select in file "src/unix/lwt_engine.ml", line 405, characters 26-60
    Called from Lwt_engine.select_based#iter in file "src/unix/lwt_engine.ml", line 346, characters 8-39
    Called from Lwt_main.run.run_loop in file "src/unix/lwt_main.ml", line 41, characters 6-49
    Called from Lwt_main.run in file "src/unix/lwt_main.ml", line 118, characters 8-13
    Re-raised at Lwt_main.run in file "src/unix/lwt_main.ml", line 124, characters 4-13
    Called from Cmdliner_term.app.(fun) in file "cmdliner_term.ml", line 24, characters 19-24
    Called from Cmdliner_eval.run_parser in file "cmdliner_eval.ml", line 34, characters 37-44
Fatal error: exception Lwt_switch.Off
Raised by primitive operation at Lwt_unix.read_bigarray.(fun) in file "src/unix/lwt_unix.cppo.ml", line 688, characters 8-59
Called from Lwt_unix.wrap_syscall.(fun) in file "src/unix/lwt_unix.cppo.ml", line 571, characters 17-28
main.exe: [WARNING] Uncaught exception handling CapTP connection: Failure("recv: Unix.Unix_error(Unix.ECONNRESET, \"read\", \"\")") (dropping connection)
main.exe: [DEBUG] Error calling field(6110)(rc=1+1) -> #[] -> remote-promise(6109, rc=1) -> q0(Hs.sendMsg): Disconnected: Switch turned off
Fatal error: exception Failure("Hs.sendMsg: Disconnected: Switch turned off")
Raised at Stdlib.failwith in file "stdlib.ml", line 29, characters 17-33
Called from Lwt.Sequential_composition.bind.create_result_promise_and_callback_if_deferred.c in file "src/core/lwt.ml", line 1849, characters 23-26

How can I fix this?

Melkor
  • 779
  • 1
  • 12
  • 29

0 Answers0