30

I've written a simple echo request/reply test for zeromq using node.js, Python, and Java. The code runs a loop of 100K requests. The platform is a 5yo MacBook Pro with 2 cores and 3G of RAM running Snow Leopard.

node.js is consistently an order of magnitude slower than the other two platforms.

Java: real 0m18.823s user 0m2.735s sys 0m6.042s

Python: real 0m18.600s user 0m2.656s sys 0m5.857s

node.js: real 3m19.034s user 2m43.460s sys 0m24.668s

Interestingly, with Python and Java the client and server processes both use about half of a CPU. The client for node.js uses just about a full CPU and the server uses about 30% of a CPU. The client process also has an enormous number of page faults leading me to believe this is a memory issue. Also, at 10K requests node is only 3 times slower; it definitely slows down more the longer it runs.

Here's the client code (note that the process.exit() line doesn't work, either, which is why I included an internal timer in addition to using the time command):

var zeromq = require("zeromq");

// Benchmark client: sends 'Hello' requests over a REQ socket and reports the
// elapsed time once enough replies have been counted.
var counter = 0;
var startTime = new Date();

// NOTE(review): the surrounding text describes a 100K-request run, but this
// limit is 10000 — confirm which value the posted timings were measured with.
var maxnum = 10000;

var socket = zeromq.createSocket('req');

socket.connect("tcp://127.0.0.1:5502");
console.log("Connected to port 5502.");

// Schedules a send on the next tick and re-schedules itself for as long as
// counter < maxnum. counter is only advanced by the 'message' handler below,
// so sends are not paced by replies: a new request is queued on every tick
// until enough replies have been counted.
function moo()
{
    process.nextTick(function(){
        socket.send('Hello');
        if (counter < maxnum)
        {
            moo();
        }
    });
}

moo();

// Reply handler: logs every 1000th reply, counts replies, and stops the run.
socket.on('message',
          function(data)
          {
              if (counter % 1000 == 0)
              {
                  console.log(data.toString('utf8'), counter);
              }

              // The limit is checked before the increment, so this branch is
              // taken on the reply that arrives when counter already equals
              // maxnum (i.e. the (maxnum + 1)-th reply).
              if (counter >= maxnum)
              {
                  var endTime = new Date();
                  console.log("Time: ", startTime, endTime);
                  console.log("ms  : ", endTime - startTime);
                  process.exit(0);
              }

              //console.log("Received: " + data);
              counter += 1;

          }
);

// Socket errors are only logged; the run is not aborted.
socket.on('error', function(error) {
  console.log("Error: "+error);
});

Server code:

var zeromq = require("zeromq");

// Benchmark server: REP socket that answers every request with the request
// text followed by " Blancmange!".
var socket = zeromq.createSocket('rep');

socket.bind("tcp://127.0.0.1:5502",
            function(err)
            {
                // A failed bind is fatal: rethrow instead of continuing.
                if (err) throw err;
                console.log("Bound to port 5502.");

                // Handlers are registered only after the bind has succeeded.
                // Only the first argument (named envelope) is used to build
                // the reply; blank and data are never read.
                socket.on('message', function(envelope, blank, data)
                          {
                              socket.send(envelope.toString('utf8') + " Blancmange!");
                          });

                // Socket errors are only logged.
                socket.on('error', function(err) {
                    console.log("Error: "+err);
                });
            }
);

For comparison, the Python client and server code:

import zmq

# Benchmark client: REQ socket connected to the local echo server.
context = zmq.Context()
socket = context.socket(zmq.REQ)
socket.connect("tcp://127.0.0.1:5502")

# 100001 strictly sequential round trips: each iteration sends one request
# and then receives its reply before the next request is sent.
for counter in range(0, 100001):
    socket.send("Hello")
    message = socket.recv()

    # Progress output on every 1000th reply.
    if counter % 1000 == 0:
        print message, counter



import zmq

# Benchmark server: REP socket bound to the local benchmark port.
context = zmq.Context()
socket = context.socket(zmq.REP)

socket.bind("tcp://127.0.0.1:5502")
print "Bound to port 5502."

# Serve forever: reply to each request with the request text plus a suffix.
while True:
    message = socket.recv()
    socket.send(message + " Blancmange!")

And the Java client and server code:

package com.moo.test;

import org.zeromq.ZMQ;
import org.zeromq.ZMQ.Context;
import org.zeromq.ZMQ.Socket;

/**
 * Benchmark client: performs 100001 sequential request/reply round trips
 * against the echo server on tcp://127.0.0.1:5502 and prints every 1000th
 * reply together with its counter.
 */
public class TestClient
{
    /**
     * Runs the benchmark loop.
     *
     * @param args unused
     * @throws RuntimeException if a send fails or a receive returns null
     */
    public static void main (String[] args)
    {
        Context context = ZMQ.context(1);

        Socket requester = context.socket(ZMQ.REQ);
        requester.connect("tcp://127.0.0.1:5502");

        System.out.println("Connected to port 5502.");

        // Each iteration sends one request and receives its reply before
        // the next request is sent.
        for (int counter = 0; counter < 100001; counter++)
        {
            if (!requester.send("Hello".getBytes(), 0))
            {
                throw new RuntimeException("Error on send.");
            }

            byte[] reply = requester.recv(0);
            if (reply == null)
            {
                throw new RuntimeException("Error on receive.");
            }

            // Decode the reply only when it is actually printed.
            if (counter % 1000 == 0)
            {
                String replyValue = new String(reply);
                System.out.println(replyValue + " " + counter);
            }
        }

        requester.close();
        context.term();
    }
}

package com.moo.test;

import org.zeromq.ZMQ;
import org.zeromq.ZMQ.Context;
import org.zeromq.ZMQ.Socket;

/**
 * Benchmark server: REP socket on tcp://127.0.0.1:5502 that answers every
 * request with the request text followed by " Blancmange!", matching the
 * replies built by the node.js and Python servers in this comparison.
 */
public class TestServer
{
    /**
     * Serves requests until the current thread is interrupted.
     *
     * @param args unused
     * @throws RuntimeException if a receive returns null or a send fails
     */
    public static void main (String[] args) {
        Context context = ZMQ.context(1);

        Socket socket  = context.socket(ZMQ.REP);
        socket.bind("tcp://127.0.0.1:5502");

        System.out.println("Bound to port 5502.");

        while (!Thread.currentThread().isInterrupted())
        {
            byte[] request = socket.recv(0);
            if (request == null)
            {
                throw new RuntimeException("Error on receive.");
            }

            // Echo the request back with the suffix appended rather than
            // sending a fixed payload, so all three servers do the same work.
            String reply = new String(request) + " Blancmange!";
            if (!socket.send(reply.getBytes(), 0))
            {
                throw new RuntimeException("Error on send.");
            }
        }

        socket.close();
        context.term();
    }
}

I would like to like node, but with the vast difference in code size, simplicity, and performance, I'd have a hard time convincing myself at this point.

So, has anyone seen behavior like this before, or did I do something asinine in the code?

Scott A
  • 7,745
  • 3
  • 33
  • 46
  • 3
    Can you try to simulate the logic from your Python example (i.e., send the next message only after receiving the previous one)? – Andrey Sidorov Jul 11 '11 at 06:24
  • You have to remember Node is young still. You can't expect the simplicity and eloquence of a hardened veteran like python with a framework that's not even at its 1.0.0 release. – Swift Jul 11 '11 at 22:01
  • 1
    I wonder if there's something with the `zmq` module for Node.js? I did a [simple micro benchmark](https://gist.github.com/1543040#file_z100000000.out) and it showed Node.js significantly faster than all versions of Python for simple number crunching, comparable to the speed of Java. – nicerobot Jan 01 '12 at 02:27
  • I don't know if this is a fair test. As you say, your node process only used one core. Since node uses a single process, it can only use one core, while threaded solutions are free to use all the cores you have on your machine. This page shows how to (with very few lines of code) spool up N node processes where N is the number of cores: http://nodejs.org/api/cluster.html – Jess May 06 '13 at 20:40
  • 1
    @Jess Neither of the other two examples used threading either. – Scott A May 06 '13 at 20:50

6 Answers6

17

You're using a third party C++ binding. As far as I understand it, the crossover between v8's "js-land" and bindings to v8 written in "c++ land", is very expensive. If you notice, some popular database bindings for node are implemented entirely in JS (although, partly I'm sure, because people don't want to compile things, but also because it has the potential to be very fast).

If I remember correctly, when Ryan Dahl was writing the Buffer objects for node, he noticed that they were actually a lot faster if he implemented them mostly in JS as opposed to C++. He ended up writing what he had to in C++, and did everything else in pure javascript.

So, I'm guessing part of the performance issue here has to do with that particular module being a c++ binding.

Judging node's performance based on a third party module is not a good medium for determining its speed or quality. You would do a lot better to benchmark node's native TCP interface.

chjj
  • 14,322
  • 3
  • 32
  • 24
  • 14
    I would agree with you except that I'm not interested in Node's native interface: I want to use zeromq. So a real world benchmark that uses the modules and interfaces I intend to use for the project is exactly the best kind of benchmark to perform. Anything else would be artificial and not terribly relevant. – Scott A Aug 27 '11 at 14:05
  • 1
    What kind of performance could we expect if somebody wrote a native JS ZeroMQ interface? Perhaps even faster than Python or Java? – dkamins Jan 14 '12 at 18:32
  • 1
    @dkamins: Quite possibly, however you run into a maintenance issue at that point since all (I think, haven't checked) of the zeromq interfaces use the C implementation. – Scott A Jan 19 '12 at 20:59
  • [They're working on it](https://github.com/JustinTulloss/zeromq.node/issues/496) and [they've already increased performance by an order of magnitude](https://github.com/JustinTulloss/zeromq.node/pull/499) since the question was asked. – Nepoxx Jun 07 '16 at 14:23
9

This was a problem with the zeroMQ bindings of node. I don't know since when, but it is fixed and you get the same results as with the other languages.

Dan Milon
  • 2,509
  • 2
  • 18
  • 17
  • That's good to know, and it's a reasonable assumption that the three examples should be relatively equivalent given that most of the code is running in the C bindings. – Scott A Aug 24 '12 at 14:32
9

"Can you try to simulate the logic from your Python example (i.e., send the next message only after receiving the previous one)?" – Andrey Sidorov Jul 11 at 6:24

I think that's part of it:

var zeromq = require("zeromq");

// Revised benchmark client: keeps a single request in flight by sending the
// next 'Hello' only from inside the reply handler.
var counter = 0;
var startTime = new Date();

var maxnum = 100000;

var socket = zeromq.createSocket('req');

socket.connect("tcp://127.0.0.1:5502");
console.log("Connected to port 5502.");

// Kick off the exchange with the first request; every later request is sent
// by the 'message' handler below.
socket.send('Hello');

socket.on('message',
          function(data)
          {
              // Progress output on every 1000th reply.
              if (counter % 1000 == 0)
              {
                  console.log(data.toString('utf8'), counter);
              }

              // The limit is checked before the increment, so the run ends
              // on the reply that arrives when counter already equals maxnum.
              if (counter >= maxnum)
              {
                  var endTime = new Date();
                  console.log("Time: ", startTime, endTime);
                  console.log("ms  : ", endTime - startTime);
                  socket.close(); // or the process.exit(0) won't work.
                  process.exit(0);
              }

              //console.log("Received: " + data);
              counter += 1;

          // Send the next request only now that this reply has been handled.
          socket.send('Hello');
          }
     );

// Socket errors are only logged; the run is not aborted.
socket.on('error', function(error) {
    console.log("Error: "+error);
});

This version doesn't exhibit the same increasing slowness as the previous, probably because it's not throwing as many requests as possible at the server and only counting responses like the previous version. It's about 1.5 times as slow as Python/Java as opposed to 5-10 times slower in the previous version.

Still not a stunning commendation of node for this purpose, but certainly a lot better than "abysmal".

Scott A
  • 7,745
  • 3
  • 33
  • 46
4

I'm not all that familiar with node.js, but the way you're executing it recursively creates new functions over and over again; no wonder it's blowing up. To be on par with Python or Java, the code needs to be more along the lines of:

    // Sketch only: issue one request while below the limit, then handle
    // incoming messages before anything else is sent.
    if (counter < maxnum)
    {
       socket.send('Hello');
       processmessages();  // or something similar in node.js if available
    }
madprogrammer
  • 49
  • 1
  • 1
  • 3
    The modified solution fixes both that issue and the real performance killer: throwing the messages at the server without processing the responses first. It still performs worse than Python or Java, just not as ridiculously slow as the original flawed attempt. – Scott A Jan 11 '12 at 14:46
3

Any performance testing using REQ/REP sockets is going to be skewed due to round-tripping and thread latencies. You're basically waking up the whole stack, all the way down and up, for each message. It's not very useful as a metric because REQ/REP cases are never high performance (they can't be). There are two better performance tests:

  • Sending many messages of various sizes from 1 byte to 1K, see how many you can send in e.g. 10 seconds. This gives you basic throughput. This tells you how efficient the stack is.
  • Measure end-to-end latency over a stream of messages; i.e., insert a timestamp in each message and see what the deviation is on the receiver. This tells you whether the stack has jitter, e.g. due to garbage collection.
Pieter Hintjens
  • 6,599
  • 1
  • 24
  • 29
  • Except that my use case is exactly that: issue a request, get a response. I'm not sure how testing streaming performance will show me what to expect in that type of scenario. I didn't set out to show which solution could stream messages the fastest. – Scott A Mar 01 '13 at 14:43
  • 1
    Read the Guide, it has lots of examples of high performance async request-reply. – Pieter Hintjens Mar 03 '13 at 09:48
  • I'm willing to be schooled, but if I'm comparing say Python vs. Java, it's probably a good idea to use the whole stack in the test, isn't it? – Scott A Mar 03 '13 at 15:48
1

Your client python code is blocking in the loop. In the node example, you receive the events in the 'message' event handler asynchronously. If all you want from your client is to receive data from zmq then your python code will be more efficient because it is coded as a specialized one-trick pony. If you want to add features like listen to other events that aren't using zmq, then you'll find it complicated to re-write the python code to do so. With node, all you need is to add another event handler. node will never be a performance beast for simple examples. However, as your project gets more complicated with more moving pieces, it's a lot easier to add features correctly to node than to do so with the vanilla python you've written. I'd much rather toss a little bit more money on hardware, increase readability and decrease my development time/cost.

matthewaveryusa
  • 632
  • 5
  • 24
  • 2
    Yes, it's toy Python code, but you're drawing broad conclusions that aren't supported by a narrow example. Node isn't magic sauce that makes things instantly readable: it's possible to write unmaintainable garbage using Node, just as it's possible to write excellently maintainable (sync or async) code using Python. I've written a **tremendous** lot of Javascript, but I find Python to be the more maintainable language in general. YMMV. – Scott A Aug 24 '12 at 14:30