11

I would like to serialize data from a JSON object and send it through the network with Kafka as an endpoint. I have an Avro schema in a file that determines the fields necessary to send to Kafka for the logging system:

{"namespace": "com.company.wr.messages",
   "type": "record",
   "name": "Log",
   "fields": [
       {"name": "timestamp", "type": "long"},
       {"name": "source", "type": "string"},
       {"name": "version", "type": "string"},
       {"name": "ipAddress", "type": "string"},
       {"name": "name", "type": "string"},
       {"name": "level", "type": "string"},
       {"name": "errorCode", "type": "string"},
       {"name": "message", "type": "string"}
       ]
}

I am using the node package 'avro-schema'; I tried others but none of them are working well. I just need to serialize in an Avro way from Node.js.

Ray Booysen
  • 28,894
  • 13
  • 84
  • 111
Pistolo
  • 133
  • 1
  • 2
  • 10

2 Answers

5

With avsc:

'use strict';

const avro = require('avsc');

// Build the Avro type from the schema.
// `avro.Type.forSchema` is the current API; `avro.parse` (used in older
// avsc releases) is deprecated.
const logType = avro.Type.forSchema({
  namespace: 'com.company.wr.messages',
  type: 'record',
  name: 'Log',
  fields: [
    { name: 'timestamp', type: 'long' },
    { name: 'source', type: 'string' },
    { name: 'version', type: 'string' },
    { name: 'ipAddress', type: 'string' },
    { name: 'name', type: 'string' },
    { name: 'level', type: 'string' },
    { name: 'errorCode', type: 'string' },
    { name: 'message', type: 'string' },
  ],
});

// A sample log record matching the schema.
const obj = {
  timestamp: 2313213,
  source: 'src',
  version: '1.0',
  ipAddress: '0.0.0.0',
  name: 'foo',
  level: 'INFO',
  errorCode: '',
  message: '',
};

// Its corresponding Avro binary encoding — a Buffer you can hand to a
// Kafka producer as the message payload.
const buf = logType.toBuffer(obj);

You can find more information on the various encoding methods available here.

mtth
  • 4,671
  • 3
  • 30
  • 36
2

Here is an example of what we are doing for a similar use case where we send Avro records to another queue (Amazon Kinesis), adapted to your schema. We are using it with node-avro-io 0.2.0 and stream-to-array 2.0.2.

'use strict';

const avro = require('node-avro-io');
const toArray = require('stream-to-array');

// Avro schema describing one log record.
const schema = {
  namespace: 'com.company.wr.messages',
  type: 'record',
  name: 'Log',
  fields: [
    { name: 'timestamp', type: 'long' },
    { name: 'source', type: 'string' },
    { name: 'version', type: 'string' },
    { name: 'ipAddress', type: 'string' },
    { name: 'name', type: 'string' },
    { name: 'level', type: 'string' },
    { name: 'errorCode', type: 'string' },
    { name: 'message', type: 'string' },
  ],
};

// DataFile.Writer is a stream; "snappy" selects the block-compression codec.
const writer = new avro.DataFile.Writer(schema, 'snappy');

// Collect the serialized stream output into a single Buffer.
toArray(writer, function (err, chunks) {
  if (err) throw err; // don't silently drop stream errors
  const dataBuffer = Buffer.concat(chunks);
  // Send dataBuffer to Kafka here
});

const record = {
  timestamp: 123,
  source: 'example.com',
  version: 'HTTP 1.1',
  ipAddress: '123.123.123.123',
  name: 'Jim',
  level: 'INFO',
  errorCode: '200',
  message: 'foo',
};

// Append the record and end the stream so toArray's callback fires.
writer.append(record).end();

The examples for node-avro-io, at the time of writing, are for serializing/deserializing Avro files on the filesystem. This example uses the stream-to-array package as a shortcut to getting a Buffer out of the stream-based node-avro-io package. The Buffer can be sent to your queue as the message in your Kafka producer.

Some other node.js packages, such as avronode and Collective's node-avro, are wrappers for the C++ library. I did not have as much success with these packages. Here's a tl:dr for node-avro's Avro C++ library installation instructions (building a .deb package for it). It may help with any C++ wrapper package.

# Build Avro's C++ library and package it as a .deb via checkinstall.
sudo apt-get install -y libboost-all-dev cmake checkinstall
# Note: this was "ssh clone" in the original — ssh has no clone subcommand.
git clone git@github.com:apache/avro.git
cd avro
git checkout release-1.7.7
cd lang/c++
cmake -G "Unix Makefiles"
# checkinstall runs `make install` under the hood and captures it as a package.
sudo checkinstall -y \
    --install=no \
    --pkgname="avro-cpp" \
    --pkgrelease="1.7.7" \
    --maintainer="me@example.com" \
    --addso=yes

For Collective's node-avro, I had to remove the export CXXFLAGS="-fcxx-exceptions" line from the bin/install-and-run-tests script on Ubuntu 14.04.

Mike Placentra
  • 835
  • 1
  • 14
  • 27