1

I'm experimenting with data streams where I would like to aggregate time series data. The reduce works; however, I cannot find a way to turn the resulting array back into another stream.

When I call a map on the reduce I only get back the whole array as the result, not the data within the array.

Any thoughts or hints are welcome.

const fs = require('fs')
const highland = require('highland')

/**
 * Reducer step: folds one parsed record into the running aggregate.
 *
 * Buckets are stored on `aggData` under the record's `groupBySec` value.
 * The first record of a bucket initialises `volume`, `start-time` and
 * `end-time`; every later record adds to `volume` and overwrites `end-time`.
 *
 * @param {Object} aggData - Accumulator; mutated in place.
 * @param {{groupBySec: number, volume: number, timeStamp: string}} parts -
 *   Record to fold in.
 * @returns {Object} The same `aggData` reference that was passed in.
 */
const streamAgg = (aggData, parts) => {
  const { groupBySec, volume, timeStamp } = parts;
  const bucket = aggData[groupBySec];

  // Known bucket: extend it and finish early.
  if (bucket) {
    bucket['volume'] += volume;
    bucket['end-time'] = timeStamp;
    return aggData;
  }

  // First record for this bucket: start and end coincide.
  aggData[groupBySec] = {
    'volume': volume,
    'start-time': timeStamp,
    'end-time': timeStamp,
  };
  return aggData;
};


// Read the CSV line by line, turn each line into a record, drop lines whose
// first column is not a parseable date, and fold everything with streamAgg.
highland(fs.createReadStream('./timeseriesdata.csv', 'utf8'))
  .split()
  .map((line) => {
    const fields = line.split(',');
    const parsedMs = Date.parse(fields[0]);
    return {
      timeStamp: fields[0],
      timeStampParsed: parsedMs,
      // Round the timestamp down to the start of its second (in ms).
      groupBySec: Math.floor(parsedMs / 1000) * 1000,
      volume: Number(fields[3]),
    };
  })
  .reject((record) => Number.isNaN(record.timeStampParsed))
  // The accumulator is seeded with an array; streamAgg attaches its buckets
  // to it under their groupBySec keys.
  .reduce([], streamAgg)
  // Identity pass-through over whatever the reduce stage emits.
  .map((result) => result)
  .each((result) => console.log(result));
richard_
  • 11
  • 1
  • 3

1 Answer

0

Seems like you're reducing to an array instead of an object (what your reducer function seems to expect). This works:

const fs       = require('fs');
const highland = require('highland');


/**
 * Reducer step that merges one record into the per-second aggregate.
 *
 * `aggData` is keyed by `parts.groupBySec`. A bucket is created on first
 * sight with the record's volume and with matching start and end times;
 * later records add to the volume and replace only the end time.
 *
 * @param {Object} aggData - Running aggregate; updated in place.
 * @param {{groupBySec: number, volume: number, timeStamp: string}} parts -
 *   Record to merge.
 * @returns {Object} `aggData`, so it can be used directly as a reduce step.
 */
const streamAgg = (aggData, parts) => {
  const key      = parts.groupBySec;
  const existing = aggData[key];

  if (existing) {
    // Update the bucket object itself so its identity is preserved.
    Object.assign(existing, {
      volume:     existing.volume + parts.volume,
      'end-time': parts.timeStamp
    });
  } else {
    aggData[key] = {
      volume:       parts.volume,
      'start-time': parts.timeStamp,
      'end-time':   parts.timeStamp
    };
  }

  return aggData;
};


// Stream the CSV, build one record per line, skip lines without a parseable
// date in the first column, then fold the records into a plain object keyed
// by groupBySec and print that object.
highland(fs.createReadStream('./timeseriesdata.csv', 'utf8'))
  .split()
  .map((line) => {
    // Only the first and fourth columns are used.
    const [timeStamp, , , rawVolume] = line.split(',');
    const timeStampParsed = Date.parse(timeStamp);
    return {
      timeStamp,
      timeStampParsed,
      groupBySec: Math.floor(timeStampParsed / 1000) * 1000,
      volume: Number(rawVolume)
    };
  })
  .reject(({ timeStampParsed }) => Number.isNaN(timeStampParsed))
  .reduce({}, streamAgg)
  .doto(console.log)
  // Exit explicitly once the stream has been fully consumed.
  .done(() => process.exit(0));
djanowski
  • 5,610
  • 1
  • 27
  • 17