11

I'm using ExcelJS module and writing wrapper for my purposes, which implements Transform Stream API (yes, node version is 0.10.40).

ExcelJS has a stream API and according to example in ExcelJS module we can use one like this (execute node index.js > test.xlsx):

var stream = require('stream'),
    Excel = require('exceljs');

var rs = new stream.Readable({objectMode: true});
rs.push({name: 'one'});
rs.push({name: 'two'});
rs.push({name: 'three'});
rs.push(null);

var workbook = new Excel.stream.xlsx.WorkbookWriter({stream: process.stdout}),
    worksheet = workbook.addWorksheet('sheet 1');

worksheet.columns = [{header: 'Name', key: 'name'}];

rs.on('data', function(doc) {
    worksheet.addRow({
        name: doc.name
    }).commit();
});

rs.on('end', function(doc) {
    worksheet.commit();
    workbook.commit();
});

And it is working fine, but looks not very clean. Because we cannot use .pipe.

What I need:

'use strict';

var buffer = require('buffer'),
    stream = require('stream'),
    util = require('util'),
    Excel = require('exceljs');

var rs = new stream.Readable({objectMode: true});
rs.push({name: 'one'});
rs.push({name: 'two'});
rs.push({name: 'three'});
rs.push(null);

var ExcelTransform = function(options) {
    stream.Transform.call(this, options);
    this._writableState.objectMode = true;
    this._readableState.objectMode = false;

    this.workbook = new Excel.stream.xlsx.WorkbookWriter({stream: this});
    this.worksheet = this.workbook.addWorksheet('sheet 1');
    this.worksheet.columns = [{header: 'Name', key: 'name'}];
};

util.inherits(ExcelTransform, stream.Transform);

ExcelTransform.prototype._transform = function(chunk, encoding, callback) {
    if (buffer.Buffer.isBuffer(chunk)) {
        this.push(chunk);
    } else {
        this.worksheet.addRow({
            name: chunk.name
        }).commit();
    }
    callback();
};

ExcelTransform.prototype._flush = function(callback) {
    this.worksheet.commit();
    this.workbook.commit();
    callback();
};

rs.pipe(new ExcelTransform()).pipe(process.stdout);

But this is not working and giving me empty output.

cassln
  • 722
  • 2
  • 6
  • 18

3 Answers3

9

The output is empty because you're pushing nothing out of your transform stream. You're in object mode so it never goes in this if:

if (buffer.Buffer.isBuffer(chunk)) {
  this.push(chunk);
}

Here is a working pipeable version (the data is streamed at once at the end):

var stream = require('stream');
var util = require('util');
var Excel = require('exceljs');
var bl = require('bl');

var ExcelTransform = function(options) {
  stream.Transform.call(this, { objectMode: true });

  this.workbook = options.workbook;
  // you can make this optional by checking for it and
  // creating an empty worksheet if none provided
  this.worksheet = options.worksheet;
}

util.inherits(ExcelTransform, stream.Transform);

ExcelTransform.prototype._transform = function(doc, encoding, callback) {
  this.worksheet.addRow({ name: doc.name });    
  callback();
};

ExcelTransform.prototype._flush = function(callback) {
  this.workbook.commit(); // commit only when you're done

  var that = this;
  // bl drains the stream and create a Buffer object you can then push
  this.workbook.stream.pipe(bl(function(err, data) {
    that.push(data);
    callback();
  }));
};

// it's better to provide the workbook as a parameter to ExcelTransform
var workbook = new Excel.stream.xlsx.WorkbookWriter();
var worksheet = workbook.addWorksheet('sheet 1');
worksheet.columns = [{
  header: 'Name',
  key: 'name'
}];

var rs = new stream.Readable({ objectMode: true });
rs.push({ name: 'one' });
rs.push({ name: 'two' });
rs.push({ name: 'three' });
rs.push(null);

rs.pipe(new ExcelTransform({
  workbook: workbook,
  worksheet: worksheet
})).pipe(process.stdout);

Another solution, streaming all the time:

var stream = require('stream');
var util = require('util');
var Excel = require('exceljs');

var ExcelTransform = function(options) {
  stream.Transform.call(this, {
    writableObjectMode: true,
    readableObjectMode: false
  });

  this.workbook = options.workbook;
  var that = this;
  this.workbook.stream.on('readable', function() {
    var chunk = workbook.stream.read();
    that.push(chunk);
  });
  this.worksheet = options.worksheet;
}

util.inherits(ExcelTransform, stream.Transform);

ExcelTransform.prototype._transform = function(doc, encoding, callback) {
  this.worksheet.addRow({
    name: doc.name
  }).commit();

  callback();
};

ExcelTransform.prototype._flush = function(callback) {
  this.workbook.commit(); // final commit
};

// it's better to provide the workbook as a parameter to the ExcelTransform
var workbook = new Excel.stream.xlsx.WorkbookWriter();
var worksheet = workbook.addWorksheet('sheet 1');
worksheet.columns = [{
  header: 'Name',
  key: 'name'
}];

var rs = new stream.Readable({ objectMode: true });
rs.push({ name: 'one' });
rs.push({ name: 'two' });
rs.push({ name: 'three' });
rs.push(null);

rs.pipe(new ExcelTransform({
  workbook: workbook,
  worksheet: worksheet
})).pipe(process.stdout);
Shanoor
  • 13,344
  • 2
  • 29
  • 40
  • Thank you for answer. It is really working. Will be awesome if there is way to do this without one more dependency (`bl`). Have any idea? – cassln Feb 09 '16 at 07:12
  • @cassln I tried calling `read()` (empty buffer returned) and listening to 'data' event on `workbook.stream` (error), no luck. `workbook.stream` is a Duplex stream if you want to investigate more on this. – Shanoor Feb 09 '16 at 07:29
  • wait it's look like we are pushing whole output data to stream which means this is not true stream, according to it my problem is not solved. I need true stream when data pushed constantly. This is more effectively. Sorry, because of this I have to undo acceptance of this answer. If you have any other idea, you can write it. May be is better way to use Duplex instead of Transform. Generally it will be true stream and should working with `.pipe`. – cassln Feb 09 '16 at 07:51
  • 1
    @cassln You're right, it streams everything at the end. I added another solution. – Shanoor Feb 09 '16 at 08:48
  • look like stream now. But one question. You not call callback in `_flush`. If call it will throw error about pushing after EOF. If not call working fine, but looks strangely. Stream will not be frozen? – cassln Feb 09 '16 at 09:09
  • I tried to store `_flush` callback in ExcelTransform property and call it from `end` event of workbook stream. Working. This is more security, I think. – cassln Feb 09 '16 at 09:17
  • It's because there is a `that.push(chunk);` with `chunk` being `null` when everything is done (triggered by workbook.commit() I think). A null value signals the end of the stream so when you are in _flush, the stream is already closed, you can't callback anymore (callback also calls push). – Shanoor Feb 09 '16 at 09:17
  • 1
    Getting this error "Cannot read property 'endEmitted' of undefined" while listening to the readable event on ```this.workbook``` – Raghu Chahar Feb 12 '20 at 06:46
  • I have the same problem notified by @RaghuChahar – Paolo Laurenti Jul 29 '20 at 09:13
0

Below code snippet works for me

import { Workbook } from 'exceljs';
import { createReadStream } from 'fs';

function readExcelFile(){

const stream = createReadStream("./test.xlsx");
const workbook = new Workbook();
const streamWorkBook = await workbook.xlsx.read(stream);
const sheet = streamWorkBook.getWorksheet("Sheet1");

//Get all the rows data [1st and 2nd column]
 for (let i = 1; i <= sheet.rowCount; i++) {
  console.log(sheet.getRow(i).getCell(1).value);
  console.log(sheet.getRow(i).getCell(2).value);
 }

}
Pankaj Salunkhe
  • 235
  • 1
  • 5
  • 8
0

Since I was getting the same error as Raghu Chahar in his comment below https://stackoverflow.com/a/35285745/19357591 here some minor changes to allow Shanoor's answer to work.

The current release of exceljs creates a "StreamBuf" object which is utilising "readable-stream" version3.6 to allow streams to be compatible with e.g. nodejs 10. However, this StreamBuf has no "state", so reading "endEmitted" will lead to "Cannot read property 'endEmitted' of undefined". This means usage of this.workbook.stream.on('readable', function() {...}) is not possible.

What will work just fine is creating a writable stream, and do the push in its _write method.

The complete example based on Shanoor's solution (with creation of workbook and worksheet inside ExcelTransform):

var stream = require('stream');
var util = require('util');
var Excel = require('exceljs');

var ExcelTransform = function() {
  stream.Transform.call(this, {
    writableObjectMode: true,
    readableObjectMode: false
  });

  var writable = new stream.Writable({
    objectMode: false
  });
  var that = this;
  writable._write = function(chunk, encoding, next) {
    that.push(chunk);
    next();
  }

  this.workbook = new Excel.stream.xlsx.WorkbookWriter({ stream: writable });

  this.worksheet = this.workbook.addWorksheet('sheet 1');
  this.worksheet.columns = [{
    header: 'Name',
    key: 'name'
  }];
}

util.inherits(ExcelTransform, stream.Transform);

ExcelTransform.prototype._transform = function(doc, encoding, callback) {
  this.worksheet.addRow({
    name: doc.name
  }).commit();

  callback();
};

ExcelTransform.prototype._flush = function(callback) {
  this.workbook.commit(); // final commit
};


var rs = new stream.Readable({ objectMode: true });
rs.push({ name: 'one' });
rs.push({ name: 'two' });
rs.push({ name: 'three' });
rs.push(null);

rs.pipe(new ExcelTransform()).pipe(process.stdout);
hidden
  • 1
  • 2
  • When I use `fs.createWriteStream('example.xlsx')` as the write stream, I am getting a corrupt file that I cannot open in Excel. Any hints? – Manuel Reil Jan 19 '23 at 17:27