I am writing a small parser to process some log files using node streams (io.js, actually, but I don't think that matters).
I am following the example in the docs for unshift to parse out the header. I can successfully split the buffer and grab the header, but as soon as I call stream.unshift, the header string and the remaining string seem to get concatenated back together.
In setting up some example code for this question, I found out that this behavior only occurs when I am looking at a file-based stream. Whenever I use a string-based stream, the issue does not happen, even though the file has the exact same text as the string.
Here is what the file looks like with the whitespace and control characters shown (for comparison):
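(Rendered as plain text with the CR, LF, and 0x1E record-separator bytes escaped; it is the same content as the fake_file string in the code below.)

FILE_TYPE:SOME-HEADER-DATE\r\n
HEADER_END\r\n
1234|logged data|1|2|3|4|5|some other logged data\x1E\r\n
1235|logged data|1|2|3|4|5|some other logged data\x1E\r\n
1236|logged data|1|2|3|4|5|some other logged data\x1E\r\n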
I need some assistance understanding why this is happening.
var StringDecoder = require('string_decoder').StringDecoder;
// setup string based stream in fake_stream
var Stream = require('stream');
var fake_file = 'FILE_TYPE:SOME-HEADER-DATE\r\n'
    + 'HEADER_END\r\n'
    + '1234|logged data|1|2|3|4|5|some other logged data\x1E\r\n'
    + '1235|logged data|1|2|3|4|5|some other logged data\x1E\r\n'
    + '1236|logged data|1|2|3|4|5|some other logged data\x1E\r\n';
var fake_stream = new Stream.Readable();
fake_stream.push(new Buffer(fake_file, 'utf8'));
fake_stream.push(null);
// setup file based stream in file_stream
// the file minimal_test_log.glf has the text shown above (with the control characters unescaped)
var fs = require('fs');
var file = 'C:\\Some\\Path\\To\\minimal_test_log.glf';
var file_stream = fs.createReadStream(file);
// WHY AM I GETTING DIFFERENT RESULTS HERE?
parseHeader(file_stream, function(err, header, stream) {
    console.log('processing file_stream: ' + header.length);
    // RESULTS: processing file_stream: 184
    // this is both parts concatenated, without the HEADER_END\r\n
});
parseHeader(fake_stream, function(err, header, stream) {
    console.log('processing fake_stream: ' + header.length);
    // RESULTS: processing fake_stream: 28
    // these results are what I would expect: everything before HEADER_END
});
// Slightly modified example found at https://iojs.org/api/stream.html#stream_readable_unshift_chunk
function parseHeader(stream, callback) {
    stream.on('error', callback);
    stream.on('readable', onReadable);
    var decoder = new StringDecoder('utf8');
    var header = '';
    function onReadable() {
        var chunk, buf, remaining;
        var header_boundary = /HEADER_END\r\n/g;
        while (null !== (chunk = stream.read())) {
            var str = decoder.write(chunk);
            if (str.match(header_boundary)) {
                var split = str.split(header_boundary);
                header += split.shift();
                remaining = split.join('');
                buf = new Buffer(remaining, 'utf8');
                if (buf.length) {
                    stream.unshift(buf);
                }
                // the header length is different starting at this point
                stream.removeListener('error', callback);
                stream.removeListener('readable', onReadable);
                callback(null, header, stream);
            } else {
                header += str;
            }
        }
    }
}
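For what it's worth, a quick sanity check along these lines should rule out an encoding difference between the file and the string (just a sketch, reusing the file and fake_file variables from above):

// sketch: confirm the on-disk bytes match the in-memory string byte-for-byte
var on_disk = fs.readFileSync(file);            // raw Buffer straight from disk
var in_memory = new Buffer(fake_file, 'utf8');  // the same text as a Buffer
console.log('byte-identical: ' + on_disk.equals(in_memory));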