I have a large CSV file (~500 MB) that I want to convert to JSON using BabyParse (the Node version of Papa Parse). With smaller files I can read the whole CSV into a string and then pass that string to parse. However, a 500 MB file is too big to be read into a string that way.
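For smaller files, something like this works fine (the file names are just placeholders):

var Baby = require('babyparse');
var fs = require('fs');

// Small-file approach: read the whole CSV into memory, parse it, write the JSON out.
var content = fs.readFileSync('smalltest.csv', { encoding: 'utf8' });
var parsed = Baby.parse(content, { fastMode: false });
fs.writeFileSync('smalltest.json', JSON.stringify(parsed.data));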
I have a workaround that reads the CSV file as a stream line-by-line, but it's horrendously slow (see below). Can someone tell me a faster way to work with large CSV files in Papa/Baby parse?
var Baby = require('babyparse');
var fs = require('fs');
var readline = require('readline');

var file = '500mbbigtest.csv';

// var content = fs.readFileSync(file, { encoding: 'binary' }); // DOESN'T WORK -- file is too large

var instream = fs.createReadStream(file);
var rl = readline.createInterface({ input: instream });
rl.on('line', function(line) {
  // Parse each CSV line individually and append its JSON to the output file.
  var parsed = Baby.parse(line, { fastMode: false });
  var rows = JSON.stringify(parsed.data);
  fs.appendFileSync('blahblahblah.json', rows);
});