3

I'm trying to read URLs from a CSV file, check whether they're available, and write out bad URLs to another file. I get through about a few thousand, before slowing down drastically (from 5 checks a second to a check every 10 seconds), and then get "FATAL ERROR: CALL_AND_RETRY_2 Allocation failed - process out of memory", presumably because I'm not applying backpressure correctly to the emitting stream.

var csv = require('csv');
var request = require('request');
var fs = require('fs');

csv()
.from(fs.createReadStream('./urls.csv'), {columns: true})
.to(fs.createWriteStream('./badurls.csv'))
.transform(function(data, index, callback){
  checkImage(null,callback,data['main-image-url'],index)
});

function checkImage(err, callback, url, index) {
  if (url != "") {
    request.head(url, function(err, res) {
      console.log(index,res.statusCode,url);
      if (res.statusCode != 200) {
        callback(null,url+"\n");
      }
    })
  }
}
Paul Sweatte
  • 24,148
  • 7
  • 127
  • 265
Dan Kohn
  • 33,811
  • 9
  • 84
  • 100

0 Answers0