1

I'm writing a bulk downloader for node.js and trying to understand bluebird promises. I want to limit the number of parallel requests and disk writes. As I understand it, Promise.map() with {concurrent: } should do what I want.

Because pipe() and http.get() can't automatically be promisified, I'm trying to use custom promises.

But I don't fully understand the then() mechanism. To me, it sounds like the returned promise should only be fulfilled when the whole chain has been fulfilled.

However, in my code only the first promise in the chain appears to be waited for by map(), and many requests and disk writes happen in parallel.

import Promise from 'bluebird';
import fs from 'fs';
import https from 'https';

// Promisify fs; note that none of the promisified methods are actually used below.
Promise.promisifyAll(fs);
// For every entry of Images: open a write stream, download image.url over HTTPS,
// pipe the response into the file, and wait for the file stream to finish.
// The promise returned from the mapper is the END of the then/catch chain, so map()
// does wait for the whole download + write of each image.
Promise.map(Images, image => {
        console.log("Opening image " + image.id);
        // The write stream is created as soon as the mapper is invoked for this image.
        let file = fs.createWriteStream(dir + '/' + image.id + '.jpg');
        // Adapt the callback-style https.get(): fulfill with the response object,
        // reject if the request emits "error".
        return new Promise((resolve, reject) => {
              console.log("Downloading image " + image.id);
              https.get(image.url, resolve).on("error", reject);
          })
          .then(response => {
              response.pipe(file);
              console.log("Saving image " + image.id);
              // Returning a promise here makes the chain wait until the file stream
              // emits "finish" (or fail when it emits "error").
              return new Promise((resolve, reject) => {
                  file.on("finish", resolve);
                  file.on("error", reject);
              });
          })
          .then(() => {
              console.log("Finished writing image " + image.id);
              file.close();
          })
          // The error is logged and not rethrown, so this image's promise still
          // fulfills and the outer catch() below is not triggered by per-image failures.
          .catch(e => {
              console.log("Error during image save of " + image.id + ": " + e.code)
          });
    // NOTE(review): the Bluebird option is spelled `concurrency`; `concurrent` is not the
    // documented option name, so the intended limit of 50 is presumably not applied.
    }, {concurrent: 50})
      .then(res => {
          console.log("Finished writing all images")
      })
      .catch(e => {
          console.log("Some images failed to be written: " + e.code)
      });
}

What am I doing wrong? Can you help me understand the flow of promise fulfillment and rejection?

  • Your code appears to be correct. Are you saying that there are *more* than 50 downloads+diskwrites happening concurrently? Can you post the resulting log (maybe with a downscaled example, 10 files with concurrency of 5), please? – Bergi Apr 16 '16 at 19:40
  • 1
    I figured it out. The keyword is `concurrency` not `concurrent`. Fail – unusual_thoughts Apr 17 '16 at 12:46

1 Answer

3

From what I can understand, you're trying to download multiple images with promises. You don't actually need to promisify fs. You should use the request module for easier downloading.

Here's the shortest working example I can come up with:

var Promise = require('bluebird');
var path = require('path');
var fs = require('fs');
var request = require('request');

// Images to fetch: each entry pairs a source URL with the local file name to save it under.
var images = [
    ['http://bluebirdjs.com/img/logo.png', 'bluebird.png'],
    ['http://design.ubuntu.com/wp-content/uploads/ubuntu-logo32.png', 'ubuntu.png'],
    ['https://www.raspberrypi.org/wp-content/uploads/2012/03/raspberry-pi-logo.png', 'raspberry-pi.png']
].map(([url, file_name]) => ({ url, file_name }));

// To Download Serially
// Promise.each handles one image at a time: the next download only starts once the
// promise returned for the current image has fulfilled.
Promise.each(images, image => new Promise((resolve, reject) => {
    console.log('Downloading Image: ' + image.file_name);
    request(image.url)
        // Network / request failures.
        .on('error', reject)
        .pipe(fs.createWriteStream(path.join(__dirname, image.file_name)))
        // pipe() returns the destination stream and does not forward errors, so disk
        // failures need their own handler; without it the promise would never settle
        // and the unhandled 'error' event would crash the process.
        .on('error', reject)
        // 'finish' fires once all data has been flushed to the file.
        .on('finish', () => {
            console.log('Downloaded Image: ' + image.file_name);
            resolve();
        });
})).then(() => {
    console.log('All Image Downloaded!');
}).catch(err => {
    console.error('Failed: ' + err.message);
});

// To Download in Parallel (with 2 maximum concurrent jobs)
// The `concurrency` option caps how many of the returned promises may be pending at once.
Promise.map(images, image => new Promise((resolve, reject) => {
    console.log('Downloading Image: ' + image.file_name);
    request(image.url)
        // Network / request failures.
        .on('error', reject)
        .pipe(fs.createWriteStream(path.join(__dirname, image.file_name)))
        // pipe() returns the destination stream and does not forward errors, so disk
        // failures need their own handler; without it the promise would never settle,
        // its concurrency slot would never be released, and the unhandled 'error'
        // event would crash the process.
        .on('error', reject)
        // 'finish' fires once all data has been flushed to the file.
        .on('finish', () => {
            console.log('Downloaded Image: ' + image.file_name);
            resolve();
        });
}), {
    concurrency: 2
}).then(() => {
    console.log('All Image Downloaded!');
}).catch(err => {
    console.error('Failed: ' + err.message);
});