0

I'm working on a simple web scraper that scrapes actor/actress names, but I'm getting an error. Could you please take a look? I think I'm doing something wrong.

It fails with the error [ReferenceError: options2 is not defined].

NON WORKING VERSION (using Promises)

var entities = require("entities");
var request = require('request');
var cheerio = require('cheerio');

// create promisified version of request()
/**
 * Wraps a callback-style `request(...)` call in a Promise.
 * Inside the callback, an `err` value rejects the promise; otherwise the
 * promise resolves with `body`.
 *
 * @param {Object} options - request settings passed in by the caller.
 * @returns {Promise} the promise created around the `request` call.
 */
function requestPromise(options) {
  return new Promise(function (resolve, reject) {
    // NOTE(review): `options2` is not declared anywhere in this function; the
    // parameter is named `options` and is never read. This reference is the
    // source of the "options2 is not defined" ReferenceError reported above.
    request(options2, function (err, resp, body) {
      if (err) return reject(err);
      resolve(body);
    });
  });
}

// GET /fetch: requests person pages 1 through 9 via requestPromise(), waits for
// all of them with Promise.all (which keeps the results in request order), and
// logs the decoded name found on each page.
// NOTE(review): `res` is never used, so this handler only logs to the console
// and does not send anything back to the client.
app.get('/fetch', function (req, res) {
  const headers = {
    'User-Agent': req.headers['user-agent'],
    'Content-Type': 'application/json; charset=utf-8'
  };

  const promises = [];
  for (let i = 1; i < 10; i++) {
    promises.push(requestPromise({url: "http://example.com/person/" + i + "/personname.html", headers: headers}));
  }

  Promise.all(promises).then(function (data) {
    // iterate through all the data here
    for (const body of data) {
      // `$` is declared locally; the original assigned it without a
      // declaration, which creates an implicit global (and throws in strict mode).
      const $ = cheerio.load(body);
      if (!$) {
        console.log("can't open");
        continue;
      }

      const name = $("#container").find('span[itemprop="name"]').html(); // name
      if (name == null) {
        console.log("null name returned, do nothing");
      } else {
        console.log(entities.decodeHTML(name)); // it doesn't echo the name, WHY?
      }
    }
  }).catch(function (err) {
    // A trailing catch sees both a rejected request and any exception thrown
    // by the parsing code above; a second argument to then() would only see
    // the former.
    console.log(err);
  });
});

WORKING VERSION (Ugly and not in order, that's why I'm trying to move into Promises)

var entities = require("entities");
var request = require('request');
var cheerio = require('cheerio');

// GET /back: fires one callback-style request per person page (1 through 9) and
// logs the decoded name from each response as it arrives, so the output order
// follows response arrival rather than page number.
// NOTE(review): `res` is never used, so this handler only logs to the console
// and does not send anything back to the client.
app.get('/back', function (req, res) {
  // `let` gives every iteration its own `y`, so no wrapper function is needed
  // to capture the page id.
  for (let y = 1; y < 10; y++) {
    const url = "http://example.com/person/" + y + "/personname.html";
    const options2 = {
      url: url,
      headers: {
        'User-Agent': req.headers['user-agent'],
        'Content-Type': 'application/json; charset=utf-8'
      }
    };

    request(options2, function (err, resp, body) {
      if (err) {
        console.log(err);
        return;
      }

      // `$` is declared locally; the original assigned it without a
      // declaration, which creates an implicit global (and throws in strict mode).
      const $ = cheerio.load(body);
      if (!$) {
        console.log("can't open");
        return;
      }

      const name = $('#container').find('span[itemprop="name"]').html(); // name
      if (name == null) {
        console.log("null name returned, do nothing");
      } else {
        console.log(entities.decodeHTML(name)); // it echoes name, so it works.
      }
    });
  }
});
salep
  • 1,332
  • 9
  • 44
  • 93
  • is it hitting the error? – dm03514 Nov 29 '15 at 18:52
  • This is basic Javascript debugging to follow what response you are getting and why it isn't following the code path you want. I'd say to first put a `console.log(err)` in the error branch of the `Promise.all().then()` handler. If any one of your requests is returning an error, `Promise.all()` will go to the error handler. If that doesn't show anything, then do a `console.log(data)` in the `Promise.all().then()` handler before your `for` loop to see exactly what you're getting there. Or, you can set a breakpoint in the two branches of the `Promise.all().then()` handler and just step through. – jfriend00 Nov 29 '15 at 18:54
  • I edited the question details, thanks for the help. – salep Nov 29 '15 at 19:00

1 Answer

2

You have a mistake here:

// create promisified version of request()
/**
 * Wraps a callback-style `request(...)` call in a Promise.
 * Inside the callback, an `err` value rejects the promise; otherwise the
 * promise resolves with `body`.
 *
 * @param {Object} options - request settings passed in by the caller.
 * @returns {Promise} the promise created around the `request` call.
 */
function requestPromise(options) {
  return new Promise(function (resolve, reject) {
    // NOTE(review): this is the mistake — `options2` is not declared in this
    // function; the parameter is named `options` and is never read.
    request(options2, function (err, resp, body) {
      if (err) return reject(err);
      resolve(body);
    });
  });
}

You are referring to options2, but the parameter is named options.

Change it to this:

// create promisified version of request()
/**
 * Promise-returning adapter around the callback-based `request()`.
 *
 * @param {Object} options - request settings, handed to `request()` unchanged.
 * @returns {Promise} resolves with the response body, or rejects with the
 *   error that `request()` passed to its callback.
 */
function requestPromise(options) {
  return new Promise((resolve, reject) => {
    // Pass the `options` parameter through (not `options2`).
    request(options, (error, response, payload) => {
      if (error) {
        reject(error);
        return;
      }
      resolve(payload);
    });
  });
}

One thing to learn from this about promises is that they catch errors for you. So if you have an error handler and your code isn't working as desired, you should always log the error paths to see if one is getting hit. If you don't have an error handler defined in your code, you should add one and log the error there.

Logging errors in asynchronous operations is critically important to debugging because errors (even exceptions) in async code will not usually show up automatically in the console. So, you will often have to log them yourself to know they are occurring.

jfriend00
  • 683,504
  • 96
  • 985
  • 979