I'm working on a simple web scraper where I scrape actor/actress names, but I'm getting an error. Could you please check it out, I think I'm doing something wrong.
It returns [ReferenceError: options2 is not defined]
error.
NON WORKING VERSION (using Promises)
var entities = require("entities");
var request = require('request');
var cheerio = require('cheerio');
// create promisified version of request()
function requestPromise(options) {
return new Promise(function (resolve, reject) {
request(options2, function (err, resp, body) {
if (err) return reject(err);
resolve(body);
});
});
}
app.get('/fetch', function (req, res) {
var promises = [];
var headers = {
'User-Agent': req.headers['user-agent'],
'Content-Type': 'application/json; charset=utf-8'
};
for (var i = 1; i < 10; i++) {
promises.push(requestPromise({url: "http://example.com/person/" + i + "/personname.html", headers: headers}));
}
Promise.all(promises).then(function (data) {
// iterate through all the data here
for (var i = 0; i < data.length; i++) {
if ($ = cheerio.load(data[i])) {
var links = $("#container");
var name = links.find('span[itemprop="name"]').html(); // name
if (name == null) {
console.log("null name returned, do nothing");
} else {
name = entities.decodeHTML(name);
console.log(name); // it doesn't echo the name, WHY?
}
} else {
console.log("can't open");
}
}
}, function (err) {
// error occurred here
console.log(err);
});
});
WORKING VERSION (Ugly and not in order, that's why I'm trying to move into Promises)
var entities = require("entities");
var request = require('request');
var cheerio = require('cheerio');
app.get('/back', function (req, res) {
for (var y = 1; y < 10; y++) {
(function () {
const id = y;
var url = "http://example.com/person/" + id + "/personname.html";
var options2 = {
url: url,
headers: {
'User-Agent': req.headers['user-agent'],
'Content-Type': 'application/json; charset=utf-8'
}
};
request(options2, function (err, resp, body) {
if (err) {
console.log(err);
} else {
if ($ = cheerio.load(body)) {
var links = $('#container');
var name = links.find('span[itemprop="name"]').html(); // name
if (name == null) {
console.log("null name returned, do nothing");
} else {
name = entities.decodeHTML(name);
console.log(name); // it echoes name, so it works.
}
}
else {
console.log("can't open");
}
}
});
}());
}
});