I have written a function that gets a list of hyperlink anchors through webscraping.
I want to push all these anchors onto an object array, which will later be serialized to a Json string.
The Api.GetCourseSubmenuUrl
method and the Api.FilterSubmenuContentList
both return promises.
The following code however keeps running without waiting for the array to be filled in the .each()
cheerio function. Why does this happen?
Please note that the each method in cheerio is synchronous.
My code uses the packages:
- Bluebird (https://github.com/petkaantonov/bluebird)
- Cheerio (https://github.com/cheeriojs/cheerio)
- Request (https://github.com/request/request)
Code:
Connection.prototype.FillCourseWithSubmenuContent = function(course){
var self = this; //This class
var submenuItems = [];
return new BPromise(function(resolve, reject){
return Api.GetCourseSubmenuUrl(ApiConnection.authToken).then(function(response){
return request.get({url: self.url + response.url + course.id, followRedirect: false, jar: cookiejar}, function(err,httpResponse,body){
if(err){
reject(err);
}
var cheerio = require('cheerio');
var dashboardhtml = cheerio.load(body, {
normalizeWhitespace: true,
decodeEntities: true
}
);
//Find all the links on the page
dashboardhtml('a').each(function(i, elem) {
console.log("Object:");
console.log({"text":dashboardhtml(elem).text(), "url":dashboardhtml(elem).attr('href')});
submenuItems.push({"text":dashboardhtml(elem).text().trim(), "url":dashboardhtml(elem).attr('href')});
});
resolve();
});
}).then(function(){
console.log(submenuItems);
return Api.FilterSubmenuContentList(ApiConnection.authToken, submenuItems);
});
}).catch(function(error){
return reject(error);
});
};