While building a fairly complex scraper, I stumbled upon a problem with the control flow of my code.
Here's what's going on in the code below: 1) request a URL, 2) scrape NEWURL from the results, 3) pass it to the Readability API in the first async function, 4) and here comes the trouble: I never reach the next async function, which saves readabilityData to the DB.
How do I solve this problem? I'm new to JS, so please feel free to point out any other issues with my code.
request(URL, function(error, response, html) {
  if (!error) {
    var $ = cheerio.load(html);
    var NEWURL = $('a').attr('href'); // scrape the link from the loaded page
    var readabilityData = {};
    var articleUrl = 'https://readability.com/api/content/v1/parser?url=' + NEWURL + token;
    async.series([
      function() {
        // first async function: fetch parsed article data from the Readability API
        request(articleUrl, function(error, response, html) {
          if (!error) {
            readabilityData = response.toJSON();
          }
        });
      },
      function(readabilityData) {
        // second async function: save readabilityData to the DB; this is the one that never runs
        Article.findOne({
          "link": NEWURL // was 'url', which is undefined in this scope
        }, function(err, link) {
          if (link) {
            console.log(link);
          } else {
            var newArticle = new Article({
              // write stuff to DB
            });
            newArticle.save(function(err, data) {
              // save it
            });
          }
        });
      }
    ],
    function(err) {
      console.log('all good — data written');
    });
  }
});
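
For reference, this is the async.series pattern from the docs that I'm trying to follow (placeholder tasks only, not my real code; setTimeout just stands in for real async work):

var async = require('async');

async.series([
  function(callback) {
    // each task receives a callback and must invoke it when its work is done
    setTimeout(function() {
      callback(null, 'one');
    }, 100);
  },
  function(callback) {
    // this task only starts after the previous one calls its callback
    callback(null, 'two');
  }
],
function(err, results) {
  // runs once every task has called back; results is ['one', 'two']
  console.log(err, results);
});

I can't figure out how to map my nested request and findOne calls onto this shape.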