I am trying to use Casperjs to get a list of links from a page, then open each of those links, and add to an array object a particular type of data from those pages.
The problem I am having is with the loop that executes over each of the list items.
First I get a listOfLinks
from the original page. This part works and using length I can check that this list is populated.
However, using the loop statement this.each
as below, none of the console statements ever show up and casperjs appears to skip over this block.
Replacing this.each
with a standard for loop, the execution only gets part way through the first link, as the statement "Creating new array in object for x.html" appears once and then the code stops executing. Using an IIFE doesn't change this.
Edit: in verbose debugging mode the following happens:
Creating new array object for https://example.com
[debug] [phantom] Navigation requested: url=about:blank, type=Other, willNavigate=true, isMainFrame=true
So for some reason the URL that is passed into the thenOpen function gets changed to blank...
I feel like there is something about Casperjs's asynchronous nature that I am not grasping here, and would be grateful to be pointed towards a working example.
casper.then(function () {
var date = Date.now();
console.log(date);
var object = {};
object[date] = {}; // new object for date
var listOfLinks = this.evaluate(function(){
console.log("getting links");
return document.getElementsByClassName('importantLink');
});
console.log(listOfLinks.length);
this.each(listOfLinks, function(self, link) {
var eachPageHref = link.href;
console.log("Creating new array in object for " + eachPageHref);
object[date][eachPageHref] = []; // array for page to store names
self.thenOpen(eachPageHref, function () {
var listOfItems = this.evaluate(function() {
var items = [];
// Perform DOM manipulation to get items
return items;
});
});
object[date][eachPageHref] = items;
});
console.log(JSON.stringify(object));
});