I have the following settings defined for a CasperJS script:
// Create the Casper instance with its timeouts, viewport, page settings and
// a step-timeout handler.
var casper = require('casper').create({
    waitTimeout: 50000,
    stepTimeout: 50000,
    verbose: true,
    viewportSize: { width: 1400, height: 768 },
    pageSettings: {
        userAgent: 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36',
        loadImages: false,
        loadPlugins: false,
        webSecurityEnabled: false,
        ignoreSslErrors: true
    },
    // On a step timeout: report it, then move the instance on to the step
    // after the current one (uses the custom getStepNumber/gotoStep helpers).
    onStepTimeout: function() {
        this.echo("Step timed out ");
        casper.gotoStep(casper.getStepNumber() + 1);
    }
});
I have added these functions to the CasperJS modules:
/**
 * Reports the step counter stored on this Casper instance.
 *
 * @returns {*} The current value of `this.step`.
 */
Casper.prototype.getStepNumber = function getStepNumber() {
    "use strict";
    var currentStep = this.step;
    return currentStep;
};
/**
 * Moves this instance's step counter to the requested step.
 *
 * Calls `checkStarted()` and `clear()` on the instance, then sets
 * `this.step` to `stepNum`, capped at the number of registered steps.
 *
 * @param {Number} stepNum Step number to jump to.
 * @returns {Casper} This instance, so calls can be chained.
 */
Casper.prototype.gotoStep = function gotoStep(stepNum) {
    "use strict";
    // Read the step count first, before any of the calls below run.
    var stepCount = this.steps.length;
    this.checkStarted();
    this.clear();
    // Never move past the end of the step list.
    this.step = Math.min(stepNum, stepCount);
    return this;
};
I also have a list of URLs in an array, 'urlArray'. I open all of these URLs one by one as follows:
// Queue one "open" step per entry of urlArray; each step logs a
// tab-separated INFO line once its callback runs.
casper.start().each(urlArray, function(self, url) {
    self.thenOpen(url, function() {
        var message = "INFO:\t" + url + "\tOpened.\n";
        this.echo(message);
    });
});
After opening each URL, I look for a particular string in the requested resources. Once I find it, I just print that resource URL to stdout and abort the current request, as follows:
// For every requested resource: if its URL contains the marker string,
// print the URL and abort that request; otherwise leave it alone.
casper.on('resource.requested', function(requestData, networkRequest) {
    var resourceUrl = requestData.url;
    if (resourceUrl.indexOf("some string") === -1) {
        return;
    }
    this.echo("url: " + resourceUrl);
    networkRequest.abort();
});
The problem: Casper moves on to the next page (from the urlArray) before it reaches the resource URL that I am looking for, and in some cases I get a 'stepTimeout'. How can I make Casper wait until it reaches the resource URL that I am looking for, without getting a stepTimeout (let's say I will have 60 seconds as the stepTimeout) and without skipping the current URL?
Current output is:
INFO: url1 Opened. INFO: url2 Opened. INFO: url3 Opened. prints the resource url that I am looking for. INFO: url4 Opened. INFO: url5 Opened. INFO: url6 Opened. INFO: url7 Opened. INFO: url8 Opened. prints the resource url that I am looking for. INFO: url9 Opened. INFO: url10 Opened.
Note:
All the URLs that I am crawling contain the resource URL that I am searching for.