0

This is the snippet that I use to tell CasperJS to parse a (single) web page for a targeted set of links.

var casper = require('casper').create();
var links;

function getLinks() {
    var links = document.querySelectorAll('li.Item div.CoverImageContainer a');
    return Array.prototype.map.call(links, function (e) {
        return e.getAttribute('href')
    });
}

casper.start('https://nypl.overdrive.com/search?mediaType=ebook&subject=111&subject=14&sortBy=newlyadded');

casper.then(function () {
    links = this.evaluate(getLinks);
});

casper.run(function () {
    this.echo(' - ' + links.join('\n - ')).exit();
});

And the output would be like:

$ casperjs OD-links.js
 - /media/3204074
 - /media/3691522
 - /media/3360010
 - ...

Now, I want CasperJS to do the same when supplied with multiple URLs (array), working on a single URL at a time. Here's how I am attempting to do that:

var casper = require('casper').create();
var links;
var urls;

function getLinks() {
    var links = document.querySelectorAll('li.Item div.CoverImageContainer a');
    return Array.prototype.map.call(links, function (e) {
        return e.getAttribute('href')
    });
}

casper.start('about:blank');
var urls = [
    'https://nypl.overdrive.com/search?sortBy=newlyadded&mediaType=ebook&subject=111&subject=14&page=1',
    'https://nypl.overdrive.com/search?sortBy=newlyadded&mediaType=ebook&subject=111&subject=14&page=2',
    'https://nypl.overdrive.com/search?sortBy=newlyadded&mediaType=ebook&subject=111&subject=14&page=3'
];

casper.each(urls, function(casper, url) {
    casper.thenOpen(url, function() {
        links = this.evaluate(getLinks);
    });

    casper.run(function () {
        this.echo(' - ' + links.join('\n - ')).exit();
    });
});

It almost works. CasperJS seems to parse each URL given to it in the array, BUT in the end, it's only outputting the 24 links parsed from the last page, instead of 72 (24 * 3 URLs).

How do I fix my code? What am I missing?

(PS: I am just beginning to learn JS, but need this for a personal project.)


Temporary workaround that I am currently using:

var casper = require('casper').create();
var links;

function getLinks() {
    var links = document.querySelectorAll('li.Item div.CoverImageContainer a');
    return Array.prototype.map.call(links, function (e) {
        return e.getAttribute('href')
    });
}

// THIS lets us use --url argument
casper.start(casper.cli.get('url'));

casper.then(function () {
    links = this.evaluate(getLinks);
});

casper.run(function () {
    this.echo(' - ' + links.join('\n - ')).exit();
});

So my command becomes:

casperjs OD-links.js --url='http://example.com/'
its_me
  • 10,998
  • 25
  • 82
  • 130
  • Why do you have your run command inside of each function? – Mario Nikolaus Jan 06 '18 at 01:31
  • @MarioNikolaus I don't understand, where am I doing that? If you are referring to `casper.start(casper.cli.get('url'));`, I am simply telling it to get the URL from the argument in the command I run. – its_me Jan 09 '18 at 02:32
  • Possible duplicate of [Casperjs iterating over a list of links using casper.each](https://stackoverflow.com/questions/40412726/casperjs-iterating-over-a-list-of-links-using-casper-each) – Vaviloff Jan 09 '18 at 09:22

0 Answers0