I'm trying to get a list of all image src URLs in a given webpage using PhantomJS. My understanding is that this should be extremely easy, but for whatever reason, I can't seem to make it work. Here is the code I currently have:
// Attempt 1: once the page has finished loading, log the src of every <img> element.
var page = require('webpage').create();
page.open('http://www.walmart.com');
page.onLoadFinished = function(){
// NOTE(review): this hands a live collection of DOM nodes back out of the page context;
// page.evaluate presumably only transfers JSON-serializable values — confirm against the PhantomJS docs.
var images = page.evaluate(function(){
return document.getElementsByTagName("img");
});
// NOTE(review): `a` is never declared in this snippet; the evaluate result above is stored in `images`.
// for...in enumerates property names (strings), so `thing.src` is read from a string, not from an image.
for(thing in a){
console.log(thing.src);
}
phantom.exit();
}
I've also tried this:
// Attempt 2: build a plain array inside the page context and return it from page.evaluate.
var a = page.evaluate(function(){
// `returnStuff` (and `stuff` below) are assigned without a declaration, so they become implicit globals.
returnStuff = new Array;
// for...in enumerates property names, so `stuff` is a key string such as "0", not an <img> element;
// the array therefore ends up holding keys rather than images or their URLs.
for(stuff in document.images){
returnStuff.push(stuff);
}
return returnStuff;
});
And this:
// Attempt 3: same idea as before, but driven from the page.open completion callback.
var page = require('webpage').create();
// NOTE(review): `status` is never checked, so a failed load would go unnoticed here.
page.open('http://www.walmart.com', function(status){
// NOTE(review): document.images is a collection of DOM nodes; page.evaluate presumably only
// transfers JSON-serializable values back to the PhantomJS context — confirm against the docs.
var images = page.evaluate(function() {
return document.images;
});
// for...in enumerates property names, so `image` is a key string and `image.src` is undefined.
for(image in images){
console.log(image.src);
}
phantom.exit();
});
I've also tried iterating through the images in the evaluate function and getting the .src property that way.
None of them return anything meaningful. Returning the length of document.images shows that there are 54 images on the page, but trying to iterate through them provides nothing useful.
Also, I've looked at the following other questions and wasn't able to use the information they provided: "How to scrape javascript injected image src and alt with phantom.js" and "How to download images from a site with phantomjs".
Again, I just want the source url. I don't need the actual file itself. Thanks for any help.
UPDATE
I tried using
// Updated attempt: try to read each image's src attribute inside the page context.
var a = page.evaluate(function(){
// `returnStuff` (and `stuff` below) are assigned without a declaration, so they become implicit globals.
returnStuff = new Array;
// for...in enumerates property names, so `stuff` is a key string rather than an <img> element;
// strings have no getAttribute method, which is why the call below fails.
for(stuff in document.images){
returnStuff.push(stuff.getAttribute('src'));
}
return returnStuff;
});
It threw an error saying that stuff.getAttribute('src') returns undefined. Any idea why that would be?