3

I'm working on a nightmare JS script that logs in to a website, builds a list of links based on results in a table then opens each of those links. On each link opened, some information is scraped and added to the end results.

I'm having lots of trouble with the loop of opening each link. This process must be synchronous due to restrictions at the website.

Below is a very simplified example of what I have so far, and what I'm getting returned.

I'm new at this library, and while I've been looking at the documentation I'm finding it pretty confusing.

Any advice on getting a .goto() loop working?

const Nightmare = require('nightmare')
const moment = require('moment')

const opts = {
  show: true,
  openDevTools: { mode: 'detach' },
  pollInterval: 250,
  waitTimeout: 10000,
  webPreferences: {
    webSecurity: false
  }
}

const nightmare = Nightmare(opts)

nightmare
  .goto('https://www.google.co.nz/')
  .evaluate(links => {
    var allHrefs = document.querySelectorAll('#fbar #fsl a')
    var allLinks = []
    allHrefs.forEach(function(a) {
      allLinks.push(a.href)
    })

    console.log('allLinks:', allLinks)

    return allLinks
  }, '.what')
  .end()
  .then(result => {
    console.log('result:', result)
    let titles = []
    result.forEach(link => {
      return nightmare
        .goto(link)
        .wait('#navheader')
        .evaluate(getTitle => {
          var thisTitle = document.title
          console.log('this title:', thisTitle)
          titles.push(thisTitle)
        })
    })
    console.log('titles:', titles)
    return titles
  })

and here, the console results after running the script:

$ node scripts/nm_test.js 
result: [ 'https://www.google.co.nz/intl/en/ads/?fg=1',
  'https://www.google.co.nz/services/?fg=1',
  'https://www.google.co.nz/intl/en/about.html?fg=1' ]
titles: []
Cat Burston
  • 2,833
  • 2
  • 12
  • 10
  • 1
    **must be synchronous due to restrictions at the website** Why would a website care whether the request to it was synchronous or asynchronous? How can it tell the difference? – Barmar Aug 29 '17 at 00:00
  • Are you sure you're not confusing rate limiting with synchronicity? – Barmar Aug 29 '17 at 00:01
  • good question - this website does rate limit logins: if you login using the same details twice, it knocks out the first login. But it also seems to hate it if you even have multiple links open from the website at the same time: I have no idea how/why they might have done that! – Cat Burston Aug 29 '17 at 00:16
  • Use promises so that you don't connect to the website multiple times concurrently. You can still do it asynchronously. – Barmar Aug 29 '17 at 00:19

0 Answers0