0

I need to scrape real estate ads into a nidax.json file. I start on the page that lists all ads and follow the link to each individual ad to extract the data I need. I am using the Node.js x-ray scraper, but for some reason it does not work.

Sometimes it returns nothing, sometimes it returns just links to individual ads.

var Xray = require('x-ray');
var x = Xray();

// Scrape the listing page: every matched "read more" span is treated as one ad.
// For each ad, record its link and follow that link to read details from the
// ad's own page. The collected results are written to nidax.json.
x('http://nidax-nekretnine.rs/nekretnine/', 'div.kutija-veca_dno > div.read-more span', [{
    url: 'a@href',
    // Selectors inside this object are evaluated relative to the current span
    // (exactly like `url` above), so the nested call only needs 'a@href' to
    // pick up the ad's link and crawl the page it points to.
    items: x('a@href', {
        location: 'body > div.contentarea-novo > div > div.info-part > div.one-third  div.osnovni-podaci > p:nth-child(2) > span.orange-text'
    })
}]).write('nidax.json');
Edi G.
  • 2,432
  • 7
  • 24
  • 33

1 Answer

0

You can subscribe to the following pull request to be notified when it is approved.

In the meantime, I recommend applying the fix directly to your downloaded copy of the x-ray module. It is a one-line change, and I have tested it in two projects; it simply works. Take a look at the index.js file at line 237 and note the "return" after the long comment:

/**
 * Build a walker that evaluates `selector` against a loaded document.
 *
 * @param {Function} xray - factory called as `xray(scope, selector)` to build
 *   a nested walker for each element matched by `scope`.
 * @param {*} selector - value handed to `walk`; each visited entry may be a
 *   string, a function, or an array.
 * @param {*} scope - scope passed to `root`/`resolve` and used to select the
 *   elements iterated for an array-of-objects selector.
 * @param {*} filters - forwarded untouched to `resolve`.
 * @returns {Function} `walkHTML($, fn)`; `fn` receives `(err)` on failure or
 *   `(null, result, $)` on success.
 */
function WalkHTML (xray, selector, scope, filters) {
  return function walkHTML ($, fn) {
    function visit (v, k, next) {
      var kind = typeof v

      // Plain string selector: resolve it right away.
      if (kind === 'string') {
        var value = resolve($, root(scope), v, filters)
        return next(null, value)
      }

      // Function selector: let it produce the value asynchronously.
      if (kind === 'function') {
        return v($, function (err, obj) {
          return err ? next(err) : next(null, obj)
        })
      }

      // Anything that is not an array yields no value.
      if (!isArray(v)) return next()

      var firstKind = typeof v[0]

      // Array of string selectors: resolved as a whole.
      if (firstKind === 'string') {
        return next(null, resolve($, root(scope), v, filters))
      }

      // Arrays whose first entry is neither a string nor an object yield no value.
      if (firstKind !== 'object') return next()

      // Array of objects: run a nested walker once per element matched by scope.
      var $matches = $.find ? $.find(scope) : $(scope)
      var remaining = $matches.length
      var results = []

      // Nothing matched: report the empty list immediately.
      if (!remaining) return next(null, results)

      $matches.each(function (index) {
        var $element = $matches.eq(index)
        var nested = xray(scope, v[0])
        nested($element, function (err, obj) {
          if (err) return next(err)
          results[index] = obj
          remaining -= 1
          if (!remaining) {
            return next(null, compact(results))
          }
        })
      })

      // Workaround for nested crawling being broken on 'master' (pending merge
      // of 'bugfix/nested-crawling', #111): leave here WITHOUT calling next().
      // Calling it at this point reached the "finished" callback before all
      // pending requests had been retrieved; completion must instead come from
      // the `next(null, compact(results))` call above.
      return
    }

    walk(selector, visit, function (err, obj) {
      if (err) return fn(err)
      fn(null, obj, $)
    })
  }
}
Rober
  • 726
  • 8
  • 27