0

I'm using Request, Cheerio and Node.js

I need to limit concurrent requests to 10. I want to process URL's in parallel but not in the same time, hope it's clear.

Here's my current script. If I run my for loop from 0 to 100, it works. 100 to 200, it works. 200 to 300, it works. But 0-1000 gives connection error. I need to put a limit so it can work with 5 or 10 requests at a time.

app.get('/back', function (req, res) {
    for (var y = 0; y < 1000; y++) {
        (function () {
            const id = y;
            var url = "http://www.example.com/" + y;
            var options2 = {
                url: url,
                headers: {
                    'User-Agent': req.headers['user-agent'],
                    'Content-Type': 'application/json; charset=utf-8'
                }
            };

                request(options2, function (err, resp, body) {
                    if (err) {
                        console.log(err);
                    } else {
                        if ($ = cheerio.load(body)) {
                            var links = $('#container');
                            var name = links.find('span[itemprop="name"]').html(); // name
                            if (name == null) {
                                console.log("null returned");
                            } else {
                                name = entities.decodeHTML(name);
                                // check name existence
                                console.log("yay!" + name);
                                    }
                                });
                            }
                        }
                        else {
                            console.log("couldn't open it");
                        }
                    }
                }
            );
        }());
    }
});
salep
  • 1,332
  • 9
  • 44
  • 93
  • Are you attempting to screen scrape thousands of pages at once? You might need to implement a timeout or something in between these perhaps... – Mark Pieszak - Trilon.io Nov 04 '15 at 15:12
  • 1
    You can use something like the async module, but if you are getting into the thousands of pages, it might be time to look into kue or another message queue approach. – Matthew Bakaitis Nov 04 '15 at 15:22
  • @MarkPieszak, yes, at once. That's why I need to create some kind of queue. Matthew Bakaitis, thank you. I'll look into it. – salep Nov 04 '15 at 17:22

0 Answers0