1

I tried to scrape several pieces of data within a single request, as in the code below, but it doesn't work. When I tried just one procedure, it worked. How can I call multiple procedures within one request callback?

var fs = require('fs');
var request = require('request');
var cheerio = require('cheerio');
// Page to fetch.
// NOTE(review): the URL has no scheme ("http://" / "https://") — confirm that
// `request` accepts a scheme-less address.
var link = "www.google.com";
// Fetch `link` and, when the request succeeds with HTTP 200, append the inner
// HTML of every element matching three class selectors to three text files.
// Nothing is done (and nothing is reported) on an error or a non-200 status.
request(link, function (error, response, html) {
    if (!error && response.statusCode == 200) {
        // Parse the response body so it can be queried with selectors.
        var $ = cheerio.load(html);
        // Scrape elements with class "someclass".
        // NOTE(review): .filter() is used here only for the side effect of its
        // callback; the callback returns nothing and the filtered result is
        // discarded.
        $('.someclass').filter(function () {
            var data = $(this);
            var description = data.html();
            // Append the element's HTML to the file (asynchronous); a write
            // failure is rethrown from inside the completion callback.
            fs.appendFile('description.txt', description + "\n", function (err) {
                if (err)
                    throw err;
            });
        });
        // Scrape elements with class "someclass1" (same pattern as above,
        // written to description1.txt).
        $('.someclass1').filter(function () {
            var data = $(this);
            var description1 = data.html();
            // Append the element's HTML to the file (asynchronous).
            fs.appendFile('description1.txt', description1 + "\n", function (err) {
                if (err)
                    throw err;
                //console.log('The "description" was appended to file!');
            });
        });
        // Scrape elements with class "someclass2" (same pattern as above,
        // written to description2.txt).
        $('.someclass2').filter(function () {
            var data = $(this);
            var description2 = data.html();
            // Append the element's HTML to the file (asynchronous).
            fs.appendFile('description2.txt', description2 + "\n", function (err) {
                if (err)
                    throw err;
                //console.log('The "description" was appended to file!');
            });
        });
    }
});
t3dodson
  • 3,949
  • 2
  • 29
  • 40
edinvnode
  • 3,497
  • 7
  • 30
  • 53

1 Answer

2

Filter isn't doing what you think it is. You are looking for .each(). Filter takes a list and returns a smaller list. Each iterates over items.

/**
 * Build a callback, suitable for passing to `.each()`, that appends the
 * content of the element it is invoked on to a file.
 *
 * @param {Function} $ - Function used to wrap `this` (the loaded document
 *     handle, e.g. the result of `cheerio.load(html)`).
 * @param {string} methodStr - Name of the method to call on the wrapped
 *     element to extract its content, e.g. 'text' or 'html'.
 * @param {string} fileName - Path of the file the content is appended to.
 * @param {Function} [modifyFunc] - Optional transform. Receives the extracted
 *     string (trailing newline already added) and must return the string that
 *     should be written to the file.
 * @returns {Function} Callback that reads the current element from `this`.
 */
function writeToFile($, methodStr, fileName, modifyFunc) {
    return function () {
        // Whoever calls this function gets its content written to whatever
        // fileName was passed to the outer function.
        var text = $(this)[methodStr]() + "\n";
        // The parameter is `modifyFunc` (capital F); testing a misspelled,
        // undeclared name would always be false and the transform would
        // never run.
        if (typeof modifyFunc === 'function') {
            text = modifyFunc(text);
        }
        // Synchronous append keeps the output in document order.
        fs.appendFileSync(fileName, text);
    };
}

Then apply it like this

// Fetch the page and, on an HTTP 200 response, write the content of each
// matching element to its file via writeToFile().
request(link, function (error, response, html) {
    if (!error && response.statusCode === 200) {
        var $ = cheerio.load(html);
        // these each statements say that for every element that has .someclass
        // give it the inner function in writeToFile where fileName is description.txt
        $('.someclass').each(writeToFile($, 'text', 'description.txt'));
        $('.someclass1').each(writeToFile($, 'html', 'description1.txt'));
        // The optional fourth argument receives the extracted string and
        // returns the string that is actually written to the file.
        $('.someclass2').each(writeToFile($, 'text', 'description.txt2', function (str){
            return str + "Here is a change that will also get written to the file";
        }));
    }
});
t3dodson
  • 3,949
  • 2
  • 29
  • 40
  • filterBody is undefined. – edinvnode May 27 '15 at 17:28
  • Doing this in node.js. – edinvnode May 27 '15 at 17:36
  • @macroscripts Gotcha I just looked up the cheerio library which seems pretty nice. It has .each() defined so I still think that this should be working for you. Did you change your code to match my update? Do you still have an undefined function? – t3dodson May 27 '15 at 18:06
  • @macroscripts From the docs, I read about .each: https://github.com/cheeriojs/cheerio#each-functionindex-element- It's also a standard method in jQuery. Filter can perform actions, but you shouldn't use it for that; it's strictly for returning a smaller list. My code above shows how to call each like this. – t3dodson May 27 '15 at 18:09
  • @macroscripts I've added comments to explain what the code does I hope it helps. – t3dodson May 27 '15 at 18:12
  • Getting better. Now it says $ is not defined within writeToFile() function. – edinvnode May 27 '15 at 18:47
  • 1
    @macroscripts That was a slight error in my code. I've changed it: we are now passing the $, which is the loaded HTML, to the function, so now it should work. – t3dodson May 27 '15 at 18:56
  • Works! One more thing. Can you add a part that will enable me to switch between text or html? I don't want each to do html(), some will do text(). – edinvnode May 27 '15 at 19:12
  • I have one more question, so I will ask it here. How can I use the extracted text outside of these functions? Let's say I want to do some more work on the text I wrote to the file. How can I do that below $('.someclass2').each(writeToFile($, 'text', 'description.txt2'));? – edinvnode May 27 '15 at 19:47
  • 1
    @macroscripts I've tacked on another bit where you can optionally provide a function that receives the string as a parameter; it must return the string that you want to be written to the file. – t3dodson May 27 '15 at 19:54
  • Ok that looks great but I was talking something like this. https://jsfiddle.net/2bzy16xs/ – edinvnode May 27 '15 at 19:58