0

url = "www.w3schools.com/html/html5_video.asp"

VIDEO_LINKS = [];
VIDEO_LIST = [];

function fillVideo(callback) {
  request(url, function(err, res, body) {
    if (body) {
      $ = cheerio.load(body);
    }
    links = $('source');
    $(links).each(function(i, link) {
      var value = $(link).attr('src');
      if (value.slice(-3) == "mp4" ||
        value.slice(-4) == "webm" ||
        value.slice(-3) == "ogv") {
        VIDEO_LINKS.push(value);
        VIDEO_LIST.push($(link).text());

      }

    })
    callback();
  });
}

function writeVideo() {

  for (j = 0; j < VIDEO_LIST.length; j++) {
    request(VIDEO_LINKS[j]).pipe(fs.createWriteStream(VIDEO_LIST[j]));
  }
}

fillVideo(writeVideo);

//www.electronicinfo.ca/printable-pdfs

PDF_LINKS = [];
PDF_LIST = [];

function fillPDF(callback) {
  request(url, function(err, res, body) {
    $ = cheerio.load(body);
    links = $('a');
    $(links).each(function(i, link) {
      var value = $(link).attr('href');
      if (value.slice(-3) == "pdf") {
        PDF_LINKS.push(value);
        PDF_LIST.push($(link).text());
      }
    })
    callback();
  });
}

function writePDF() {
  for (j = 0; j < PDF_LIST.length; j++) {
    request(PDF_LINKS[j]).pipe(fs.createWriteStream(PDF_LIST[j]));
  }
}

fillPDF(writePDF);

Hi, This is code that used to work, I changed literally nothing, from about 5 minutes ago, the only thing I changed was duplicating it, and changing variable names. My question is how to fix this code? I know the error is that body is empty, but I dont know how to fix it, i would appeciate any help...

ibrahim mahrir
  • 31,174
  • 5
  • 48
  • 73
jewstin
  • 87
  • 1
  • 12

1 Answers1

0

The errors:

  1. Invalid url because of unspecified protocol. I've just tested your code, and request is throwing this error:

Error: Invalid URI "www.w3schools.com/html/html5_video.asp".

  1. The links extracted from the <source> elements are relative. This link will help you understand and solve the problem.
  2. The <source> element by itself doesn't have text. You'll have to use the one from the url using Url.parse(src).pathname.
  3. Error handling and some variables related errors: undeclared variables, uninitialized variables and gloabal variables that polute the global scope and could potentially cause other problems.

Fix:

You should specify the protocol (http, https, ...) for the url.

Also, your code needs a little bit of cleaning (declare variables, remove the global variables, ...):

var Url = require("url");
// ...

function fillVideo(url, callback) {
    request(url, function(err, res, body) {
        if(err) {
            return callback(err, null);                         // or throw an error if you like
        }

        var $ = cheerio.load(body),
            result = [];
        $('source').each(function() {
            var $this = $(this),
                src = $this.attr("src");
            if(/(?:mp4|webm|ogv)$/i.test(src)) {
                result.push({
                    url: Url.resolve(url, src),                 // make the url absolute
                    text: Url.parse(src).pathname
                });
            }
        });

        callback(null, result);
    });
}

Then use it like this:

fillVideo("http://www.w3schools.com/html/html5_video.asp", function(err, videos) {
    if(err) {
        console.log("Error: ", err);
        return;
    }

    videos.forEach(function(video) {
        request(video.url).pipe(fs.createWriteStream(video.text));
    });
});

Note:

Always check/log the err object of the callbacks. They tell exactly what is wrong with your code. In this case, that the URI is invalid, which is in fact true.

ibrahim mahrir
  • 31,174
  • 5
  • 48
  • 73
  • I appreciate your quick response, but what exactly was the problem with my code? – jewstin Apr 19 '18 at 00:00
  • Additionally, your code seems more convulated, then my code, no offense, i just seem to like my code better... – jewstin Apr 19 '18 at 00:02
  • If you ran my code, then this should work (initially i used this and it worked,) www.electronicinfo.ca/printable-pdfs (This wont work with the program, becuase wrong extension, but it should compile correctly) – jewstin Apr 19 '18 at 00:15
  • www.pubcom.com/PDF-test_1.html (This link worked the first time, but not the second time) – jewstin Apr 19 '18 at 00:19
  • @ ibrahim mahrir I realize that my URI, is invalid, but it was valid, I dont get why? Is there a reason, that I can use a url one time, but the next time I use it, it becomes invalid – jewstin Apr 19 '18 at 00:25
  • OK i think I see what Im doing wrong, thank you, so TLDR, my pdf function uses absoulete pdfs, which is why it works, and my Video function uses relative urls which is why it doesnt work, to resolve this I have to use Url.parse(src).pathname.to get the absoulete URL, which then I can use my original video function to get all the videos – jewstin Apr 19 '18 at 00:37
  • @jewstin Yeah exactly. `Url.resolve` to get the absolute path. And the text is missing too so you'll have to find another alternative (`Url.parse().pathname` will do). Anyways, I've found all the errors and listed them above (boy did they stop showing up). To make sure there is no other errors, I've run the above code and it run successfully and as expected. – ibrahim mahrir Apr 19 '18 at 00:41
  • Thanks man! I appreciate your time! I hope I can get this working! – jewstin Apr 19 '18 at 00:43