
I have created a Lambda function that generates thumbnail images from MP4 video files using ffmpeg and Mediainfo, and it works great for smaller files.

So far, I have been successful in creating a thumbnail image for files sized 372.5 KB and 73.4 KB, but have received an error for files sized 2.9 MB and 7.9 MB.

In my CloudWatch logs I see the following error:

https://s3-us-west-2.amazonaws.com/object-path, HTTP server doesn't seem to support byte ranges. Cannot resume.

The error occurs when I try to extract the video metadata with Mediainfo. I installed the Mediainfo binary, built with libcurl, in an EC2 environment.

I am a relative novice with cURL, Mediainfo, and Lambda, so I feel I have reached my limit in trying to figure this out. I am unsure whether this error is caused by the Lambda Node.js environment or by Mediainfo itself.

Any help to resolve this would be greatly appreciated. I can provide more clarifying information if it is needed.

Code for reference:

process.env.PATH = process.env.PATH + ":/tmp/";
var child_process = require("child_process");
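// Copy the bundled ffmpeg binary into /tmp (the only writable location on Lambda) and make it executable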
child_process.exec(
  "cp /var/task/ffmpeg /tmp/.; chmod 755 /tmp/ffmpeg;",
  function (error, stdout, stderr) {
    if (error) {
      console.log(error);
    }
  }
);

var mediainfo = require("mediainfo-wrapper");
var async = require("async");
var AWS = require("aws-sdk");
var fs = require("fs");
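// S3 event notifications URL-encode object keys, with spaces encoded as "+"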
var utils = {
  decodeKey: function(key) {
    return decodeURIComponent(key).replace(/\+/g, " ");
  }
};
var s3 = new AWS.S3();
var thumbKeyPrefix = "thumbnails/",
  thumbWidth = 300,
  thumbHeight = 300,
  allowedFileTypes = ["mp4"];

exports.handler = function(event, context) {
  var tmpFile = fs.createWriteStream("/tmp/screenshot.jpg");
  var srcKey = utils.decodeKey(event.Records[0].s3.object.key),
    bucket = event.Records[0].s3.bucket.name,
    dstKey = thumbKeyPrefix + srcKey.replace(/\.\w+$/, ".jpg"),
    fileType = srcKey.match(/\.\w+$/),
    target = s3.getSignedUrl("getObject",{Bucket:bucket, Key:srcKey, Expires: 900}),
    metadata = {width: 0, height: 0, duration: 0};

  if(srcKey.indexOf(thumbKeyPrefix) === 0) return;
  if (fileType === null) {
    context.fail("Invalid filetype found for key: " + srcKey);
    return;
  }

  fileType = fileType[0].substr(1);

  if (allowedFileTypes.indexOf(fileType) === -1) {
    context.fail("Filetype " + fileType + " not valid for thumbnail, exiting");
    return;
  }

  async.waterfall([
    function createMetaData(next) {
      console.log('creating metadata...');
      mediainfo(target).then(function(data) {
        metadata.width = data[0].video[0].width[0] * 1;
        metadata.height = data[0].video[0].height[0] * 1;
        metadata.duration = data[0].video[0].duration[0] * 1;
        next(null);
      }).catch(function(err) {
        console.error(err); // ERROR LOGGED HERE
        next(err); // propagate the failure so the waterfall does not hang
      });
    },

    function createThumbnail(next) {
      console.log("creating thumbnail...");
      // use ffmpeg and metadata to create thumbnail
      // compute formattedTime, width, height ... cut for brevity

      var ffmpeg = child_process.spawn("ffmpeg", [
        "-ss", formattedTime, // time to take screenshot
        "-i", target, // url to stream from
        "-vf", "thumbnail,scale="+width+":"+height,
        "-q:v", "2",
        "-vframes", "1",
        "-f", "image2",
        "-c:v", "mjpeg",
        "pipe:1"
      ]);
      ffmpeg.on("error", function(err) {
        console.log(err);
      })
      ffmpeg.on("close", function(code) {
        if (code !== 0 ) {
          console.log("child process exited with code " + code);
        } else {
          console.log("Processing finished! Code: ", code);
        }
        tmpFile.end();
        next(code);
      });
      tmpFile.on("error", function(err) {
        console.log("stream err: ", err);
      });
      ffmpeg.on("end", function() {
        tmpFile.end();
      });
      ffmpeg.stdout.pipe(tmpFile)
        .on("error", function(err) {
          console.log("error while writing: ", err);
        });
    },

    function uploadThumbnail(next) {
      var tmpFile =  fs.createReadStream("/tmp/screenshot.jpg");
      child_process.exec("echo `ls -l -R /tmp`",
        function (error, stdout, stderr) {
          console.log("upload stdout: " + stdout)
      });
      var params = {
        Bucket: bucket,
        Key: dstKey,
        Body: tmpFile,
        ContentType: "image/jpg",
        ACL: "public-read",
        Metadata: {
          thumbnail: "TRUE"
        }
      };

      var uploadMe = s3.upload(params);
      uploadMe.send(function(err, data) {
        if (err != null) console.log("error: " + err);
        next(err);
      });
    }
  ],
  function(err) {
    if (err) {
      console.error("Unable to generate thumbnail for '" + bucket + "/" + srcKey + "' due to error: " + err);
      context.fail(err);
    } else {
      context.succeed("Created thumbnail for '" + bucket + "/" + srcKey + "'");
    }
  }
  );
};
pruhter
  • S3 does support range requests... but with a pre-signed URL like `target` it's possible that a range request would only work with a pre-signed URL specifically coded to include the range header. Is the pre-signed URL using Signature V2 or V4? V2 has `AWSAccessKeyId=...` while V4 has `X-Amz-Credential=...`. – Michael - sqlbot Sep 21 '17 at 03:45
  • @Michael-sqlbot It appears that it is using V2. The pre-signed URLs that are being generated have the following query parameters: AWSAccessKeyId, Expires, Signature, x-amz-security-token – pruhter Sep 21 '17 at 17:53
  • I tried adding Range to my params for getSignedUrl but still am having the same issues - smaller files work, larger files don't. params now are... {Bucket: bucket, Key: srcKey, Expires: 900, Range: 'bytes=0-100000'} – pruhter Sep 21 '17 at 18:32
  • Well, that's interesting, because if anything should allow this to work, it's V2, which is usually very permissive with extra headers being added to the requests. You might enable logging in the bucket and see what requests are failing. – Michael - sqlbot Sep 21 '17 at 18:32
  • After enabling logging on the bucket, it looks like an error with code AccessDenied is being recorded. – pruhter Sep 21 '17 at 20:20
  • `GET` request? Or `HEAD`? Do you see any 206 responses? – Michael - sqlbot Sep 21 '17 at 20:52
  • I think the errors were actually being logged from a different bucket. I deleted the logs to replicate with a new test and received the same byte range error, but nothing was recorded in a verbose log file in the bucket. Something else to note: I navigated to the pre-signed URL that was generated and was able to freely access the video file. Testing with an older URL, it correctly gave me an AccessDenied error, although this time due to the link expiring. – pruhter Sep 21 '17 at 21:18
  • This seemed to be a problem with Mediainfo. I went ahead and used ffprobe to extract metadata and was able to get around this issue. Thank you for your help @Michael-sqlbot – pruhter Sep 22 '17 at 01:21
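
For anyone who wants to test the signature-version angle raised in the comments, here is a minimal diagnostic sketch (not from the original post; the bucket and key are placeholders) that forces Signature V4 when generating the pre-signed URL, so the query string carries X-Amz-Credential instead of AWSAccessKeyId:

var AWS = require("aws-sdk");

// Force SigV4 for pre-signed URLs; region taken from the bucket URL in the error above
var s3v4 = new AWS.S3({
  signatureVersion: "v4",
  region: "us-west-2"
});

// Hypothetical bucket and key, for illustration only
var signedUrl = s3v4.getSignedUrl("getObject", {
  Bucket: "my-bucket",
  Key: "videos/sample.mp4",
  Expires: 900
});

console.log(signedUrl); // a V4 URL includes X-Amz-Credential=... in its query string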

1 Answer


I was not able to fix this issue while using Mediainfo. I instead used ffprobe to extract the video metadata I needed, which I reduced down to just the video duration:

// Not shown in the original snippet: output collects ffprobe's stdout chunks,
// and videoDuration holds the parsed duration for later use
var output = [];
var videoDuration = 0;

function createMetaData(next) {
  console.log('capturing video duration...');
  var ffprobe = child_process.spawn('ffprobe', [
    '-v', 'quiet',
    '-print_format', 'json',
    '-show_format',
    target
  ]);
  ffprobe.stdout.on('data', function(data) {
    output.push(data);
  });
  ffprobe.on('error', function(err) {
    console.log('ffprobe error: ', err);
  });
  ffprobe.on('close', function() {
    var outputStr = output.join('');
    var jsonOut = {};
    jsonOut = JSON.parse(outputStr);
    videoDuration = jsonOut.format && jsonOut.format.duration ? jsonOut.format.duration : 0.5;
    next(null);
  });
}
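
For context, and not part of the original answer: a rough sketch of how the captured duration could feed the -ss timestamp that was cut for brevity in the question's createThumbnail step, assuming the screenshot is taken at the video's midpoint (formatTimestamp is a hypothetical helper):

// Convert a duration in seconds to the HH:MM:SS.mmm form ffmpeg accepts for -ss
function formatTimestamp(totalSeconds) {
  var h = Math.floor(totalSeconds / 3600);
  var m = Math.floor((totalSeconds % 3600) / 60);
  var s = totalSeconds - h * 3600 - m * 60;
  var pad = function(n) { return (n < 10 ? "0" : "") + n; };
  return pad(h) + ":" + pad(m) + ":" + pad(s.toFixed(3));
}

// ffprobe reports duration as a string; the division coerces it to a number
var formattedTime = formatTimestamp(videoDuration / 2);
// formattedTime can then be passed to ffmpeg as "-ss", formattedTime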
pruhter