3

I'm new to Node.js and I'm trying to write an AWS Lambda function that streams the content of an S3 object into Node's crypto module to create an MD5 checksum of the S3 object. I'm not sure why, but every time I run the code it generates a different hash value in the console.log output. Can anyone point me in the right direction to fix my code? I appreciate the help!

var crypto = require('crypto');
var fs = require('fs');
var AWS = require('aws-sdk'); 
var s3 = new AWS.S3(); 

/**
 * Lambda handler that streams an S3 object through an MD5 hash and logs the
 * hex digest with console.log.
 *
 * `event` and `context` are not read, and `callback` is never invoked in
 * this body.
 */
exports.handler = (event, context, callback) => {

 // Placeholder bucket/key identifying the object to hash.
 var params = {
    Bucket: 'bucket_name',
    Key: 'key', 
 };

var hash = crypto.createHash('md5');
// NOTE(review): getObject is given a callback AND createReadStream() is then
// called on the request it returns, so the callback style and the stream
// style are mixed on the same request.
var stream = s3.getObject(params, function(err, data) {
  if (err){
    console.log(err);
    return;
  }
}).createReadStream();

// Feed each chunk into the hash as it arrives.
// NOTE(review): presumably chunks are Buffers, in which case the 'utf-8'
// encoding argument has no effect — confirm.
stream.on('data', function (data) {
    hash.update(data, 'utf-8')
})

// Once the stream is exhausted, finalize the hash and log it as hex.
// No 'error' listener is attached to the stream.
stream.on('end', function () {
    console.log(hash.digest('hex'))
})

};
Calvin
  • 407
  • 1
  • 5
  • 21

2 Answers

8

You were close. You are mixing the "callback" style method signature with a "createReadStream" signature. Try this:

const crypto = require('crypto');
const fs = require('fs');
const AWS = require('aws-sdk');
const s3 = new AWS.S3();

exports.handler = (event, context, callback) => {
  let params = {
    Bucket: 'bucket_name',
    Key: 'key',
  };

  let hash = crypto.createHash('md5');
  let stream = s3.getObject(params).createReadStream();
  stream.on('data', (data) => {
    hash.update(data);
  });

  stream.on('end', () => {
    let digest = hash.digest('hex');
    console.log(digest);
    callback(null, digest);
  });
};
Todd Price
  • 2,650
  • 1
  • 18
  • 26
0

Not directly an answer, but you can also add the MD5 hash as an ETag when uploading a file to S3.

const crypt = require('crypto');
const fs = require('fs').promises;

const aws = require('aws-sdk');

/**
 * Reads a local file, computes its MD5 digest (base64), and uploads the file
 * to S3 with that digest sent as `ContentMD5`.
 *
 * @param bucket - Destination bucket, passed as `Bucket` to putObject.
 * @param filename - Path of the local file to read and upload.
 * @param [s3Key=null] - Object key; when falsy, `filename` is used as the key.
 * @returns {Promise} Resolves with the putObject callback's response value,
 *   or rejects with the error it reports.
 */
async function uploadFileToS3WithMd5Hash(bucket, filename, s3Key = null) {
  const fileContents = await fs.readFile(filename);

  const contentMd5 = crypt
    .createHash("md5")
    .update(fileContents)
    .digest('base64');

  // Hex form of the same digest, should it be needed:
  //   Buffer.from(contentMd5, 'base64').toString('hex')

  // Fall back to the local filename when no (truthy) key was supplied.
  const objectKey = s3Key ? s3Key : filename;

  return new Promise((resolve, reject) => {
    const client = new aws.S3();
    const request = {
      Bucket: bucket,
      Key: objectKey,
      Body: fileContents,
      ContentMD5: contentMd5,
    };

    client.putObject(request, (error, response) => {
      if (error) {
        reject(error);
        return;
      }
      resolve(response);
    });
  });
}

// Example invocation: log the putObject response on success, or the error on
// failure.
uploadFileToS3WithMd5Hash('your-own-bucket', 'file.txt')
  .then((response) => console.log(response))
  .catch((error) => console.error(error));

So by checking the ETag of an object on S3, you would get the hex string of the file's MD5 hash.

In some cases (see this post by Dennis), the MD5 checksum is computed automatically upon upload.

mraxus
  • 1,377
  • 1
  • 15
  • 23