0

Every time I test my code it produces a different hash even though it is the same file/object from S3. Here is my code:

"use strict";
var crypto = require('crypto');
let AWS    = require("aws-sdk/global");
AWS.config.update({region: "us-east-1"});
let S3 = require("aws-sdk/clients/s3");
let s3 = new S3();

// Success payload handed to the Lambda callback; built once at module load and
// reused by the handler whenever the object hash is computed without error.
const successResponse = { statusCode: 200, body: JSON.stringify('Processed File Metadata successfully')};

// Main Lambda entry point
exports.handler = (event, context, callback) => {
  context.callbackWaitsForEmptyEventLoop = false;

  var bucketName = event.Records[0].s3.bucket.name;
  var objectKey  = event.Records[0].s3.object.key;

  getS3ObjectHash(bucketName, objectKey, function(err, s3ObjectHash){
    if(err){
      console.log("Error getting S3ObjectHash"+err.message);
      return callback(err);
    }else{
      return callback(null, successResponse);
    }
  });
};

/**
 * Streams an S3 object and computes the SHA-256 hash of its contents.
 *
 * @param {string} srcBucket - Name of the bucket that holds the object.
 * @param {string} srcKey - Key of the object to hash.
 * @param {function} callback - Node-style callback, called exactly once with
 *   (err) on failure or (null, hexDigest) once the whole stream has been read.
 */
async function getS3ObjectHash(srcBucket, srcKey, callback){
  // Guarantee the caller is notified exactly once, whichever event fires first.
  let finished = false;
  const finish = (err, result) => {
    if (finished) {
      return undefined;
    }
    finished = true;
    return err ? callback(err) : callback(null, result);
  };

  try {
    console.log("Bucket "+srcBucket);
    console.log("Key "+srcKey);
    const params = {
      Bucket: srcBucket,
      Key: srcKey
    };
    const hash = crypto.createHash('sha256');

    // Do NOT pass a callback to getObject when streaming: a callback makes the
    // SDK send the request itself, and createReadStream() then sends it again,
    // so the stream receives duplicated/interleaved data and the hash changes
    // from run to run.
    const stream = s3.getObject(params).createReadStream();

    // Stream failures are asynchronous and never reach the try/catch below,
    // so they have to be reported from an 'error' listener.
    stream.on('error', (err) => {
      console.log(err);
      finish(err);
    });
    // Chunks are hashed as delivered; Buffers need no input encoding.
    stream.on('data', (chunk) => {
      hash.update(chunk);
    });
    stream.on('end', () => {
      const hashResult = hash.digest('hex');
      console.log("**** Result hash "+ hashResult);
      finish(null, hashResult);
    });
  } catch (error) {
    // Only synchronous failures (e.g. while building the request) land here.
    console.log("Caught error "+ error.message);
    finish(error);
  }
}

Here is the test data that I'm using. It is the S3 trigger event sent when an object is created:

{
  "Records": [
    {
      "eventVersion": "2.1",
      "eventSource": "aws:s3",
      "awsRegion": "us-east-2",
      "eventTime": "2019-09-03T19:37:27.192Z",
      "eventName": "ObjectCreated:Put",
      "userIdentity": {
        "principalId": "AWS:PRINCIPAL-ID"
      },
      "requestParameters": {
        "sourceIPAddress": "205.255.255.255"
      },
      "responseElements": {
        "x-amz-request-id": "D82B88E5F771F645",
        "x-amz-id-2":  "vlR7PnpV2Ce81l0PRw6jlUpck7Jo5ZsQjryTjKlc5aLWGVHPZLj5NeC6qMa0emYBDXOo6QBU0Wo="
      },
      "s3": {
        "s3SchemaVersion": "1.0",
        "configurationId": "828aa6fc-f7b5-4305-8584-487c791949c1",
        "bucket": {
          "name": "BUCKET-NAME",
          "ownerIdentity": {
            "principalId": "PRINCIPAL-ID"
          },
          "arn": "arn:aws:s3:::BUCKET-NAME"
        },
        "object": {
          "key": "utp/1185481445975.nrcs143_023308.pdf",
          "size": 1305107,
          "eTag": "b21b84d653bb07b05b1e6b33684dc11b",
          "sequencer": "0C0F6F405D6ED209E1"
        }
      }
    }
  ]
}

Here is the output from two consecutive tests using the same test data shown above:

2020-06-21 INFO Key utp/1185481445975.nrcs143_023308.pdf
2020-06-21 INFO **** Result hash ea41e18defef3f4e44743d847ed804aa818afb33765d7ea83d0d2d92138e5946

2020-06-21 INFO Key utp/1185481445975.nrcs143_023308.pdf
2020-06-21 INFO **** Result hash b226d051e637f7627acbab588ab171eebefad412c797a7e7cf66e92f924e578f

As you can see the Result Hash is different for each test.

I know that the test is the same.

I know that the S3 object is the same.

I know I'm not very good with Node.js. Could someone please tell me what I'm doing wrong?

KingAndrew
  • 1,164
  • 4
  • 21
  • 41
  • 1
    I'm not a NodeExpert, but it could possibly be that you're hashing the response from S3 rather than the file contents itself. The response from S3 would include various metadata etc, and hence cause the hash to be different each time. Just a guess. – keithRozario Jun 21 '20 at 13:17
  • That does seem like a possibility. Although since I'm not changing the test event input shouldn't it still be the same hash? Would the metadata of the s3 object change because I executed getObject? – KingAndrew Jun 21 '20 at 14:43
  • @keithRozario It seems like you are correct. I made sure that I was only hashing the Body of the object and the hash remained constant! :-) If you make your comment an answer I will gladly award it to you. – KingAndrew Jun 21 '20 at 16:38

1 Answer

2

I give credit to @keithRozario. His comment made me give it a try.

Once I hashed only the body of the object, the hash remained constant. Here is the code:

/**
 * Fetches an S3 object and computes the SHA-256 hash of its body.
 *
 * The whole object is loaded into memory (data.Body) before hashing.
 *
 * @param {string} srcBucket - Name of the bucket that holds the object.
 * @param {string} srcKey - Key of the object to hash.
 * @param {function} callback - Node-style callback, called with (err) on
 *   failure or (null, hexDigest) on success.
 */
async function getS3ObjectHash(srcBucket, srcKey, callback){
  try {
    const params = { Bucket: srcBucket, Key: srcKey };
    // Hash only the object's body, not the rest of the response.
    s3.getObject(params, (err, data) => {
      if (err) {
        console.log(err);
        // Propagate the failure; returning silently would leave the caller
        // waiting forever for a callback that never comes.
        return callback(err);
      }
      const hashResult = crypto.createHash("sha256").update(data.Body).digest("hex");
      console.log("**** Result hash "+ hashResult);
      return callback(null, hashResult);
    });
  } catch (error) {
    // Only synchronous failures while issuing the request land here.
    console.log("Caught error "+ error.message);
    return callback(error);
  }
}
KingAndrew
  • 1,164
  • 4
  • 21
  • 41