When a csv file is uploaded on my s3 bucket, my lambda will be triggered to insert my data into DynamoDB. I need a stream because the file is too large to be downloaded as full object.
const batchWrite = async (clientDynamoDB, itemsToProcess) => {
const ri = {};
ri[TABLE_DYNAMO] = itemsToProcess.map((itm) => toPutRequest(itm));
const params = { RequestItems: ri };
await clientDynamoDB.batchWriteItem(params).promise();
};
function runStreamPromiseAsync(stream, clientDynamoDB) {
return new Promise((resolve, reject) => {
const sizeChunk = 25;
let itemsToProcess = [];
stream
.pipe(fastCsv.parse({headers: Object.keys(schemaGeData), trim: true}))
.on("data", (row) => {
stream.pause();
itemsToProcess.push(row);
if (itemsToProcess.length === sizeChunk) {
batchWrite(clientDynamoDB, itemsToProcess).finally(() => {
stream.resume();
});
itemsToProcess = [];
}
})
.on("error", (err) => {
console.log(err);
reject("Error");
})
.on("end", () => {
stream.pause();
console.log("end");
batchWrite(clientDynamoDB, itemsToProcess).finally(() => {
resolve("OK");
});
});
});
}
module.exports.main = async (event, context, callback) => {
context.callbackWaitsForEmptyEventLoop = false;
const AWS = require('aws-sdk');
const s3 = new AWS.S3();
const object = event.Records[0].s3;
const bucket = object.bucket.name;
const file = object.object.key;
const agent = new https.Agent({
keepAlive: true
});
const client = new AWS.DynamoDB({
httpOptions: {
agent
}
});
try {
//get Stream csv data
const stream = s3
.getObject({
Bucket: bucket,
Key: file
})
.createReadStream()
.on('error', (e) => {
console.log(e);
});
await runStreamPromiseAsync(stream, client);
} catch (e) {
console.log(e);
}
};
When my file is 1000 lines everything is inserted but when I have 5000 lines, my function insert only around 3000 lines and this number is random... Sometimes more sometimes less..
So I'd like to understand what am I missing here ?
I also read this article but to be honest even if you pause the second stream, the first one is still running.. So if someone have any ideas on how to do this, it would be greatly appreciated !
Thanks