I am working on a data management application where clients can upload a zip file (approx. 250 MB) containing multiple text files (approx. 1500 MB uncompressed) to AWS S3.
But due to the limited memory of AWS Lambda (1536 MB max), I am only able to extract a zip file of about 50 MB (around 500 MB of extracted files).
I need to run some validation on the extracted files while extracting, and after that I have to store the contents of each file in a database.
For now I am storing the file contents in the Lambda /tmp location, which is also limited to 500 MB.
Any streaming concept that lets me do the above task, including the validation, would be helpful (I have put a rough sketch of what I mean after my current code below).
I could go with EC2 or ECS, but right now I want to do this with AWS Lambda only.
With the code below I am extracting the files and uploading them as a new zip to another S3 bucket. Any other concept like streaming would be helpful; since I am not very familiar with streaming, I am posting here to get some ideas on how to resolve this.
// Imports assumed by this handler:
const AWS = require('aws-sdk');
const fs = require('fs');
const async = require('async');
const JSZip = require('jszip');
const s3 = new AWS.S3();

s3.getObject(params, (err, data) => {
    if (err) {
        console.log('Error', err);
        var message = `Error getting object ${key} from bucket ${bucket}. Make sure they exist and your bucket is in the same region as this function.`;
        console.log(message);
        // callback(message);
    } else {
        console.log('Started to buffer data');
        // Loads the entire zip into memory -- this is what hits the Lambda memory limit.
        JSZip.loadAsync(data.Body).then(function(zip) {
            // Extract each matching entry to /tmp, the only writable path in Lambda.
            async.each(zip.files, function(item, cb1) {
                if (!item.dir && item.name.includes('nightly')) {
                    zip.file(item.name).async("text").then(function(content) {
                        fs.writeFile('/tmp/' + item.name.replace(/^.*[\\\/]/, ''), content, cb1);
                    });
                } else {
                    cb1();
                }
            }, function(err) {
                if (err) throw err;
                // Re-zip everything written to /tmp and upload the result.
                var zipObj = new JSZip();
                fs.readdir('/tmp', function(err, files) {
                    console.log(files);
                    async.each(files, function(file, cb2) {
                        fs.readFile('/tmp/' + file, 'utf-8', function(err, content) {
                            if (err) {
                                return cb2(err);
                            }
                            zipObj.file(file, content);
                            cb2();
                        });
                    }, function(err) {
                        if (err) throw err;
                        zipObj.generateAsync({
                            type: "nodebuffer"
                        })
                        .then(function(content) {
                            // deleteFiles is a small helper of mine that removes the extracted files.
                            deleteFiles(['/tmp/*'], function(err, paths) {
                                console.log('Deleted files/folders:\n', paths.join('\n'));
                            });
                            s3.putObject({
                                Bucket: 'abtempb',
                                Key: 'temp/records.zip',
                                Body: content
                            }, function(err, result) {
                                if (err) {
                                    console.log('Error ', err);
                                } else if (result && result.ETag) {
                                    console.log('uploaded file: ', result.ETag);
                                }
                            });
                        });
                    });
                });
            });
        });
    }
});
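From what I have read, streaming would look something like the sketch below, but I have not verified it. This is an untested sketch assuming the npm package unzipper; it skips the re-zipping step and uploads each matching file individually instead, with the validation going into the 'entry' handler:

// Untested sketch: stream the zip from S3 and handle one entry at a time,
// so neither the whole zip nor /tmp is needed. Assumes the 'unzipper' package.
const AWS = require('aws-sdk');
const unzipper = require('unzipper');
const s3 = new AWS.S3();

var uploads = [];

s3.getObject(params).createReadStream()     // stream the zip, no full buffer
    .pipe(unzipper.Parse())                 // emits one 'entry' per zipped file
    .on('entry', function(entry) {
        // per-entry validation would go here, before anything is stored
        if (entry.type === 'File' && entry.path.includes('nightly')) {
            var key = 'temp/' + entry.path.replace(/^.*[\\\/]/, '');
            // s3.upload accepts a stream Body, so the entry goes straight
            // to the target bucket without touching /tmp
            uploads.push(s3.upload({ Bucket: 'abtempb', Key: key, Body: entry }).promise());
        } else {
            entry.autodrain();              // discard non-matching entries
        }
    })
    .on('error', function(err) { console.log('Error', err); })
    .on('finish', function() {
        Promise.all(uploads)
            .then(function() { console.log('all entries uploaded'); })
            .catch(function(err) { console.log('Error', err); });
    });

If this direction is viable, I assume the same 'entry' handler could also read each entry line by line (each entry is just a readable stream) and insert the rows into the database instead of uploading. Is this the right way to use streaming here?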
Thank You