I'm currently working on the capability to upload larger files to Azure Blob Storage using Express.js and the @azure/storage-blob
package.
What I have works fine on files <5mb but anything larger will "fail" at some point on the upload stream. I say "fail" because no error (that I can see) is thrown, it just hangs up mid-upload. Because there is no error I am struggling to figure out what could be causing the error.
index.js
const express = require('express');
const bodyParser = require('body-parser');
const volleyball = require('volleyball');
const cors = require('cors');
const busboy = require('connect-busboy')
require('dotenv').config()
const fileRoutes = require('./routes/files.routes')
const app = express();
// Enable Logging
app.use(volleyball);
// parse application/json
app.use(bodyParser.json());
// run cors middleware
app.use(cors());
//process body contents for file uploads
app.use(busboy({
highWaterMark: (1 * 1024 * 1024),
fileHwm: (1 * 1024 * 1024) / 2,
}))
app.use('/files', fileRoutes);
// Setting the port and publishing to that port
const port = process.env.PORT || 3000;
app.listen(port, () => {
console.log('Listening on port', port);
});
Code for the /files/t2
route:
const storageblob = require('@azure/storage-blob')
const {v4: uuidv4} = require('uuid')
const busboy = require('connect-busboy')
const rand = require('randomstring') // todo: remove from final
let sharedClient = null
const createClient = async () => {
const connectionString = process.env.STORAGE_CS
const containerName = '3ac53750-882a-11ec-8579-db09c3889ffa'
// Create the BlobServiceClient object which will be used to create a container client
const blobServiceClient = await storageblob.BlobServiceClient.fromConnectionString(connectionString);
const containerClient = blobServiceClient.getContainerClient(containerName);
sharedClient = containerClient
}
const saveFile = async (req, res) => {
if (sharedClient) {
console.log('already set')
} else {
console.log('attempting to set')
await createClient()
}
if (req.busboy) {
req.busboy.on('file', async (fieldname, file, filename) => {
console.log('Starting....')
const blobClient = sharedClient.getBlockBlobClient(filename.filename)
const bufferSize = (1 * 1024 * 1024) / 2
const response = await blobClient.uploadStream(
file,
bufferSize,
5,
{
onProgress: (ev) => {
console.log(ev)
// console.log({file})
},
blobHTTPHeaders: {blobContentType: filename.mimeType}
})
response._response.status
console.log({response})
try {
} catch (e) {
console.log({e})
}
file.on('data', () => {
console.log('we have an data call')
})
file.on('error', () => {
console.log('we have an error - inside')
})
file.on('limit', () => {
console.log('we have a limit reached - inside')
})
console.log('ending...')
})
req.busboy.on('error', () => {
console.log('we have an error')
})
req.busboy.on('finish', () => {
console.log('we finished')
})
req.busboy.on('close', () => {
console.log('we closed')
})
req.on('aborted', () => {
console.log('aborted')
})
console.log('starting pipe')
req.pipe(req.busboy)
res.send('finished...')
}
}
module.exports = {
saveFile,
}
Here is an excerpt of what the logs look like when attempting to upload an 11MB file:
MVPJ <—— 200 OK 6 B text/html; charset=utf-8 (<—> 2.8 ms)
{ loadedBytes: 524288 }
IeGN ——> POST /files/t2 11.49 MB multipart/form-data; boundary=--------------------------824775949779732965782851
already set
starting pipe
Starting....
IeGN <—— 200 OK 6 B text/html; charset=utf-8 (<—> 3.9 ms)
{ loadedBytes: 524288 }
{ loadedBytes: 1048576 }
E7Uh ——> POST /files/t2 11.49 MB multipart/form-data; boundary=--------------------------307975744074310648853410
already set
starting pipe
Starting....
E7Uh <—— 200 OK 6 B text/html; charset=utf-8 (<—> 3.1 ms)
{ loadedBytes: 524288 }
gIIV ——> POST /files/t2 11.49 MB multipart/form-data; boundary=--------------------------174965235814957167411128
already set
starting pipe
Starting....
gIIV <—— 200 OK 6 B text/html; charset=utf-8 (<—> 3.2 ms)
{ loadedBytes: 524288 }
{ loadedBytes: 1048576 }
OGOc ——> POST /files/t2 11.49 MB multipart/form-data; boundary=--------------------------880875904437719845837223
already set
starting pipe
Starting....
OGOc <—— 200 OK 6 B text/html; charset=utf-8 (<—> 3.1 ms)
{ loadedBytes: 524288 }
loMo ——> POST /files/t2 11.49 MB multipart/form-data; boundary=--------------------------121962168533176166251418
attempting to set
starting pipe
Starting....
loMo <—— 200 OK 6 B text/html; charset=utf-8 (<—> 31.2 ms)
{ loadedBytes: 524288 }
{ loadedBytes: 1048576 }
{ loadedBytes: 1572864 }
{ loadedBytes: 2097152 }
{ loadedBytes: 2621440 }
{ loadedBytes: 3145728 }
During this round of testing, it was failing pretty fast right around 500KB to 1MB. Other attempts have gotten as high as 6MB before failing. This leads me to believe it's not some hard limit but a throughput issue possibly? There doesn't seem to be much of a correlation between frequency and restarting the container/express server and where it fails.
Update 1
I've started to average higher upload amounts before hanging by increasing the maxConcurrency to 400 which seems to be ridiculously high from the examples I've seen which make between 5 and 20 seem like the norm. Not sure what this shows but it is interesting.
5s5m ——> POST /files/t2 25.6 MB multipart/form-data; boundary=--------------------------075957127342913853541742
attempting to set
starting pipe
Starting....
5s5m <—— 200 OK 6 B text/html; charset=utf-8 (<—> 36.0 ms)
{ loadedBytes: 524288 }
{ loadedBytes: 1048576 }
{ loadedBytes: 1572864 }
{ loadedBytes: 2097152 }
{ loadedBytes: 2621440 }
{ loadedBytes: 3145728 }
{ loadedBytes: 3670016 }
{ loadedBytes: 4194304 }
{ loadedBytes: 4718592 }
{ loadedBytes: 5242880 }
{ loadedBytes: 5767168 }
{ loadedBytes: 6291456 }
{ loadedBytes: 6815744 }
{ loadedBytes: 7340032 }
{ loadedBytes: 7864320 }
{ loadedBytes: 8388608 }
{ loadedBytes: 8912896 }
{ loadedBytes: 9437184 }
{ loadedBytes: 9961472 }
{ loadedBytes: 10485760 }