Our React Native app lets the user record a video (3 to 90 seconds) and then our app does a chunked upload to our CDN service (Cloudinary). As part of the chunked upload request each upload needs to include a header that indicates the byte range and the total size. For example: 'bytes: 0-5999999/6634945'
There isn’t an easy way to do chunked uploads in React Native but we found a library, rn-fetch-blob (https://github.com/joltup/rn-fetch-blob), that allows us to read the recorded file as a base64 stream in a buffer size that we specify (6,000,000 in our case) and then we use their fetch function to upload the chunks.
In order for us to use the above library, we are taking these steps:
- read the video file stream in 6,000,000 pieces (These 6,000,000 pieces are given to us as a base64 string. The length of the string is 8,000,000)
- In order for us to send base64 to our CDN, we have to prepend ‘data:video/mp4;base64,’ to the base64 string and then URI escape the string
- We send the data and some associate body info to the CDN as multipart/form-data
However we are seeing size discrepancies. What we are sending is not what they are receiving. For example, a 6,000,000 byte binary chunk (which goes through the above transforms) gets received on the CDN side as something larger.
Different 6 MB chunks yield different sizes on the server (The same chunk of data yields the same size difference on the server...this makes sense). For example, when we send a 6MB piece, the server receives it as 6,209,890 or 6,220,496, etc. The ratio of received:sent from what I’ve observed is 1.031 to 1.038.
I’ve even tested the chunked uploads with an Express server that I've set up locally. I’m using the multer library. I see the same size discrepancies and the same ratios.
Here’s some info from the React Native side that I’m logging:
** stat() of file: Object {lastModified: 1542825371000, size: 6634945, type: "file", path: "/storage/emulated/0/Movies/VID_20181121_103601.mp4", filename: "VID_20181121_103601.mp4"}
** readSTreamProm: Promise {_40: 0, _65: 1, _55: RNFetchBlobReadStream, _72: null}
** we got a stream: RNFetchBlobReadStream {tick: 10, encoding: "base64", bufferSize: 6000000, path: "file:///storage/emulated/0/Movies/VID_20181121_103601.mp4", closed: false…}
------------START--------------
** we got a chunk: number: 0
** last 2 bytes of base64 chunk L W
** binary bytes calculation: 0-5999999/6634945
** reading binary chunk: 6000000
** base64 chunk.length: 8000000
** uri_encoded chunk size: 8505440 ... total size: 8505440
------------END--------------
------------START--------------
** we got a chunk: number: 1
** last 2 bytes of base64 chunk = =
** binary bytes calculation: 6000000-6634944/6634945
** reading binary chunk: 634945
** base64 chunk.length: 846596
** uri_encoded chunk size: 901714 ... total size: 9407154
------------END--------------
Here’s the console.log of info that multer gives us from our multipart/form-data fetch request above (I've added 2 comments highlighting the size difference...look for '<=='):
req.body: { timestamp: '1542825377',
signature: 'hidden_signature',
eager: 'sp_full_hd_wifi/m3u8',
eager_async: 'true',
api_key: 'hidden_api_key' }
req.file: { fieldname: 'file',
originalname: 'upload.mp4',
encoding: '7bit',
mimetype: 'application/octet-stream',
destination: 'uploads/',
filename: '86cdf589e71f70fe9e09663066b1f635',
path: 'uploads/86cdf589e71f70fe9e09663066b1f635',
size: 655624 } <== the 634,945 is received as 655,624
req.headers: { 'content-range': 'bytes 8505440-9407153/9407154',
'x-unique-upload-id': 'ed4e59a2-cf41-4e62-b665-5297cb3aaa8b',
'content-type': 'multipart/form-data; boundary=RNFetchBlob-qz6pzbtgv0novbetjxs9o',
'content-length': '656482',
host: 'hidden_host',
'accept-encoding': 'gzip',
'user-agent': 'okhttp/3.6.0',
'x-forwarded-for': 'hidden_ip' }
req.body: { timestamp: '1542825377',
signature: 'hidden_signature',
eager: 'sp_full_hd_wifi/m3u8',
eager_async: 'true',
api_key: 'hidden_api_key' }
req.file: { fieldname: 'file',
originalname: 'upload.mp4',
encoding: '7bit',
mimetype: 'application/octet-stream',
destination: 'uploads/',
filename: 'b988d1b514994c92b52d01fe0941eae3',
path: 'uploads/b988d1b514994c92b52d01fe0941eae3',
size: 6189548 } . <== the 6,000,000 is received as 6,189,548
req.headers: { 'content-range': 'bytes 0-8505439/9407154',
'x-unique-upload-id': 'ed4e59a2-cf41-4e62-b665-5297cb3aaa8b',
'content-type': 'multipart/form-data; boundary=RNFetchBlob-u1xdhdoviih2udhzeocag',
'content-length': '6190406',
host: 'hidden_host',
'accept-encoding': 'gzip',
'user-agent': 'okhttp/3.6.0',
'x-forwarded-for': 'hidden_ip' }
What is causing the size discrepancy between the sending and the receiving end? Figuring this out will help me calculate the actual size to include in the header for our chunked uploads to the CDN.
Here's the React Native code...
import RNFetchBlob from 'rn-fetch-blob'
export function upload_to_cloudinary(uri) {
let base64_chunks = [] // we need to save it so that we can calculate new size and chunk sizes.
const sending_chunk_size = 6000000 //each chunk must be larger than 5 Mb, except for the last one
RNFetchBlob.fs.exists(uri)
.then((exist) => {
if (exist) {
RNFetchBlob.fs.stat(uri)
.then( stats => {
console.log('** stat() of file: ', stats)
const binary_total_size = stats.size
let total_size = 0
let chunk_num = 0
let start_byte_num = 0
let end_byte_num = 0
// Now read in streams as base64
const readStreamProm = RNFetchBlob.fs.readStream(uri, 'base64', sending_chunk_size)
readStreamProm.then((stream) => {
console.log('** we got a stream: ', stream)
stream.open()
stream.onData((chunk) => {
console.warn('------------START--------------')
console.log('** we got a chunk: number: ', chunk_num)
console.warn('** last 2 bytes of base64 chunk', chunk[chunk.length-2], chunk[chunk.length-1] )
start_byte_num = (sending_chunk_size * chunk_num)
end_byte_num = start_byte_num + sending_chunk_size // need to account for last chunk
end_byte_num = (end_byte_num > binary_total_size) ? binary_total_size : end_byte_num
end_byte_num -= 1
console.log(`** binary bytes calculation: ${start_byte_num}-${end_byte_num}/${binary_total_size}`)
console.log(`** reading binary chunk: ${end_byte_num - start_byte_num + 1}`)
console.log('** base64 chunk.length: ', chunk.length)
chunk_num += 1
const uri_encoded_chunk = encodeURIComponent('data:video/mp4;base64,' + chunk)
total_size += uri_encoded_chunk.length
base64_chunks.push(uri_encoded_chunk) //collect it
console.log(`** uri_encoded chunk size: ${uri_encoded_chunk.length} ... total size: ${total_size}`)
console.warn('------------END--------------')
})
stream.onEnd(() => {
console.log('** finished reading streamed data')
const shared_unique_id = uuid.v4()
let start_byte_num = 0
base64_chunks.forEach( (chunk, idx) => {
// we're sending the start, end, and total bytes based on the base64 uri encoded data but this is not affecting the discrepancy on the server side
upload_chunk(start_byte_num, start_byte_num+chunk.length-1, total_size, chunk, shared_unique_id, 'video')
start_byte_num = start_byte_num+chunk.length
})
})
// not related code intentionally left out
})
}
function upload_chunk( start_byte_num, end_byte_num, total_size, uri_encoded_base64_data, shared_unique_id, type) {
let timestamp = (Date.now() / 1000 | 0).toString()
let hash_string = 'our_hash'
let signature = CryptoJS.SHA1(hash_string).toString()
let upload_url = 'our_cloudinary_url`
return RNFetchBlob.fetch(
'POST',
upload_url,
{
'Content-Type': 'multipart/form-data',
'X-Unique-Upload-Id': shared_unique_id, // The unique id associates all the chunks from the same file with each other.
'Content-Range': `bytes ${start_byte_num}-${end_byte_num}/${total_size}`
},
[
{
name: 'file',
filename: 'upload.mp4',
data: uri_encoded_base64_data, //the mime type is in the data packet
},
{name: 'timestamp', data: timestamp },
{name: 'signature', data: signature},
{name: 'eager', data: 'sp_full_hd_wifi/m3u8'},
{name: 'eager_async', data: 'true'},
{name: 'api_key', data: our_api_key_hidden}
]
)
}