
I am trying to upload a large file (~1 GB) from the client (using Python requests.post) to a Flask server.

When the client sends the request to the server in chunks of 1024 bytes, the server does not read the whole file, and the file saved on the server is 0 KB.

Can you please help me debug what exactly I am doing wrong here?

Server - Flask Code:

from flask import Flask, request, jsonify
from werkzeug.utils import secure_filename
import os

app = Flask(__name__)

app.config['UPLOAD_FOLDER'] = 'uploads/'

@app.route("/upload/<filename>", methods=["POST", "PUT"])
def upload_process(filename):
    filename = secure_filename(filename)
    fileFullPath = os.path.join(app.config['UPLOAD_FOLDER'], filename)

    with open(fileFullPath, "wb") as f:
        chunk_size = 1024
        chunk = request.stream.read(chunk_size)
        f.write(chunk)
    return jsonify({'filename': filename})


if __name__ == '__main__':
    app.run(host="0.0.0.0", port=int("8080"),debug=True)

Client - Requests Code:

import os
import requests


def read_in_chunks(file_object, chunk_size=1024):
    while True:
        data = file_object.read(chunk_size)
        if not data:
            break
        yield data


def main(fname, url):
    content_path = os.path.abspath(fname)
    with open(content_path, 'r') as f:
        try:
            r = requests.post(url, data=read_in_chunks(f))
            print "r: {0}".format(r)
        except Exception, e:
            print e


if __name__ == '__main__':
    filename = 'bigfile.zip'  # ~1GB
    url = 'http://localhost:8080/upload/{0}'.format(filename)
    main(filename, url)
Amit Baswa

4 Answers


Kindly use 'file.stream.read(chunk_size)' instead of request.stream.read(chunk_size). It works for me!
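
What this answer implies, sketched out with hedged assumptions: file here would be the werkzeug FileStorage object from request.files, which only exists when the client uploads via multipart/form-data (for example through requests' files= parameter). The route and field names below are illustrative, not from the question.

import os

from flask import Flask, request, jsonify
from werkzeug.utils import secure_filename

app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = 'uploads/'


@app.route("/upload", methods=["POST"])
def upload_multipart():
    file = request.files['file']  # werkzeug FileStorage from a multipart POST
    filename = secure_filename(file.filename)
    fileFullPath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    with open(fileFullPath, "wb") as f:
        while True:
            chunk = file.stream.read(1024)  # the read this answer suggests
            if not chunk:
                break
            f.write(chunk)
    return jsonify({'filename': filename})

The matching client call would be requests.post(url, files={'file': open('bigfile.zip', 'rb')}). Be aware that requests builds the multipart body in memory, so for a ~1 GB file a streaming encoder such as requests-toolbelt's MultipartEncoder is usually preferable.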


Old thread but I was looking for something similar so I'll post here anyway.

The server opens the file in write mode, which truncates and overwrites it on each chunk's request. Prefer append mode:

with open(fileFullPath, "ab") as f:

The client needs to read the file in byte mode:

with open(content_path, "rb") as f:

Finally, the generator read_in_chunks needs to be consumed in a loop, with each chunk posted as its own request:

def main(fname, url):
    content_path = os.path.abspath(fname)
    with open(content_path, "rb") as f:
        try:
            for data in read_in_chunks(f):
                r = requests.post(url, data=data)
                print("r: {0}".format(r))
        except Exception as e:
            print(e)

You then have your two files:

Server

from flask import Flask, request, jsonify
from werkzeug.utils import secure_filename
import os

app = Flask(__name__)

app.config["UPLOAD_FOLDER"] = "uploads/"


@app.route("/upload/<filename>", methods=["POST", "PUT"])
def upload_process(filename):
    filename = secure_filename(filename)
    fileFullPath = os.path.join(app.config["UPLOAD_FOLDER"], filename)

    with open(fileFullPath, "ab") as f:
        chunk_size = 1024
        chunk = request.stream.read(chunk_size)
        f.write(chunk)
    return jsonify({"filename": filename})


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=int("8080"), debug=True)

Client

import os
import requests


def read_in_chunks(file_object, chunk_size=1024):
    while True:
        data = file_object.read(chunk_size)
        if not data:
            break
        yield data


def main(fname, url):
    content_path = os.path.abspath(fname)
    with open(content_path, "rb") as f:
        try:
            for data in read_in_chunks(f):
                r = requests.post(url, data=data)
                print("r: {0}".format(r))
        except Exception as e:
            print(e)


if __name__ == "__main__":
    filename = "bigfile.zip"  # ~1GB
    url = "http://localhost:8080/upload/{0}".format(filename)
    main(filename, url)

Note that posting in chunks usually requires sending the total number of chunks and a hash of the file so the upload can be validated.
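
As a minimal sketch of that validation idea (the X-Chunk-Index, X-Chunk-Total and X-File-Sha256 headers are hypothetical names, not part of the code above), the client could hash the file as it goes and send the digest with the last chunk:

import hashlib
import os
import requests


def upload_with_validation(fname, url, chunk_size=1024):
    file_hash = hashlib.sha256()
    total_chunks = (os.path.getsize(fname) + chunk_size - 1) // chunk_size
    with open(fname, "rb") as f:
        for index, data in enumerate(iter(lambda: f.read(chunk_size), b"")):
            file_hash.update(data)
            headers = {"X-Chunk-Index": str(index),
                       "X-Chunk-Total": str(total_chunks)}
            if index == total_chunks - 1:  # last chunk carries the full hash
                headers["X-File-Sha256"] = file_hash.hexdigest()
            requests.post(url, data=data, headers=headers)

The server would then, on the final chunk, re-hash the assembled file and compare it against X-File-Sha256 before reporting success.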

Beg-inner

Flask depends on werkzeug to process streams, and werkzeug demands a content length for a stream. There's a thread on this here, but no real solution is currently available other than taking a different framework approach.
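
One workaround, sketched here under the assumption that a single request is acceptable: when the client passes an open file object to requests, the body is streamed from disk and requests sets Content-Length from the file size, which is exactly what werkzeug wants.

import requests


def upload_whole_file(path, url):
    with open(path, "rb") as f:
        # data=f streams the file without loading it into memory;
        # Content-Length is derived from the file size.
        return requests.post(url, data=f)

The Flask handler can then drain request.stream in a loop, reading until it returns an empty bytes object, instead of doing a single fixed-size read.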

  • This is almost a link-only answer. If at some point in the future these links don't work, this answer will not be very useful. That is why you're encouraged to include the important information from the links in the answer itself. – mrun Jan 22 '18 at 21:30

The example below should work well for you. If you use Redis, you can also publish each processed chunk's progress so another API can drive a progress bar.

from flask import Flask, request, copy_current_request_context
import json
import tempfile
import uuid

app = Flask(__name__)

# settings, user_id and db_apn_logging (a Redis client) are assumed to be
# provided elsewhere in the application.


@app.route("/submit_vdo", methods=['POST'])
def submit_vdo():

    @copy_current_request_context
    def receive_chunk(stream, full_file_path):
        if full_file_path is None:
            # delete=False so the file survives this handle and can be
            # reopened by name below
            tmpfile = tempfile.NamedTemporaryFile(
                'wb+', prefix=str(uuid.uuid4()) + "_", delete=False)
            full_file_path = tmpfile.name
            tmpfile.close()

        print('Write temp to', full_file_path)
        with open(full_file_path, "wb") as f:
            max_chunk_size = settings.VIDEO_MAX_SIZE_CHUNK  # config.MAX_UPLOAD_BYTE_LENGHT
            count_chunks = 0
            total_uploaded = 0
            try:
                while True:
                    print('Chunk', count_chunks)
                    chunk = stream.read(max_chunk_size)
                    if chunk:
                        total_uploaded += len(chunk)
                        count_chunks += 1
                        f.write(chunk)
                        temp = {'chunk_counts': count_chunks,
                                'total_bytes': total_uploaded,
                                'status': 'uploading...',
                                'success': True}
                        # publish progress so another API can poll it
                        db_apn_logging.set(user_id + "@CHUNK_DOWNLOAD",
                                           json.dumps(temp), ex=5)
                        print(temp)
                    else:
                        temp = {'chunk_counts': count_chunks,
                                'total_bytes': total_uploaded,
                                'status': 'DONE',
                                'success': True}
                        db_apn_logging.set(user_id + "@CHUNK_DOWNLOAD",
                                           json.dumps(temp), ex=5)
                        break
            except Exception as e:
                temp = {'chunk_counts': count_chunks,
                        'total_bytes': total_uploaded,
                        'status': str(e),
                        'success': False}
                db_apn_logging.set(user_id + "@CHUNK_DOWNLOAD",
                                   json.dumps(temp), ex=5)
                return None

        return full_file_path

    stream = request.files['file']  # werkzeug FileStorage; proxies read/seek
    stream.seek(0)
    full_file_path = receive_chunk(stream, None)

    return "DONE !"
Thư Sinh