0

I am trying to upload videos having file sizes from anywhere between 1 MB to 2 GB from the Unity3D editor. I am doing this by breaking each video into chunks of a byte array of 10 MB each and then uploading the chunks to the local wamp server and then merging them back into one single file. I am labeling each chunk with a serial number based on the queue and all the chunks are uploaded one by one, with the next upload only starting after the first is completed and is successful.

On the server-side, my PHP script looks like this:

define("CHUNK_FILE_EXTENSION", ".part");

if($_SERVER['REQUEST_METHOD'] == "POST") 
{
    $folder_name = isset($_POST['folder_name']) ? trim($_POST['folder_name']) : '';
    $target_file_name = isset($_POST['target_file_name']) ? trim($_POST['target_file_name']) : '';
    $chunkByteArray = isset($_FILES['chunk_byte_array']) ? $_FILES['chunk_byte_array'] : '';
    $currentChunkNumber = isset($_POST['current_chunk_number']) ? trim($_POST['current_chunk_number']) : '';
    $totalChunksNumber = isset($_POST['total_chunks_number']) ? trim($_POST['total_chunks_number']) : '';
    $startMerge = isset($_POST['start_merge']) ? trim($_POST['start_merge']) : '';
    $totalFileSize = isset($_POST['total_file_size']) ? trim($_POST['total_file_size']) : '';
    $startRollback = isset($_POST['start_rollback']) ? trim($_POST['start_rollback']) : '';
    
    function targetFileDirectoryPath($folder_name) {
        //$tempDir = $_SERVER['DOCUMENT_ROOT']."\\media\\temp\\test\\%s";
        $tempDir = $_SERVER['DOCUMENT_ROOT']."\\media\\temp\\test";
        return sprintf($tempDir, $folder_name);
    }

    function chunksFileDirectoryPath($folder_name) {
        return CombinePath(targetFileDirectoryPath($folder_name), "chunks");
    }

    function mergeChunkFiles($targetFileName, $chunkFileDir, $targetFileTempPath) {
        $files = array_diff(scandir($chunkFileDir), array('.','..',$targetFileName));
        sort($files);
        
        $final = fopen($targetFileTempPath, 'w');
        foreach ($files as $file) {
            $filePath = CombinePath($chunkFileDir, $file);
            if(($filePath != $targetFileTempPath) && (filesize($filePath) > 0)) {
                $myfile = fopen($filePath, "r");
                $buff = fread($myfile,filesize($filePath));
                $write = fwrite($final, $buff);
                fclose($myfile);
            }
        }
        fclose($final);
    }

    if (!empty($currentChunkNumber) && !empty($totalChunksNumber) && !empty($chunkByteArray)) {
        $chunkFileDir = chunksFileDirectoryPath($folder_name);
        $chunkFilePath = CombinePath($chunkFileDir, $currentChunkNumber.CHUNK_FILE_EXTENSION);
        $tempPath = $chunkByteArray['tmp_name'];
        if (createDirectory($chunkFileDir)) {
            if(move_uploaded_file($tempPath, $chunkFilePath)) {
                $responseJson = array(
                    "status" => 1, 
                    "message" => $currentChunkNumber." uploaded successfully"
                );
            }
            else {
                $responseJson = array(
                    "status" => 2, 
                    "message" => $currentChunkNumber." not uploaded to ".$chunkFilePath." from ".$tempPath, 
                    "uploaded_chunk_file" => $chunkByteArray, 
                    "is_uploaded_file" => is_uploaded_file($tempPath)
                );
            }
        }
        else {
            $responseJson = array(
                "status" => 3, 
                "message" => "Chunk file user directory not created @ ".$chunkFileDir
            );
        }
    }
    else if (!empty($startMerge) && !empty($totalFileSize)) {
        $targetFileName = $target_file_name;
        $chunkFileDir = chunksFileDirectoryPath($folder_name);
        $targetFileTempDir = NormalizePath(targetFileDirectoryPath($folder_name));
        $targetFileTempPath = CombinePath($targetFileTempDir, $targetFileName);

        if(createDirectory($targetFileTempDir)) {
            mergeChunkFiles($targetFileName, $chunkFileDir, $targetFileTempPath);
            removeFolder($chunkFileDir);
            if (filesize($targetFileTempPath) == $totalFileSize) {
                $responseJson = array(
                    "status" => 1, 
                    "message" => "Target file saved successfully!"
                );
            }
            else {
                $responseJson = array(
                    "status" => 2, 
                    "message" => "Target file size doesn't match with actual file size. ".
                        "Please try again! Target File Size: ".filesize($targetFileTempPath).
                        " & Input File Size: ".$totalFileSize);
            }   
        }
        else {
            $responseJson = array(
                "status" => 3, 
                "message" => "Unable to create target directory for merging chunks @ ".$targetFileTempDir
            );
        }
    }
    else if (!empty($startRollback)) {
        $responseJson = array(
            "status" => 4, 
            "message" => "Rollback successful!"
        );
    }
    else {
        $responseJson = array(
            "status" => 0, 
            "message" => "Invalid request parameters!!"
        );
    }
}
else {
    $responseJson = array(
        "status" => 0, 
        "message" => "Invalid request method!!"
    );
}

/* Output header */
header('Content-type: application/json;charset=utf-8');
echo json_encode($responseJson, JSON_UNESCAPED_UNICODE);

//Remove folder and its inner folder and files at the input path
function removeFolder($folder) {
    if (empty($folder)) {
        return;
    }
    $folder = NormalizePath($folder);
    if(is_file($folder)) {
        unlink($folder);
    }
    else if(is_dir($folder)) {
        $files = scandir($folder);
        foreach($files as $file) {
            if (( $file != '.' ) && ( $file != '..' )) {
                $file = CombinePath($folder, $file);
                if(is_dir($file)) {
                    removeFolder($file);
                }
                else {
                    unlink($file);
                }
            }
        }
        rmdir($folder);
    }
}

//Check if directory is exist return true, else create new directory and returns bool
function createDirectory($directoryPath) {
    $directoryPath = NormalizePath($directoryPath);
    if(!is_dir($directoryPath)) {
        return mkdir($directoryPath, 0775, true);
    }
    else {
        return true;
    }
}

//Method to combine local file or folder paths using a DIRECTORY_SEPARATOR
function NormalizePath($path) 
{
    //normalize
    $path = str_replace('/', DIRECTORY_SEPARATOR, $path);
    $path = str_replace('\\', DIRECTORY_SEPARATOR, $path);

    //remove leading/trailing dir separators
    if(!empty($path) && substr($path, -1) == DIRECTORY_SEPARATOR) {
        $path = substr($path, 0, -1);
    }
    return $path;
}

//Method to combine local file or folder paths using a DIRECTORY_SEPARATOR
function CombinePath($one, $other, $normalize = true) 
{
    //normalize
    if($normalize) {
        $one = NormalizePath($one);
        $other = NormalizePath($other);
    }
    
    //remove leading/trailing dir separators
    if(!empty($one)) {
        $one = rtrim($one, DIRECTORY_SEPARATOR);
    }
    if(!empty($other)) {
        $other = ltrim($other, DIRECTORY_SEPARATOR);
    }
    
    //return combined path
    if(empty($one)) {
        return $other;
    } elseif(empty($other)) {
        return $one;
    } else {
        return $one.DIRECTORY_SEPARATOR.$other;
    }
}

?>

It works for videos less than 100 MB, but somehow the videos greater than 100 MB does not play properly. I am testing it in local wampserver and upload_max_filesize and post_max_size are set to 20M in php.ini.

I have tried varying the chunk size to 5 MB, but still the same issue. The video gets uploaded successfully and I can also see the video filesize exactly the same as the one on the clientside, but still, somehow it gets corrupted in case of uploading a bigger video.

Just to re-iterate, it somehow works for videos less than 100 MB. As in, the videos are broken into chunks of 10 MB raw bytes uploaded to localhost and merged back to the full file and the video plays as good as the original one.

What am I doing wrong here? Please help.

Edit: Not sure if it might help, but I checked error in the video file using ffmpeg on the uploaded video that was of 106 MB. Below is the command I executed:

ffmpeg -v error -i {video_file_path} -f null - 2>{error_log_file_path}

Here is the error log file: https://drive.google.com/file/d/1YQ0DNtNlhl4cLUJaw20k91Vv6tfjnqsX/view?usp=sharing

Sanket Kale
  • 33
  • 1
  • 11
  • Running script from browser or console? – Divyesh Prajapati Jun 20 '22 at 12:28
  • Right now I am testing the PHP script on local wamp server, but same happens on an remote nginX server too. – Sanket Kale Jun 20 '22 at 12:59
  • On the client-side, I am reading the video file contents into a byte[] like this File.ReadAllBytes(filePath); and passing 10MB chunk of this byte[] in the request parameter of the POST method. Would it help my case if I convert the raw file binary into a base64 string before passing it to the web service and then concatenating the chunks of these base64 strings instead of raw binary? – Sanket Kale Jun 22 '22 at 06:20
  • Converting byte[] to base64 string didn't help. The results are the same. I guess I am doing something wrong while merging the chunks. @KJ could you please help me with a php snippet on how to merge the binary chunks, the way you explained in the comment? – Sanket Kale Jun 22 '22 at 07:07
  • Can you try to calculate a checksum in PHP (the client side!) over the read in byte array and then with an external tool over the raw file on the disk and compare those two? Maybe the error happens already on reading in? (100MB+ in RAM could be a configured limitation?) – cyberbrain Jun 23 '22 at 06:00
  • @KJ I don't think EOL could be an issue as the file's raw bytes are being chunked here and put back together on the server-side. Also, I00 MB is not a hard limit from what I can see, even a 94 MB video file did not work in my case, but somehow 86 MB video file worked. It is difficult to find videos of specific sizes to test but yes, the range came down to 86-94 MB after which the video chunking and merging according to the above code corrupts the video. – Sanket Kale Jun 24 '22 at 08:41
  • @cyberbrain I checked the checksum of the files on the client-side and server-side and get similar results. The checksum of video files less than ~90 MB is the same on both client-side and server-side, while the checksum of files larger than 90 MB is different. Why would the chunked file merging work for smaller video files and not work for files more than 90 MB? – Sanket Kale Jun 24 '22 at 09:32
  • don't ever use the `r` fopen mode, use `rb` (it makes no difference on most OS's, except on Windows where r and rt will randomly corrupt binary data (well, "randomly" meaning \n bytes..)) – hanshenrik Jun 26 '22 at 16:09

3 Answers3

1

On the server side, you read in the chunks completely into memory before you write out your target file. This approach is limited by PHPs maximum memory usage. It is configured via the memory_limit setting, that has a default of 128MB. You will need some memory for other things besides the actual final file as well, so the seen limit of ~ 100 MB looks as if it is a result of this limitation. (See the link, it also contains docs how to increase the limit.)

But increasing the memory limit is not a good solution in my opinion, because your server will not have endless memory. I recommend one of the following solutions:

  1. use rsync - it is widely used, available for many operating systems, often even preinstalled and you would not have to fiddle around with splitting up and rejoining the big files. I'm not an expert in it's usage and there are enough tutorials available so I will not explain the correct usage in detail. It is also super fast.

  2. if you cannot use rsync for whatever reason, you should write out the chunks to the disk on the server as soon as you receive them. You will have to ensure the correct order on upload (which will make parallel uploads for chunks of the same file not really feasible), and you have to use the fopen mode "a" (for append) instead of "w".

  3. if you upload the part-files individually and store them as part-files on disk on the server, for Linux you could just use the cat command (abbrevation of con_cat_enate) or for Windows the copy command with the + parameter to join the part files into one big one.

cyberbrain
  • 3,433
  • 1
  • 12
  • 22
  • @cyberbrain, I tried updating the memory_limit to 512M and restarted the local wamp server just to see if it caused the issue, but it didn't help. I still got the video of 106 MB corrupted and 84 MB worked fine. I updated my script to do fopen with "ab" instead of "w" or "a", but still the same issue. For the copy/cat command suggestion, I am looking for a solution related to PHP so that it is OS-independent. – Sanket Kale Jun 28 '22 at 06:41
1

i don't know exactly what is wrong with your script, but i can theorize:

  • you're using "w" and "r" fopen modes, they're horrible in theory, and if you're running on Microsoft Windows, they're horrible in practice as well, use "wb" and "rb", perhaps your files are getting corrupted beacuse of your non-binary fopen modes? (but that doesn't explain why it works on smaller video files)

  • you lack error checking on fwrite, if fwrite does not return strlen(input) you're ignoring a potential error. maybe try something like the fwrite_all function from https://www.php.net/manual/en/function.fwrite.php#125731

  • you also lack error checking on fread, at no point after $buff = fread($myfile,filesize($filePath)); do you make sure that strlen($buff) === filesize($filePath)

  • i had several problems uploading 10MB on ubuntu+php-fpm+nginx, nginx's default client_max_body_size was 1M, php-fpm's default php.ini upload_max_filesize was 8M, and post_max_size was 2M (or maybe it was the other way around, either way...)

buuuut your script is kind of hard to debug/read nonetheless, how about a Kiss simpler implementation?

my attempt:

Warning, there is no authentication to this code, and a hacker could easily pwn your webserver with this code, uploading evil.php as a "movie".

<?php
declare(strict_types = 1);
function jsresponse($response)
{
    header("Content-Type: application/json");
    echo json_encode($response, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE | JSON_THROW_ON_ERROR);
}

if ($_SERVER['REQUEST_METHOD'] !== "POST") {
    http_response_code(405);
    jsresponse(["error" => "invalid request method"]);
    die();
}
$folder_name = isset($_POST['folder_name']) ? trim($_POST['folder_name']) : '';
if (empty($folder_name)) {
    $folder_name = getcwd();
} elseif (!is_dir($folder_name) && !mkdir($folder_name, 0755, true)) {
    http_response_code(400);
    jsresponse(["error" => "could not create folder name"]);
    die();
}
if (!chdir($folder_name)) {
    http_response_code(400);
    jsresponse(["error" => "could not access folder"]);
    die();
}
$target_file_name = isset($_POST['target_file_name']) ? trim($_POST['target_file_name']) : '';
if (empty($target_file_name)) {
    http_response_code(400);
    jsresponse(["error" => "target file name is empty"]);
    die();
}
if (!touch($target_file_name)) {
    http_response_code(400);
    jsresponse(["error" => "could not touch target file"]);
    die();
}
if (empty($_FILES['chunk_byte_array']['tmp_name'])) {
    http_response_code(400);
    jsresponse(["error" => "chunk byte array is missing"]);
    die();
}
// todo: ram-optimize with stream_copy_to_stream(), this is a very ram-hungry way of appending
$bytes_to_append = file_get_contents($_FILES['chunk_byte_array']['tmp_name']);
if (strlen($bytes_to_append) !== $_FILES['chunk_byte_array']['size']) {
    // should never happen
    http_response_code(500);
    jsresponse(["error" => "could not read chunk byte array file.."]);
    die();
}
$bytes_appended = file_put_contents($target_file_name, $bytes_to_append, FILE_APPEND | LOCK_EX);
if (strlen($bytes_to_append) !== $bytes_appended) {
    http_response_code(500);
    jsresponse(["error" => "could not append all bytes!",
        "data" => ["bytes_to_append" => strlen($bytes_to_append), "bytes_actually_appended" => $bytes_appended,
            "error_get_last" => error_get_last()]]);
}
jsresponse("success!");

testing it:

$ pwd
/temp
$ b3sum John.Wick3.mp4 
2c6445acd31ac3153df52917ca4ab003624377cf50b6e78d0b3c8065d7d2d9f6  John.Wick3.mp4
$ du -h John.Wick3.mp4 
2.1G    John.Wick3.mp4
$ cat John.Wick3.mp4 | php -r '$i=0;while(!feof(STDIN) && false!==($str=stream_get_contents(STDIN,10*1024*1024))){++$i;file_put_contents("John.Wick3.mp4.part".$i,$str);}'
$ ls | sort -V | head
John.Wick3.mp4
John.Wick3.mp4.part1
John.Wick3.mp4.part2
John.Wick3.mp4.part3
John.Wick3.mp4.part4
John.Wick3.mp4.part5
John.Wick3.mp4.part6
John.Wick3.mp4.part7
John.Wick3.mp4.part8
John.Wick3.mp4.part9
$ cat $(ls | grep -i part | sort -V) | b3sum
2c6445acd31ac3153df52917ca4ab003624377cf50b6e78d0b3c8065d7d2d9f6  -
$ ls | grep -i part | sort -V | xargs --max-args=1 --max-procs=1 '-I{}' curl \
-F folder_name="testfolder" \
-F target_file_name="John.Wick3.mp4" \
-F chunk_byte_array=@"{}" \
http://localhost:81/upload.php
"success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!""success!"
$ du -h /srv/http/default/www/testfolder/John.Wick3.mp4 
2.1G    /srv/http/default/www/testfolder/John.Wick3.mp4
$ b3sum /srv/http/default/www/testfolder/John.Wick3.mp4
2c6445acd31ac3153df52917ca4ab003624377cf50b6e78d0b3c8065d7d2d9f6  /srv/http/default/www/testfolder/John.Wick3.mp4
$ b3sum John.Wick3.mp4
2c6445acd31ac3153df52917ca4ab003624377cf50b6e78d0b3c8065d7d2d9f6  John.Wick3.mp4

success! uploaded a 2.1GB file with no corruption, as proven by the b3sum being equivalent :) (btw i'm sure there's a better way to split the movie, couldn't think of any though)

hanshenrik
  • 19,904
  • 4
  • 43
  • 89
0

I have created a JavaFX client to send large files in chunks of max post size (I am using 2 MB) and a PHP receiver script to assemble the chunks into original file. I am releasing the code under apache license here : http://code.google.com/p/gigaupload/ Feel free to use/modify/distribute.