1

I've been trying to write some code to download binary files from an AWS S3 server.

I wrote the code below, and it seems to work fine for binary files up to about 200MB, so I thought it worked.

But for big files (larger than about 200MB), the download finishes but only the front part of the file is saved.

For example, for a 1.2GB video file, only the front part (460MB ~ 700MB) was downloaded.

Why does this happen? Is it related to how ofstream works?

// 3. file download from s3
//
// Downloads the S3 object whose key is hThis->m_strTargetPath from the bucket
// named below and writes it to a file in the current directory. The local
// file name is the last '/'-separated component of the key.
//
// On success, hThis->m_origFileNameString / m_origFileName hold the file name
// and hThis->m_valOriginFolderPath holds "<current directory>\<file name>".
// On any failure the error is logged and hThis->runSignal is set to
// STAT_RUN_STOP.
//
// NOTE: GetObject is used here with its default response stream, so the whole
// object is buffered in memory before it is copied to disk (memory use tracks
// the object size). For large objects, install a file-backed stream with
// GetObjectRequest::SetResponseStreamFactory instead.
{
    const std::string strTargetPath = hThis->m_strTargetPath;
    Aws::SDKOptions options;
    options.loggingOptions.logLevel = Aws::Utils::Logging::LogLevel::Trace;
    Aws::InitAPI(options);
    {
        // Download from s3 using GetObject

        // String literals are const; binding one to `char *` is ill-formed since C++11.
        const char *bucket_name = "mybucket";
        const std::string key_name = strTargetPath;

        Aws::Client::ClientConfiguration clientConfig;
        clientConfig.region = "ap-northeast-2";

        // The client lives in this inner scope so it is destroyed before Aws::ShutdownAPI.
        Aws::S3::S3Client s3_client(clientConfig);
        Aws::S3::Model::GetObjectRequest object_request;
        object_request.WithBucket(bucket_name).WithKey(key_name.c_str());

        // parse file name from path: take the last non-empty '/'-separated
        // component. Trailing slashes are skipped; no fixed-size buffers are
        // involved, so long paths cannot overflow and nothing is leaked.
        std::string fileName;
        const std::string::size_type lastChar = strTargetPath.find_last_not_of('/');
        if (lastChar != std::string::npos)
        {
            const std::string::size_type slash = strTargetPath.rfind('/', lastChar);
            const std::string::size_type firstChar = (slash == std::string::npos) ? 0 : slash + 1;
            fileName = strTargetPath.substr(firstChar, lastChar - firstChar + 1);
        }

        if (fileName.empty())
        {
            // Empty path or only slashes: there is nothing to name the local file after.
            hThis->Logger(CPrePackagerDlg::currentDateTime() + "s3 download error: no file name in target path\n");
            hThis->runSignal = CPrePackagerDlg::RunSignal::STAT_RUN_STOP;
        }
        else
        {
            auto get_object_outcome = s3_client.GetObject(object_request);

            if (get_object_outcome.IsSuccess())
            {
                auto &result = get_object_outcome.GetResult();
                const long long expectedBytes = result.GetContentLength();

                // Store the name in the member string first and take c_str() from
                // that copy, so m_origFileName does not refer to a local that is
                // destroyed at the end of this scope (matters if it is a raw pointer).
                hThis->m_origFileNameString = fileName;
                hThis->m_origFileName = hThis->m_origFileNameString.c_str();

                // Writing file downloaded
                Aws::OFStream local_file;
                local_file.open(hThis->m_origFileName, std::ios::out | std::ios::binary);
                if (expectedBytes > 0)
                {
                    // operator<<(streambuf*) sets failbit when it copies nothing,
                    // so it is skipped for an empty object.
                    local_file << result.GetBody().rdbuf();
                }
                local_file.flush();
                const long long writtenBytes =
                    local_file.fail() ? -1 : static_cast<long long>(local_file.tellp());
                local_file.close();

                if (writtenBytes != expectedBytes)
                {
                    // Open/write failure or a truncated body: do not report success.
                    hThis->Logger(CPrePackagerDlg::currentDateTime() + "s3 download error: wrote " +
                        std::to_string(writtenBytes) + " of " +
                        std::to_string(expectedBytes) + " bytes\n");
                    hThis->runSignal = CPrePackagerDlg::RunSignal::STAT_RUN_STOP;
                }
                else
                {
                    hThis->Logger(CPrePackagerDlg::currentDateTime() + "download is done\n");

                    // Record the full path of the downloaded file: current directory + file name.
                    TCHAR programpath[_MAX_PATH];
                    GetCurrentDirectory(_MAX_PATH, programpath);
                    hThis->m_valOriginFolderPath.Format(_T("%s\\"), programpath);
                    hThis->m_valOriginFolderPath += hThis->m_origFileName;
                }
            }
            else
            {
                hThis->Logger(CPrePackagerDlg::currentDateTime() + "s3 download error: " +
                    get_object_outcome.GetError().GetExceptionName() + " " +
                    get_object_outcome.GetError().GetMessage() + "\n");
                hThis->runSignal = CPrePackagerDlg::RunSignal::STAT_RUN_STOP;
            }
        }
    }
    Aws::ShutdownAPI(options);

}
June
  • 346
  • 4
  • 16
  • One more thing I've found: the memory used by the program is similar to the size of the resulting file. That is, if the program uses 400MB of memory, the resulting file will be 400MB – June Apr 10 '19 at 08:59

2 Answers2

2

Even now, I don't know exactly why it doesn't work.

But I changed my approach as shown below, and it worked.


This code writes each downloaded chunk of data directly to disk.

So it doesn't use much memory (about 10~30MB).

// 3. file download from s3
//
// Downloads the S3 object whose key is hThis->m_strTargetPath from the bucket
// named below into a file in the current directory. The local file name is
// the last '/'-separated component of the key. The response body is streamed
// straight into that file through a response stream factory.
//
// On success, hThis->m_origFileNameString / m_origFileName hold the file name
// and hThis->m_valOriginFolderPath holds "<current directory>\<file name>".
// On failure the error is logged and hThis->runSignal is set to STAT_RUN_STOP.
   {
       const std::string strTargetPath = hThis->m_strTargetPath;
       Aws::SDKOptions options;
       options.loggingOptions.logLevel = Aws::Utils::Logging::LogLevel::Trace;
       Aws::InitAPI(options);
       {
           // Download from s3 using GetObject

           // String literals are const; binding one to `char *` is ill-formed since C++11.
           const char *bucket_name = "nemodax-upload-dev";
           const std::string key_name = strTargetPath;
           Aws::Client::ClientConfiguration clientConfig;
           clientConfig.region = "ap-northeast-2";

           // The client lives in this inner scope so it is destroyed before Aws::ShutdownAPI.
           Aws::S3::S3Client s3_client(clientConfig);
           Aws::S3::Model::GetObjectRequest object_request;
           object_request.WithBucket(bucket_name).WithKey(key_name.c_str());

           // parse file name from path: take the last non-empty '/'-separated
           // component. Trailing slashes are skipped; no fixed-size buffers are
           // involved, so long paths cannot overflow and nothing is leaked.
           std::string fileName;
           const std::string::size_type lastChar = strTargetPath.find_last_not_of('/');
           if (lastChar != std::string::npos)
           {
               const std::string::size_type slash = strTargetPath.rfind('/', lastChar);
               const std::string::size_type firstChar = (slash == std::string::npos) ? 0 : slash + 1;
               fileName = strTargetPath.substr(firstChar, lastChar - firstChar + 1);
           }

           if (fileName.empty())
           {
               // Empty path or only slashes: there is nothing to name the local file after.
               hThis->Logger(CPrePackagerDlg::currentDateTime() + "s3 download error: no file name in target path\n");
               hThis->runSignal = CPrePackagerDlg::RunSignal::STAT_RUN_STOP;
           }
           else
           {
               // While downloading, the response stream is saved directly to the
               // file named by fileName, so the download uses little memory.
               // fileName is captured by value so the factory does not depend on
               // this scope's locals staying alive.
               object_request.SetResponseStreamFactory(
                   [fileName]() {
                       return Aws::New<Aws::FStream>("S3DOWNLOAD", fileName, std::ios_base::out | std::ios_base::binary);
                   }
               );
               auto get_object_outcome = s3_client.GetObject(object_request);
               if (get_object_outcome.IsSuccess())
               {
                   // Store the name in the member string first and take c_str() from
                   // that copy, so m_origFileName does not refer to a local that is
                   // destroyed at the end of this scope (matters if it is a raw pointer).
                   hThis->m_origFileNameString = fileName;
                   hThis->m_origFileName = hThis->m_origFileNameString.c_str();
                   hThis->Logger(CPrePackagerDlg::currentDateTime() + "file size: " + std::to_string(get_object_outcome.GetResult().GetContentLength()) + "\n");
                   hThis->Logger(CPrePackagerDlg::currentDateTime() + "download is done\n");
                   // Register the downloaded original file's path in a member variable;
                   // the later encryption step refers to this path.
                   // Combination of directory path + file name.
                   TCHAR programpath[_MAX_PATH];
                   GetCurrentDirectory(_MAX_PATH, programpath);
                   hThis->m_valOriginFolderPath.Format(_T("%s\\"), programpath);
                   hThis->m_valOriginFolderPath += hThis->m_origFileName;
               }
               else
               {
                   hThis->Logger(CPrePackagerDlg::currentDateTime() + "s3 download error: " +
                       get_object_outcome.GetError().GetExceptionName() + " " +
                       get_object_outcome.GetError().GetMessage() + "\n");
                   hThis->runSignal = CPrePackagerDlg::RunSignal::STAT_RUN_STOP;
               }
           }

       }
       Aws::ShutdownAPI(options);
   }
June
  • 346
  • 4
  • 16
1

Even now, I don't know exactly why it doesn't work.

Because your initial example stores the whole file in memory. In your second code snippet you did the right thing by using an fstream as the response stream, so it now writes to disk right away.

Marco M.
  • 2,956
  • 2
  • 29
  • 22