I am trying to transfer a 10 TB file to COS (Cloud Object Storage) using s3cmd.
To transfer the file I am using the command below:
python3 cloud-s3.py --upload s3cmd /data/10TB.txt pr-bucket1 --multipart-chunk-size-mb 1024 --limit-rate 100M --no-check-md5
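For context, my script just builds an s3cmd command line and shells out to it, so the invocation above boils down to roughly this direct command:

s3cmd put /data/10TB.txt s3://pr-bucket1 --multipart-chunk-size-mb 1024 --limit-rate 100M

(Note that the script only forwards --no-check-md5 for --sync, not for --upload, so that flag is silently dropped here anyway.)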
It takes around 55 hours to transfer this file.
Are there any other parameters I can use to improve its performance?
The AWS CLI, on the other hand, takes around 22 hours to transfer the same file.
Why is the performance so poor for s3cmd? Is that just how it is designed?
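One thing I noticed while sanity-checking the numbers: per s3cmd's --limit-rate semantics (bytes per second; the m suffix means megabytes), --limit-rate 100M caps the upload at 100 MB/s, which already puts a floor of roughly 28 hours on a 10 TB transfer:

10 * 10**12 / (100 * 10**6) / 3600  # ~27.8 hours at 100 MB/s

So 55 hours is about twice the best case allowed by my own rate limit.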
Can anyone please help me with this?
Here is what I have in the cloud-s3.py file:
#!/usr/bin/env python3
import sys
import argparse
import subprocess


def main(argv):
    parser = argparse.ArgumentParser(description='Cloud project. Prerequisite: pip3')
    parser.add_argument("-i", "--install", help="Install either s3cmd or the AWS CLI.",
                        choices=["s3cmd", "aws_cli"], dest='installation', type=str)
    parser.add_argument("-c", "--configure", help="Configure either s3cmd or the AWS CLI.",
                        choices=["s3cmd", "aws_cli"], dest='configure', type=str)
    parser.add_argument("-u", "--upload", help="Transfer a file to the bucket. Protocol, file path and bucket name are required. Upload supports GPG encryption.",
                        nargs=3, type=str)
    parser.add_argument("-l", "--list", help="List the bucket contents. Protocol and bucket name are required.",
                        nargs=2, type=str)
    parser.add_argument("-e", "--encrypt", help="Send an encrypted file. The encryption password must be set while configuring s3cmd. Other users decrypt with 'gpg -d <file>' and the password you supplied.",
                        action='store_true', dest='encryption')
    parser.add_argument("-d", "--disable-multipart", help="Disable multipart transfer for the current transfer. By default, multipart transfer is enabled for files larger than the default multipart chunk size; see the .s3cfg file.",
                        dest='disable_multipart', action='store_true')
    parser.add_argument("-s", "--multipart-chunk-size-mb", help="Size of each chunk of a multipart upload. Files bigger than SIZE are automatically uploaded as multipart; smaller files use the traditional method. SIZE is in megabytes; the default chunk size is 15 MB, the minimum is 5 MB and the maximum is 5 GB.",
                        dest='chunk_size', type=str, nargs=1)
    parser.add_argument("--sync", help="Conditional transfer: only files that don't exist at the destination in the same version are transferred. Note: sync doesn't support GPG encryption.",
                        dest='sync_data', nargs=3, type=str)
    parser.add_argument("--limit-rate", help="Limit the upload or download speed to an amount of bytes per second. The amount may be expressed in bytes, kilobytes with the k suffix, or megabytes with the m suffix.",
                        dest='limit_rate', nargs=1, type=str)
    parser.add_argument("--no-check-md5", help="Do not check MD5 sums when comparing files for sync; only size is compared. May significantly speed up the transfer but may also miss some changed files.",
                        dest='no_checksum', action='store_true')
    argument = parser.parse_args(argv)
    install = argument.installation
    config = argument.configure
    upload = argument.upload
    list_bucket = argument.list
    encrypt_enabled = argument.encryption
    disable_multipart = argument.disable_multipart
    chunk_size = argument.chunk_size
    sync = argument.sync_data
    limit_rate = argument.limit_rate
    no_checksum = argument.no_checksum

    if install == 's3cmd':
        print("s3 cmd")
        subprocess.call('sudo pip3 install s3cmd', shell=True)
    elif install == 'aws_cli':  # was 'aws cli', which can never match the 'aws_cli' choice
        print("aws cli")

    if config == "s3cmd":
        print("config s3 cmd")
        subprocess.run('s3cmd --configure', shell=True)
    elif config == "aws_cli":
        print("config aws cli")
    if upload:
        print("upload")
        protocol = argument.upload[0]
        filename = argument.upload[1]
        bucketname = "s3://" + argument.upload[2]
        print("protocol = ", protocol)
        print("filename = ", filename)
        print("bucket = ", bucketname)
        # Build the command line, e.g. ['s3cmd', 'put', <file>, 's3://<bucket>', ...]
        upload_list = [protocol, "put", filename, bucketname]
        if encrypt_enabled:
            upload_list.append("-e")
        if disable_multipart:
            upload_list.append("--disable-multipart")
        if chunk_size:
            upload_list.append("--multipart-chunk-size-mb")
            upload_list.append(argument.chunk_size[0])
        if limit_rate:
            upload_list.append("--limit-rate")
            upload_list.append(argument.limit_rate[0])
        print("\n Print upload list :\n")
        print(upload_list)
        subprocess.run(upload_list)
    if list_bucket:
        print("list")
        protocol = argument.list[0]
        bucketname = "s3://" + argument.list[1]
        subprocess.run([protocol, "ls", bucketname])

    if sync:
        print("executing s3 sync")
        protocol = argument.sync_data[0]
        filename = argument.sync_data[1]
        bucketname = "s3://" + argument.sync_data[2]
        print("protocol = ", protocol)
        print("filename = ", filename)
        print("bucket = ", bucketname)
        sync_list = [protocol, "sync", filename, bucketname]
        if disable_multipart:
            sync_list.append("--disable-multipart")
        if chunk_size:
            sync_list.append("--multipart-chunk-size-mb")
            sync_list.append(argument.chunk_size[0])
        if limit_rate:
            sync_list.append("--limit-rate")
            sync_list.append(argument.limit_rate[0])
        if no_checksum:
            sync_list.append("--no-check-md5")
        print("\n Print sync list :\n")
        print(sync_list)
        subprocess.run(sync_list)


if __name__ == "__main__":
    main(sys.argv[1:])
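For reference, these are example invocations the script supports (the upload command is the one I ran above; paths and bucket name are just from my setup):

python3 cloud-s3.py --install s3cmd
python3 cloud-s3.py --configure s3cmd
python3 cloud-s3.py --upload s3cmd /data/10TB.txt pr-bucket1 --multipart-chunk-size-mb 1024 --limit-rate 100M
python3 cloud-s3.py --list s3cmd pr-bucket1
python3 cloud-s3.py --sync s3cmd /data/ pr-bucket1 --no-check-md5

For an encrypted upload, add -e; the recipient then decrypts with gpg -d <file> using the password set during s3cmd --configure.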