I am trying to use Python to create a ZipFile object in-memory, and write a single file, also created in-memory, into the ZipFile object, and then upload the file to Google Cloud Storage.
My file is not actually getting compressed. Any idea what I might be doing wrong?
I realize there may be a fancier way of getting the row data into the file object, but apart from that, I'm really just trying to figure out why the resulting zip file is not coming out compressed at all.
UPDATE: code sample now excludes any interaction with Google Cloud Services (GCS, etc.), and instead just writes the files to disk.
It seems that when I write the file to disk first, then create the ZipFile, the result is compressed as expected, but when I add the StringIO contents directly from memory to the ZipFile object, the contents are not compressed.
import random, io, argparse, os, string
from zipfile import ZipFile, ZipInfo, ZIP_DEFLATED
parser = argparse.ArgumentParser()
parser.add_argument("--row_limit", default=1000)
parser.add_argument("--file_name", default='file.txt', type=str)
parser.add_argument("--archive_name", default='file.zip', type=str)
parser.add_argument("--snapshot_millis", default=0, type=int)
args = parser.parse_args()
# imagine this has lots and lots of data in it, coming from a database query result
rows = [{
'seq_no': ''.join(random.choices(string.ascii_uppercase + string.digits, k=args.row_limit)),
'csv': ''.join(random.choices(string.ascii_uppercase + string.digits, k=args.row_limit))
}] * args.row_limit
archive = io.BytesIO()
# create zip archive in memory
with ZipFile(archive, 'w', compression=ZIP_DEFLATED, compresslevel=9) as zip_archive:
count = 0
file_contents = io.StringIO()
for row in rows:
if count > args.row_limit:
break
count += 1
file_contents.write(f"{row['seq_no']},{row['csv']}\n")
# write file to zip archive in memory
zip_file = ZipInfo(args.file_name)
zip_archive.writestr(zip_file, file_contents.getvalue())
# also write file to disk
with open(args.file_name, mode='w') as f:
print(file_contents.getvalue(), file=f)
print(f"StringIO Size: {file_contents.tell()}")
print(f"Text File Size On Disk: {os.path.getsize(args.file_name)}")
archive.seek(0)
with open(args.archive_name, 'wb') as outfile:
outfile.write(archive.getbuffer())
print(f"Zip File Created from File In Memory: {os.path.getsize(args.archive_name)}")
ZipFile(args.archive_name, mode='w', compression=ZIP_DEFLATED, compresslevel=9).write(args.file_name)
print(f"Zip File Created from File On Disk: {os.path.getsize(args.archive_name)}")