
I am trying to unzip a password-protected file in GCS, but the code below raises an error. It works fine with normal .gz files but fails to unzip password-protected files.

    from google.cloud import storage
    from zipfile import ZipFile, is_zipfile
    import io

    storage_client = storage.Client()
    source_bucket = 'bucket'
    source_bucket1 = storage_client.bucket(source_bucket)
    zipfilename_with_path = "path/filename.gz"
    blob = source_bucket1.blob(zipfilename_with_path)
    zipbytes = io.BytesIO(blob.download_as_string())
    print(zipbytes)
    if is_zipfile(zipbytes):
        with ZipFile(zipbytes, 'r') as myzip:
            for contentfilename in myzip.namelist():
                contentfile = myzip.read(contentfilename)  # no pwd passed, so encrypted members fail here
                contentfilename = contentfilename[:-3]
                blob1 = source_bucket1.blob(contentfilename)
                blob1.upload_from_string(contentfile)
                print(f'File decompressed from {zipfilename_with_path} to {contentfilename}')
    blob.delete()

2 Answers


You can use Python, e.g. from a Cloud Function:

    from google.cloud import storage
    from zipfile import ZipFile
    from zipfile import is_zipfile
    import io

    def zipextract(bucketname, zipfilename_with_path):

        storage_client = storage.Client()
        bucket = storage_client.get_bucket(bucketname)

        destination_blob_pathname = zipfilename_with_path
        
        blob = bucket.blob(destination_blob_pathname)
        zipbytes = io.BytesIO(blob.download_as_string())

        if is_zipfile(zipbytes):
            with ZipFile(zipbytes, 'r') as myzip:
                for contentfilename in myzip.namelist():
                    contentfile = myzip.read(contentfilename)
                    blob = bucket.blob(zipfilename_with_path + "/" + contentfilename)
                    blob.upload_from_string(contentfile)

    zipextract("mybucket", "path/file.zip") # if the file is gs://mybucket/path/file.zip
  • Thanks for your reply, but this logic works on .gz files and not on password-protected .gz files. – abhi Feb 16 '22 at 17:15

I am able to read a password-protected .csv.gz file using the logic below. All of this is done in memory, so it has performance issues if the file is huge, but it works fine.

    from google.cloud import storage
    from zipfile import ZipFile
    import io

    storage_client = storage.Client()
    source_bucket = '<bucket-name>'
    source_bucket1 = storage_client.bucket(source_bucket)
    bucket_folder = '/unzip'
    blob = source_bucket1.blob(path)  # path = blob path of the password-protected file
    zipbytes = io.BytesIO(blob.download_as_string())
    with ZipFile(zipbytes, 'r') as myzip:
        print("Inside the zipfiles loop")
        with myzip.open('filename.csv', pwd=b'password') as myfile:
            print("Inside zip 2 loop")
            contentfile = myfile.read()
            contentfilename = bucket_folder + destination_file_path  # destination object name
            blob1 = source_bucket1.blob(contentfilename)
            blob1.upload_from_string(contentfile)
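
To reuse this, one way to package the same logic as a helper function (the name `unzip_protected` and its parameters are placeholders, not from the snippet above):

    from google.cloud import storage
    from zipfile import ZipFile
    import io

    # Sketch: download a password-protected zip from GCS, extract one member
    # in memory, and upload it back to the same bucket.
    def unzip_protected(bucket_name, zip_path, member_name, password, destination_path):
        storage_client = storage.Client()
        bucket = storage_client.bucket(bucket_name)
        zipbytes = io.BytesIO(bucket.blob(zip_path).download_as_string())
        with ZipFile(zipbytes, 'r') as myzip:
            with myzip.open(member_name, pwd=password) as myfile:
                bucket.blob(destination_path).upload_from_string(myfile.read())

    # Example call, assuming gs://<bucket-name>/path/file.gz contains filename.csv:
    unzip_protected('<bucket-name>', 'path/file.gz', 'filename.csv', b'password', 'unzip/filename.csv')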