1

I have written a small script that downloads data from MinIO, zips it, and uploads the resulting zip file.

package main

import (
    "archive/zip"
    "bytes"
    "context"
    "fmt"
    "io"
    "log"
    "sync"

    "github.com/minio/minio-go/v7"
    "github.com/minio/minio-go/v7/pkg/credentials"
)

// File pairs one listed object's metadata with a reader for its contents.
// It is the unit of work passed from downloadFiles to zipFiles.
type File struct {
    // ObjectInfo is the listing entry for the object; its Key is used as
    // the entry name inside the zip archive.
    ObjectInfo minio.ObjectInfo
    // Object yields the object's bytes. downloadFiles stores the value
    // returned by GetObject here.
    Object     io.Reader
}

// downloadFiles lists, recursively, every object in bucketName whose key
// starts with one of directoryNames, opens each object with GetObject, and
// sends it on fileCh as a File.
//
// fileCh is closed when the function returns, so a receiver ranging over the
// channel terminates once every prefix has been processed. A listing error or
// a GetObject error terminates the process via log.Fatalln.
func downloadFiles(minioClient *minio.Client, ctx context.Context, bucketName string, directoryNames []string, fileCh chan<- File) {
    defer close(fileCh)

    for _, directoryName := range directoryNames {
        objectCh := minioClient.ListObjects(ctx, bucketName, minio.ListObjectsOptions{
            Prefix:    directoryName,
            Recursive: true,
        })

        for objectInfo := range objectCh {
            // ListObjects reports failures in-band through the Err field of
            // the entry; without this check a failed listing would be
            // forwarded to GetObject as an object with an empty key.
            if objectInfo.Err != nil {
                log.Fatalln(objectInfo.Err)
            }

            object, err := minioClient.GetObject(ctx, bucketName, objectInfo.Key, minio.GetObjectOptions{})
            if err != nil {
                log.Fatalln(err)
            }

            fileCh <- File{
                ObjectInfo: objectInfo,
                Object:     object,
            }
        }
    }
}

// zipFiles receives files from fileCh until the channel is closed and writes
// each one into zipWriter as an entry named after the object's key.
//
// It does not close zipWriter; the caller must do that to finish the archive.
// Any error while creating or filling an entry terminates the process via
// log.Fatalln.
func zipFiles(zipWriter *zip.Writer, fileCh <-chan File) {
    for file := range fileCh {
        fmt.Println("Writing ", file.ObjectInfo.Key)

        if err := writeZipEntry(zipWriter, file); err != nil {
            log.Fatalln(err)
        }
    }
}

// writeZipEntry copies a single file into a new archive entry and releases
// the source reader afterwards.
func writeZipEntry(zipWriter *zip.Writer, file File) error {
    // File.Object is declared as io.Reader, but the concrete value may also
    // be an io.Closer (for example the object handle returned by GetObject).
    // Close it once this entry is done so open objects do not accumulate
    // across the whole run.
    if closer, ok := file.Object.(io.Closer); ok {
        defer func() {
            // The entry has already been read at this point, so a failed
            // Close is reported but does not abort the archive.
            if err := closer.Close(); err != nil {
                log.Println(err)
            }
        }()
    }

    writer, err := zipWriter.Create(file.ObjectInfo.Key)
    if err != nil {
        return err
    }

    _, err = io.Copy(writer, file.Object)
    return err
}

// uploadZip stores the bytes currently held in buf as the object
// zipObjectName in bucketName. The buffer is read through a separate reader,
// so its contents are left in place. A failed upload terminates the process
// via log.Fatalln.
func uploadZip(minioClient *minio.Client, ctx context.Context, bucketName string, zipObjectName string, buf *bytes.Buffer) {
    payload := buf.Bytes()
    body := bytes.NewReader(payload)
    size := int64(len(payload))

    _, err := minioClient.PutObject(ctx, bucketName, zipObjectName, body, size, minio.PutObjectOptions{})
    if err != nil {
        log.Fatalln(err)
    }
}

// main downloads every object under the configured prefixes, packs them into
// a single zip archive held in memory, and uploads that archive to the output
// bucket. Any failure terminates the process via log.Fatalln.
func main() {
    ctx := context.Background()
    endpoint := "play.min.io"
    inputBucketName := "unitelmed"
    outputBucketName := "unitelmed"
    directoryNames := []string{"GENERAL"}
    zipObjectName := "my-files.zip"

    // NOTE(review): the access key and secret are hard-coded in source. If
    // these are not throwaway test credentials, load them from the
    // environment or a secret store instead.
    minioClient, err := minio.New(endpoint, &minio.Options{
        Creds:  credentials.NewStaticV4("Q3AM3UQ867SPQQA43P2F", "zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG", ""),
        Secure: true,
    })

    if err != nil {
        log.Fatalln(err)
    }

    // The whole archive is accumulated in buf before the upload starts.
    buf := new(bytes.Buffer)
    fileCh := make(chan File)

    // Producer: lists and opens the objects, then closes fileCh.
    go downloadFiles(minioClient, ctx, inputBucketName, directoryNames, fileCh)

    wg := sync.WaitGroup{}
    wg.Add(1)

    // Consumer: writes every received file into the archive.
    go func() {
        defer wg.Done()

        zipWriter := zip.NewWriter(buf)

        zipFiles(zipWriter, fileCh)

        // Close writes the central directory that makes the archive
        // readable. Check its error explicitly rather than deferring the
        // call, so a failure is never discarded and an incomplete archive is
        // never uploaded.
        if err := zipWriter.Close(); err != nil {
            log.Fatalln(err)
        }
    }()

    // The archive must be complete before uploadZip reads it out of buf.
    wg.Wait()

    uploadZip(minioClient, ctx, outputBucketName, zipObjectName, buf)
}

As far as I could get it to work, the zipWriter needs to be closed before the zip file is uploaded. This holds at least as much memory as the size of the zip (and probably even more).

Is there a way to stream the data from the source through the zip writer and on to the destination? I imagine this would hold only a few MB in memory.

Jonathan Hall
  • 75,165
  • 16
  • 143
  • 189
Fabian
  • 546
  • 5
  • 14
  • 4
    You cannot stream zip files as they are written, the same way you can a `gzip` file, for instance, because a zip file contains an index that depends on knowing the full contents of the file before that index can be written. – Jonathan Hall Jun 08 '23 at 10:23

0 Answers0