4

I am wondering if anyone can help me implement parallel zip creation using ScatterZipOutputStream. I have searched a lot, but I cannot find an example of it anywhere.

https://commons.apache.org/proper/commons-compress/zip.html

I have tried creating a zip, zipping a directory, etc. with ZipArchiveOutputStream. Now I am trying to do the same in parallel.

/**
 * Compresses a single file into a ZIP archive written next to it.
 *
 * <p>The archive name is the source path with its file-name extension replaced by
 * {@code .zip}; when the file name has no extension, {@code .zip} is appended.
 *
 * @param filename path of the file to compress
 * @throws IOException if the source cannot be read or the archive cannot be written
 * @throws ArchiveException kept in the signature for existing callers
 */
public static void makeZip(String filename) throws IOException,
        ArchiveException {
    File sourceFile = new File(filename);

    // Only strip a dot that belongs to the file name itself: a dot inside a parent
    // directory name (or no dot at all) must not be used as the cut position.
    int lastSeparator = Math.max(filename.lastIndexOf('/'), filename.lastIndexOf('\\'));
    int lastDot = filename.lastIndexOf('.');
    String baseName = lastDot > lastSeparator + 1 ? filename.substring(0, lastDot) : filename;

    // The source is opened first so that a missing input does not leave an empty archive
    // behind. The raw output stream is its own resource so that it is closed even when
    // closing the archive stream fails.
    try (FileInputStream in = new FileInputStream(sourceFile);
         OutputStream out = new FileOutputStream(baseName + ".zip");
         ZipArchiveOutputStream os = new ZipArchiveOutputStream(out)) {
        os.setUseZip64(Zip64Mode.AsNeeded);

        os.putArchiveEntry(new ZipArchiveEntry(sourceFile.getName()));
        IOUtils.copy(in, os);
        os.closeArchiveEntry();
    }
}

It should be able to process each file in its own thread and then combine the results into a single zip file.

dkb
  • 4,389
  • 4
  • 36
  • 54
Sameer Pradhan
  • 135
  • 2
  • 15

1 Answer

6

The following is working code for both zipping and unzipping:
1. Change the paths in sourceFolder and zipFilePath.
2. Only *.txt files are zipped; this can be changed to any other type, or to all files.
3. Files are unzipped to sourceFolder/unzip/.

Add the following dependencies to build.gradle (or the equivalent entries to pom.xml):

implementation("org.apache.commons:commons-compress:1.18")
implementation("commons-io:commons-io:2.6")

Ref: https://mvnrepository.com/artifact/org.apache.commons/commons-compress/1.18 https://mvnrepository.com/artifact/commons-io/commons-io/2.6

//code

import org.apache.commons.compress.archivers.zip.*;
import org.apache.commons.compress.parallel.InputStreamSupplier;
import org.apache.commons.io.FileUtils;

import java.io.*;
import java.nio.file.Files;
import java.util.Iterator;
import java.util.concurrent.ExecutionException;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

/**
 * Demo that zips a folder with {@link ParallelScatterZipCreator} and then extracts the
 * resulting archive again with {@link ZipInputStream}.
 */
public class ZipMain {

    /** Size in bytes of the copy buffer used while extracting entries. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Zips the matching files below the source folder and unzips the archive again.
     *
     * <p>Note: the unzip target lies inside the source folder and files are collected
     * recursively, so a second run also adds the previously extracted files.
     *
     * @param args unused
     * @throws IOException if zipping or unzipping fails
     */
    public static void main(String[] args) throws IOException {
        String sourceFolder = "/Users/<user>/Desktop/";
        String zipFilePath = "/Users/<user>/Desktop/Desk.zip";
        String fileTypesToBeAddedToZip = "txt";
        zip(sourceFolder, zipFilePath, fileTypesToBeAddedToZip);
        unzip(zipFilePath, sourceFolder + "/unzip/");
    }

    /**
     * Writes the files found recursively below {@code sourceFolder} into one ZIP archive;
     * entries are handed to a {@link ParallelScatterZipCreator}, which compresses them
     * before they are written to the archive.
     *
     * @param sourceFolder            directory whose files are added
     * @param zipFilePath             archive to create; an existing file is overwritten
     * @param fileTypesToBeAddedToZip extension (without dot) of the files to add, or
     *                                {@code null} to add every file
     * @throws IOException if the source is not a directory, a file cannot be read, the
     *                     archive cannot be written, or the operation is interrupted
     */
    private static void zip(String sourceFolder, String zipFilePath, String fileTypesToBeAddedToZip) throws IOException {
        File srcFolder = new File(sourceFolder);
        if (!srcFolder.isDirectory()) {
            throw new IOException("Source folder is not a directory: " + sourceFolder);
        }

        // A null extensions array makes FileUtils.iterateFiles return all files.
        String[] extensions = fileTypesToBeAddedToZip == null ? null : new String[]{fileTypesToBeAddedToZip};
        Iterator<File> fileIterator = FileUtils.iterateFiles(srcFolder, extensions, true);

        // One creator per archive: a fresh instance keeps zip() callable more than once.
        ParallelScatterZipCreator scatterZipCreator = new ParallelScatterZipCreator();

        // The raw output stream is its own resource so that it is closed even when
        // closing the archive stream fails.
        try (OutputStream outputStream = new FileOutputStream(zipFilePath);
             ZipArchiveOutputStream zipArchiveOutputStream = new ZipArchiveOutputStream(outputStream)) {
            zipArchiveOutputStream.setUseZip64(Zip64Mode.AsNeeded);

            while (fileIterator.hasNext()) {
                File file = fileIterator.next();

                // Entry names are relative to the source folder and always use '/'.
                String relativePath = srcFolder.toPath()
                        .relativize(file.toPath())
                        .toString()
                        .replace(File.separatorChar, '/');

                ZipArchiveEntry zipArchiveEntry = new ZipArchiveEntry(relativePath);
                zipArchiveEntry.setMethod(ZipEntry.DEFLATED);
                InputStreamSupplier streamSupplier = () -> openForReading(file);
                scatterZipCreator.addArchiveEntry(zipArchiveEntry, streamSupplier);
            }

            try {
                scatterZipCreator.writeTo(zipArchiveOutputStream);
            } catch (InterruptedException e) {
                // Restore the interrupt flag for callers further up the stack.
                Thread.currentThread().interrupt();
                throw new IOException("Interrupted while writing " + zipFilePath, e);
            } catch (ExecutionException e) {
                throw new IOException("Failed to compress an entry of " + zipFilePath, e);
            }
        }
    }

    /**
     * Opens a file for reading on behalf of an {@link InputStreamSupplier}, which cannot
     * throw checked exceptions.
     *
     * @param file file to open
     * @return a new input stream for {@code file}
     * @throws UncheckedIOException if the file cannot be opened
     */
    private static InputStream openForReading(File file) {
        try {
            return Files.newInputStream(file.toPath());
        } catch (IOException e) {
            throw new UncheckedIOException("Cannot open " + file, e);
        }
    }

    /**
     * Extracts every entry of a ZIP archive below {@code destDir}, creating directories
     * as needed and overwriting existing files.
     *
     * @param zipFilePath archive to read
     * @param destDir     directory that receives the extracted files
     * @throws IOException if the archive cannot be read, a file cannot be written, or an
     *                     entry name would resolve outside of {@code destDir}
     */
    private static void unzip(String zipFilePath, String destDir) throws IOException {
        File destination = new File(destDir);
        ensureDirectory(destination);

        byte[] buffer = new byte[BUFFER_SIZE];
        try (InputStream fis = new FileInputStream(zipFilePath);
             ZipInputStream zis = new ZipInputStream(fis)) {
            ZipEntry ze;
            // getNextEntry() closes the previous entry before positioning at the next one.
            while ((ze = zis.getNextEntry()) != null) {
                File newFile = resolveInside(destination, ze.getName());

                System.out.println("Unzipping to " + newFile.getAbsolutePath());

                if (ze.isDirectory()) {
                    ensureDirectory(newFile);
                    continue;
                }

                // create directories for sub directories in zip
                File parent = newFile.getParentFile();
                if (parent != null) {
                    ensureDirectory(parent);
                }

                try (OutputStream fos = new FileOutputStream(newFile)) {
                    int len;
                    while ((len = zis.read(buffer)) != -1) {
                        fos.write(buffer, 0, len);
                    }
                }
            }
        }
    }

    /**
     * Resolves an entry name against the destination directory and rejects names (for
     * example ones containing {@code ../}) that would end up outside of it.
     *
     * @param destination extraction root
     * @param entryName   name stored in the archive
     * @return the file the entry is extracted to
     * @throws IOException if the resolved file is not located below {@code destination}
     */
    private static File resolveInside(File destination, String entryName) throws IOException {
        File target = new File(destination, entryName);
        String root = destination.getCanonicalPath();
        String rootPrefix = root.endsWith(File.separator) ? root : root + File.separator;
        String resolved = target.getCanonicalPath();
        if (!resolved.equals(root) && !resolved.startsWith(rootPrefix)) {
            throw new IOException("Zip entry escapes the destination directory: " + entryName);
        }
        return target;
    }

    /**
     * Makes sure a directory exists, creating it and any missing parents.
     *
     * @param dir directory to create
     * @throws IOException if the directory does not exist and cannot be created
     */
    private static void ensureDirectory(File dir) throws IOException {
        if (!dir.isDirectory() && !dir.mkdirs()) {
            throw new IOException("Could not create directory: " + dir);
        }
    }
}

Ref: Fast zipping folder using java ParallelScatterZipCreator

dkb
  • 4,389
  • 4
  • 36
  • 54
  • I will try this out and accept it. Thank you in advance. – Sameer Pradhan Feb 13 '19 at 13:15
  • hi @dkb, what would be the code without lambda? I tried this but failing. `InputStreamSupplier streamSupplier = (InputStreamSupplier) Files.newInputStream(file.toPath());` – Sameer Pradhan Feb 14 '19 at 15:52
  • `java.lang.ClassCastException: sun.nio.ch.ChannelInputStream cannot be cast to org.apache.commons.compress.parallel.InputStreamSupplier at ZipMain.zip(ZipMain.java:63) at ZipMain.main(ZipMain.java:28) ` Error – Sameer Pradhan Feb 14 '19 at 16:01
  • You need to add dependencies in your build file or pom.xml, no need to cast any class. – dkb Feb 14 '19 at 18:40
  • I have to run this against JRE 1.7, so I guess I need to remove the lambda implementation. Please guide. – Sameer Pradhan Feb 15 '19 at 05:09
  • I removed the lambda. `InputStreamSupplier streamSupplier = new FileInputStreamSupplier(file); //the inner class static class FileInputStreamSupplier implements InputStreamSupplier { private File sourceFile; FileInputStreamSupplier(File sourceFile) { this.sourceFile = sourceFile; } @Override public InputStream get() { InputStream is = null; try { is = Files.newInputStream(sourceFile.toPath()); } catch (IOException e) { e.printStackTrace(); } return is; } }` – Sameer Pradhan Feb 15 '19 at 05:26
  • It is generating the zip file. But, I am not able to open the zip file. Seems like I need to close some resources. Could you help ? – Sameer Pradhan Feb 15 '19 at 05:28
  • If you run the `unzip method` in code above, which is unzipping for you. Did you try that? – dkb Feb 15 '19 at 05:35
  • I am able to unzip it, if you are using `mac-osx` you can unzip with application https://theunarchiver.com/ – dkb Feb 15 '19 at 05:37
  • I have not tried unzipping. I am using windows with jre 1.7. Let me try that. – Sameer Pradhan Feb 15 '19 at 05:38
  • Okay, you can try command line of same app: https://theunarchiver.com/command-line – dkb Feb 15 '19 at 05:42
  • Yes. It is unzipping. – Sameer Pradhan Feb 15 '19 at 05:42
  • Glad it helped. – dkb Feb 15 '19 at 05:49
  • But why I am unable to open with windows. It should open right? – Sameer Pradhan Feb 15 '19 at 05:51
  • 1
    try the updated code, should work fine. added `if (zipArchiveOutputStream != null) { zipArchiveOutputStream.close(); }` – dkb Feb 15 '19 at 06:22
  • I have one doubt. Does it process the different files in parallel while zipping? – Sameer Pradhan Feb 15 '19 at 06:43
  • 1
    why not, just create multiple threads and add zip logic in Threads, passing different zip-folder path as an argument to each thread, that's it, post another question and will help you with that. or try yourself first, and then post question with problem you are facing. – dkb Feb 15 '19 at 06:46
  • I tried it. This is what I went so far. Please check once. >>> https://stackoverflow.com/questions/54709198/how-to-implement-scatterzipoutputstream-using-thread – Sameer Pradhan Feb 15 '19 at 12:18
  • I think there is a difference in "using multiple threads to make 1 zip file" and "using multiple threads to make different zip files i.e each thread of 1 zip file.". Above code already uses the parallel multiple threads as per the documentation – dkb Feb 15 '19 at 12:38
  • "It should be able to process different directory parallely and create a single zip out of it." --> in this case just use above code, it will work fine. – dkb Feb 15 '19 at 12:39
  • Thanks Mate. Understood. I have another question. How can I unzip with password? Any class or method let me do that in the above unzip method. – Sameer Pradhan Feb 16 '19 at 09:33
  • For the supplier creation we could create a **FunctionalInterface** and replace the call with this `ckSupp.wrap(() -> Files.newInputStream(file.toPath()) )` ### Implementation FunctionalInterface `@FunctionalInterface public interface ckSupp<R, E extends Exception> { R apply() throws E; static <E extends Exception> InputStreamSupplier wrap(ckSupp<InputStream, E> ckSupp) { return () -> { try { return ckSupp.apply(); } catch (Exception e) { throw new RuntimeException(e); } }; } }` – kelgwiin Jun 12 '20 at 15:15