0
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.Scanner;
import java.util.Map.Entry; 
import java.util.stream.Collectors;

public class LZW {

    private static Map<String, Integer> baseDict = new HashMap<String, Integer>();
    private static Map<Integer, String> revDict = new HashMap<Integer, String>();
    private static Scanner in = new Scanner(System.in);

    public static void makeText() {
        try {
            FileOutputStream fos = new FileOutputStream("text.txt");
            for (int n = 0; n < 5000; n++) {
                for (int i = 0; i < 256; i++) {
                    char ch = (char) i;
                    fos.write(ch);
                }
            }
            fos.close();
        } catch (IOException e) {
        // TODO Auto-generated catch block
            e.printStackTrace();
        }

    }

    public static void deflate(String fileName) {

        Map<String, Integer> dict = baseDict;
        try {
            FileInputStream fis = new FileInputStream(fileName);
            FileOutputStream fos = new FileOutputStream("compressed.lzw");
            DataInputStream dis = new DataInputStream(fis);
            DataOutputStream dos = new DataOutputStream(fos);
            int iBuf;
            char cBuf;
            String sBuf = "";

            iBuf = fis.read();
            cBuf = (char) iBuf;
            sBuf += cBuf;

            while ((iBuf = fis.read()) != -1) {

                cBuf = (char) iBuf;
                String temp = sBuf + cBuf;

                if (dict.containsKey(temp)) {
                    sBuf += cBuf;
                } else {
                    fos.write(dict.get(sBuf));
                    dict.put(temp, dict.size());
                    sBuf = "";
                    sBuf += cBuf;
                }

            }
            fos.write(dict.get(sBuf));

            fis.close();
            fos.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void inflate(String fileName) {

        Map<Integer, String> dict = revDict;
        try {
            FileInputStream fis = new FileInputStream(fileName);
            FileOutputStream fos = new FileOutputStream("decompressed.pdf");
            DataInputStream dis = new DataInputStream(fis);
            DataOutputStream dos = new DataOutputStream(fos);

            String entry, w;
            char ch;
            int prev, curr;

            prev = dis.read();
            w = dict.get(prev);
            dos.writeBytes(w);
            while ((curr = dis.read()) != -1) {
                if (dict.containsKey(curr)) {
                    entry = dict.get(curr);
                    dos.writeBytes(entry);
                    dict.put(dict.size(), (w + entry.charAt(0)));
                    w = entry;
                } else {
                    entry = w + w.charAt(0);
                    dos.writeBytes(entry);
                    dict.put(dict.size(), entry);
                    w = entry;
                }
            }

            dis.close();
            dos.close();
         } catch (IOException e) {
            e.printStackTrace();
        }

    }

    public static void init() {
        for (int i = 0; i < 256; i++) {
            char ascii = (char) i;
            baseDict.put(("" + ascii), i);
            revDict.put(i, ("" + ascii));
        }
        deflate("original.pdf");
        inflate("compressed.lzw");
    }

    public static void main(String[] args) {
        init();
    }

}

So I've been working on using LZW compression to compress diffenent files. My compression function seems to work fine as I'm able to compress a 5000kb pdf into approx 2200kb. However my decompression algorithm fails to decompress the pdf. The resulting file is still the same size as the compressed file and the pdf is corrupted. I was taking following some psuedo code :

read a character k;
output k;
w = k;
while ( read a character k )    
/* k could be a character or a code. */
{
    if k exists in the dictionary
        entry = dictionary entry for k;
        output entry;
        add w + entry[0] to dictionary;
        w = entry;
    else
        output entry = w + firstCharacterOf(w);
        add entry to dictionary;
        w = entry;
}

Could anyone steer me in the right direction as to why my decompression function is wrong?

InfinityCounter
  • 374
  • 2
  • 4
  • 14
  • What `int` values do you want `OutputStream.write(int)` to write for `InputStream.read()`to read? – greybeard Nov 28 '16 at 18:45
  • The int values I want to write are the unique index values that I have stored in my dictionary Map dict. I write those to the compressed file and want to read them back. – InfinityCounter Nov 28 '16 at 19:36
  • Can you please include the definitions of `revDict` and `baseDict`? – Chai T. Rex Nov 28 '16 at 19:59
  • I've updated the question to include the definitions. – InfinityCounter Nov 28 '16 at 20:40
  • `I write [the unique index values looked up in dict] to the compressed file` you wish: [`OutputStream.write(int b)`](https://docs.oracle.com/javase/8/docs/api/java/io/OutputStream.html#write-int-): `The 24 high-order bits of b are ignored`. – greybeard Nov 28 '16 at 21:50
  • You can't have used those definitions for the dictionaries, since they're not static, and the code wouldn't have compiled. Can you please copy and paste your entire file? – Chai T. Rex Nov 28 '16 at 22:30
  • @ChaiT.Rex I've copied the entire file to the post. – InfinityCounter Nov 29 '16 at 00:22
  • @greybeard I don't understand what you mean. Could you please explain a bit more? – InfinityCounter Nov 29 '16 at 00:23
  • You can use `OutputStream` and `InputStream` to transfer integers no less than 0 and less than 256. Every other information has to be _encoded_. T. Welch chose a [code using a growing integral number of bits](https://en.wikipedia.org/wiki/LZW#Variable-width_codes). Today, an [Arithmetic Code](https://en.wikipedia.org/wiki/Arithmetic_coding) seems more natural. – greybeard Nov 29 '16 at 08:31
  • (Please use a spelling checker. You are more likely to get useful answers when you succinctly state what you are trying to accomplish: most "things" in a PDF document are compressed, anyway, with `LZWDecode` among the choices.) – greybeard Nov 29 '16 at 08:35
  • (While much is to be said for keeping things explicit, consider using `import package.path.*` for presentation of more than, say five imports from a single package on SO/SE.) – greybeard Nov 29 '16 at 08:57

0 Answers0