-2

I'm trying to code a lzw data compressor/decompressor, br/>
So i did make a compressor (who seems to work but maybe not) using this Algorithm but when i try to decompress it i get a weird result not looking at all like the original file ...

I think my mistake is in the way i get and/or use data from files but i dont know for sure...So here is the functions i use to compress and decompress and of course any critic / question is welcome .

EDIT : TO GIVE A MCVE
Input text file getting compressed containing : banana_bandana
Output result by decompression : ìû# ÿ# ü# ìû# ÿ# ü# ìû# ÿ# ü# ìû# ÿ# ü# ìû# ÿ# ü# ìû# ÿ# ü# ìû# ÿ# ü# ìû# ÿ# ü# ìû# ÿ# ü# ìû# ÿ# ü#


EDIT 2 : All useful function to reproduce the output :

using Encoding = uint16_t;
#define MAX 4096 //2^12
static int code = 0;

string combi (string s , char c ){
    s += c ;
    return s;
}

string findkey(unordered_map<string, int>& Dico , int val ){

    string key ;
    string empty ;
    unordered_map<string, int>::const_iterator it;

    for (it = Dico.begin(); it != Dico.end(); ++it){
        if (it->second == val){
            key = it->first;
            return key;
        }else {return empty;}
    }
}
string findkey2(unordered_map<string, Encoding>& Dico , Encoding val ){

    string key ;
    string empty ;
    unordered_map<string, Encoding>::const_iterator it;

    for (it = Dico.begin(); it != Dico.end(); ++it){
        if (it->second == val){
            key = it->first;
            return key;
        }else {return empty;}
    }
}

void InitDico (unordered_map<string, int>& Dico) {
    Dico.clear();
    string s = "";
    char c;
    for (code = 0; code < 256; code++)
    {
        c = (char)code;
        s += c;
        Dico[s] = code;
        s.clear();
    }

}

void InitDico2 (unordered_map<string, Encoding>& Dico) {
    Dico.clear();
    string s = "";
    char c;
    for (code = 0; code < 256; code++)
    {
        c = (char)code;
        s+= c;
        Encoding sizeplus = Dico.size();
        Dico[s] = sizeplus;
        s.clear();
    }
}


void compress(ifstream &is, ofstream &of){
    unordered_map<string,int> Dico ;
    InitDico(Dico);
    string s = "";
    char c ;

    while(is.get(c)){
        if(Dico.size() == MAX){
            InitDico(Dico);
        }
        if(Dico.count(combi(s,c))){
            s += c;
        }else{
            Dico.insert({(combi(s,c)),code});
            code ++;
            of.write(reinterpret_cast<const char *> (&Dico.at(s)),sizeof(code));
            s = c;
        }
    }
    of.write(reinterpret_cast<const char *> (&Dico.at(s)),sizeof(code));
} 

void compress2(ifstream &is, ofstream &of){
    unordered_map<string,Encoding> Dico ;
    InitDico2(Dico);
    string s = "";
    char c ;
    int max = numeric_limits<Encoding>::max();
    while(is.get(c)){
        if(Dico.size() == max){
            InitDico2(Dico);
        }
        if(Dico.count(combi(s,c))){
            s += c;
        }else{
            Encoding sizeplus = Dico.size();
            Dico[{(combi(s,c))}] = sizeplus;
            of.write(reinterpret_cast<const char *> (&Dico.at(s)),sizeof(Encoding));
            s = c;
        }
    }
    of.write(reinterpret_cast<const char *> (&Dico.at(s)),sizeof(Encoding));
} 

void decompress(ifstream &is, ofstream &of){
    unordered_map<string,int> Dico ;
    InitDico(Dico);
    string s , prevstring;
    char c ;
    int prevcode,currcode ;
    is.read(reinterpret_cast<char *>(&prevcode),sizeof(prevcode));
    s = findkey(Dico,prevcode);
    of.write(reinterpret_cast<const char *> (&s) , sizeof(s));

    while(is.read(reinterpret_cast<char *>(&currcode),sizeof(currcode))){
        s = findkey(Dico,currcode);
        of.write(reinterpret_cast<const char *> (&s) , sizeof(s));
        c =s[0];
        prevstring = findkey(Dico,prevcode);
        Dico.insert({(combi(prevstring,c)),code});
        prevcode = currcode;
    }
}

void decompress2(ifstream &is, ofstream &of){//Decompression using uint16 and another algorithm
    unordered_map<string,Encoding> Dico ;
    InitDico2(Dico);
    Encoding n ;
    is.read(reinterpret_cast<char*>(&n),sizeof(n));
    string v = findkey2(Dico,n);
    string w ;
    string entry;
    of.write(reinterpret_cast<const char *> (&v) , sizeof(v));
    w = v ;
    while(is.read(reinterpret_cast<char *>(&n),sizeof(n))){
        v = findkey2(Dico,n);
        if (Dico.count(v)){
            entry = v ;
        }else{entry = combi(w,w[0]);}
        of.write(reinterpret_cast<const char *> (&entry) , sizeof(entry));
        Encoding sizeplus =  Dico.size();
        Dico[combi(w,entry[0])]=sizeplus;
        w = entry;
    }

}
Haza
  • 97
  • 1
  • 2
  • 10
  • Unless you are inventing new algorithms for things like compression, encryption, etc, it is usually better to use pre-existing implementations instead. There are plenty of libraries readily available if you look around. – Remy Lebeau Jun 22 '18 at 01:09
  • 1
    @RemyLebeau im not really trying to use it for a purpose it just a way to learn C++ and it seemed like an easy algorithm to code ... – Haza Jun 22 '18 at 01:13
  • 1
    Does the data you've encoded decompress correctly with any other utility that can decompress lzw? – Retired Ninja Jun 22 '18 at 01:27
  • Please provide [**A Minimal, Complete, and Verifiable Example (MCVE)**](http://stackoverflow.com/help/mcve) including example input and output you currently get and why that doesn't match the expected. As I read your question, you are essentially asking "Did I implement the process shown in the link correctly?" That is overbroad without a MCVE. – David C. Rankin Jun 22 '18 at 01:32
  • @DavidC.Rankin Sorry for the inconvenience if you have any idea to make my post more clear and readable please ask . – Haza Jun 22 '18 at 01:51
  • FYI, doing an algorithm like MD5 or SHA1 is a much easier (but comparable) beginner project. And obviously, you should never actually *use* your own implementation. – o11c Jun 22 '18 at 01:54
  • @o11c I will have a look at that , Thank you – Haza Jun 22 '18 at 01:55
  • @RetiredNinja i've just tried and no it doesnt ... – Haza Jun 22 '18 at 02:11
  • @Haza, yes please post a MCVE that we can compile and test. That helps us help you. Don't expect people to write an example to fill in the parts of the code you left out. I've looked at the page. Much will depend on your *dictionary* declaration and handling. That isn't provided in your question. Please visit the link I posted. It explains how to provide a MCVE. I think implementing the algorithm will be great learning for you. We are happy to help, but please help us help you. – David C. Rankin Jun 22 '18 at 02:16
  • @DavidC.Rankin Again really sorry , now i provided all the things needed to reproduce the error . – Haza Jun 22 '18 at 02:24
  • Here's an example that encodes and decodes "banana_bandana". https://ideone.com/7lHvxv I found the various examples here to be quite helpful: https://rosettacode.org/wiki/LZW_compression – Retired Ninja Jun 22 '18 at 04:02
  • @RetiredNinja Thank you , looking at those example it's seems like my compression function is doing her job . – Haza Jun 22 '18 at 04:47

1 Answers1

1

One problem I see is when you're writing out your decompressed data to the file, you write the string object, and not the string data contained within the object. To do that, you need to get to the data the object holds. In decompress, replace both of your writes with

of.write(s.c_str(), s.length());

A similar change needs to be made in decompress2 (twice).

1201ProgramAlarm
  • 32,384
  • 7
  • 42
  • 56