2

The code has been updated to use unique_ptr and namespaces. NOTE: i tried to implement anonymous namespace inside of namespace huffman but it wont allow to separate the file into .cpp and .h. any critique on the current code is welcome. feel free to use the code as stated in the MIT agreement.

source.cpp:

/*
#######################################################################################################################################
Copyright 2017 Daniel Rossinsky

Permission is hereby granted, free of charge, to any person obtaining a copy of this software
and associated documentation files (the "Software"), to deal in the Software without restriction,
including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#######################################################################################################################################
*/

#include"Huffman.h"

int main(int argc, char *argv[]) {

    if (argc < 4) std::cout << "Too few arguments\n";
    else if (argc == 4) {
        if (*argv[1] == 'c') Huffman::compress(argv[2], argv[3], argv[3]);
        else if (*argv[1] == 'd') {
            std::string temp{ argv[2] };
            std::size_t pathEnd{ temp.find_last_of("/\\") };
            Huffman::decompress(argv[2], argv[3], temp.substr(0, pathEnd + 1));
        }//end of else if
        else std::cout << "Unknown command\n";
    }//end of else if
    else std::cout << "Too much arguments\n";
    return 0;

    //Huffman::compress("C:/Users/User/Desktop/test.txt", "C:/Users/User/Desktop/", "C:/Users/User/Desktop/");
    //Huffman::decompress("C:/Users/User/Desktop/testCompressed.bin", "C:/Users/User/Desktop/testKey.bin", "C:/Users/User/Desktop/");
}

/*
cmd example:
-----------
compress:
syntax: huffman.exe c filePath dest
example: C:/Users/User/Desktop/huffman.exe c C:/Users/User/Desktop/test.txt C:/Users/User/Desktop/

decompress:
syntax: huffman.exe d filePath keyPath
example: C:/Users/User/Desktop/huffman.exe d C:/Users/User/Desktop/testCompressed.bin C:/Users/User/Desktop/testKey.bin

NOTE:
-----
You can use the commented code in main instead
*/

Huffman.h:

#ifndef HUFFMAN
#define HUFFMAN

#include<iostream>
#include<map>
#include<vector>
#include<string>
#include<deque>
#include<memory>

namespace Huffman {
    namespace inner {
        struct node;

        /*type aliases*/
        using Table     = std::map<char, std::size_t>;
        using Cypher    = std::map<char, std::vector<bool> >;
        using smartNode = std::unique_ptr<node>;
        /*type aliases*/

        struct node {
            smartNode   m_left;
            smartNode   m_right;
            std::size_t m_frequency{};
            char        m_data{};

            node() = default;
            node(smartNode left, smartNode right) :
                m_left{ std::move(left) }, m_right{ std::move(right) } {
                m_frequency = m_left->m_frequency + m_right->m_frequency;
            }
        };

        struct functor {
            bool operator()(smartNode const& first, smartNode const& second) const
            {
                return first->m_frequency > second->m_frequency;
            }
        };

        /*shared functions*/
        smartNode makeTree(std::deque<smartNode>& nodeData);
        void readFile(const std::string& filePath, std::string& fileContent);
        std::deque<smartNode> storeFreqTable(const Table& table);
        /*shared functions*/

        /*compressor related functions*/
        void setNameAndExten(const std::string& filePath, std::string& fileName, std::string& fileExten);
        void UpdateFreqTable(Table& freqTable, const std::string& fileContent);
        void encode(smartNode const &root, Cypher& key, std::vector<bool>& code);
        void createBinaryFile(const std::string& filePath,
                              const std::string& fileName,
                              const std::string& fileContent,
                              Cypher& key,
                              std::vector<bool>& code);
        void createKey(const std::string& filePath,
                       const Table& freqTable,
                       const std::string& fileName,
                       const std::string& fileExten);
        /*compressor related functions*/

        /*decompressor related functions*/
        void readKey(Table& freqTable,
                     std::string& fileExten,
                     const std::string keyPath,
                     std::string& fileContent);
        std::size_t decodedContentSize(const Table& freqTable);
        void decode(const std::string& filePath,
                    std::string& decodedContent,
                    smartNode root,
                    std::string& fileName,
                    std::string& fileContent);
        void createFile(const std::string& decodedContent,
                        const std::string& locToDecompress,
                        const std::string& fileName,
                        const std::string& fileExten);
        /*decompressor related functions*/
    }//end of inner namespace

    void compress(const std::string& filePath, const std::string& locToCreateKey, const std::string& locToCompress);
    void decompress(const std::string& filePath, const std::string& keyPath, const std::string& locToDecompress);
}//end of Huffman namespace

#endif

Huffman.cpp:

#include"Huffman.h"
#include<fstream>
#include<sstream>
#include<algorithm>
#include<cstdlib>


/*----------------SHARED_FUNCTIONS_START----------------*/
Huffman::inner::smartNode Huffman::inner::makeTree(std::deque<smartNode>& nodeData) {
    while (nodeData.size() > 1) {
        std::sort(nodeData.begin(), nodeData.end(), functor());
        smartNode leftSon{ std::move(nodeData.back()) };
        nodeData.pop_back();
        smartNode rightSon{ std::move(nodeData.back()) };
        nodeData.pop_back();
        smartNode parent = std::make_unique<node>(std::move(leftSon), std::move(rightSon));
        nodeData.emplace_back(std::move(parent));
    }//end of while loop
    return std::move(nodeData.front());
}

void Huffman::inner::readFile(const std::string& filePath, std::string& fileContent) {
    std::ifstream inFile(filePath, std::ios::binary);
    if (inFile.is_open()) {
        auto const start_pos{ inFile.tellg() };
        inFile.ignore(std::numeric_limits<std::streamsize>::max());
        std::streamsize char_count{ inFile.gcount() };
        inFile.seekg(start_pos);
        fileContent = std::string(static_cast<std::size_t>(char_count), '0');
        inFile.read(&fileContent[0], static_cast<std::streamsize> (fileContent.size()));
        inFile.close();
    }//end of if
    else {
        std::cout << "Unable to open file\n";
        std::exit(EXIT_FAILURE);
    }//end of else
}

std::deque<Huffman::inner::smartNode> Huffman::inner::storeFreqTable(const Table& table) {
    std::deque<smartNode> nodeData;
    for (const auto& index : table) {
        smartNode leaf = std::make_unique<node>();
        leaf->m_data = index.first;
        leaf->m_frequency = index.second;
        nodeData.emplace_back(std::move(leaf));
    }//end of for loop
    return nodeData;
}
/*-----------------SHARED_FUNCTIONS_END-----------------*/

/*-----------------COMPRESSOR_FUNCTIONS_START-----------------*/
void Huffman::inner::setNameAndExten(const std::string& filePath,
                                     std::string& fileName,
                                     std::string& fileExten) {
    std::size_t foundName{ filePath.find_last_of("/\\") };
    std::size_t foundExten{ filePath.find_last_of('.') };
    fileName = filePath.substr(foundName + 1, foundExten - foundName - 1);
    fileExten = filePath.substr(foundExten);
}

void Huffman::inner::UpdateFreqTable(Table& freqTable, const std::string& fileContent) {
    for (const auto& data : fileContent) {
        ++freqTable[data];
    }//end of for loop
}

void Huffman::inner::encode(smartNode const &root,
                            Cypher& key,
                            std::vector<bool>& code) {
    if (root->m_left != nullptr) {
        code.emplace_back(false);
        encode(std::move(root->m_left), key, code);
    }//end of if
    if (root->m_right != nullptr) {
        code.emplace_back(true);
        encode(std::move(root->m_right), key, code);
    }//end of if 
    if (root->m_data) key[root->m_data] = code;
    if (!code.empty()) code.pop_back();
}

void Huffman::inner::createBinaryFile(const std::string& filePath,
                                      const std::string& fileName,
                                      const std::string& fileContent,
                                      Cypher& key,
                                      std::vector<bool>& code) {
    int offSet{}; int tempBuff{}; int inBuff{};
    std::ofstream outFile(filePath + fileName + "Compressed.bin", std::ios::binary);
    if (outFile.is_open()) {
        for (const auto& data : fileContent) {
            tempBuff = data;
            code = key[static_cast<char>(tempBuff)];
            for (const auto& index : code) {
                inBuff |= index << (7 - offSet);
                ++offSet;
                if (offSet == 8) {
                    offSet = 0;
                    outFile.put(static_cast<char>(inBuff));
                    inBuff = 0;
                }//end of if
            }//end of for loop
        }//end of for loop
        outFile.close();
    }//end of if
    else {
        std::cout << "Unable to open file\n";
        std::exit(EXIT_FAILURE);
    }//end of else
}

void Huffman::inner::createKey(const std::string& filePath,
                               const Table& freqTable,
                               const std::string& fileName,
                               const std::string& fileExten) {
    std::ofstream outFile(filePath + fileName + "Key.bin", std::ios::binary);
    if (outFile.is_open()) {
        auto&& index{ freqTable.begin() };
        do {
            outFile.put(index->first);
            outFile.put(' ');
            outFile << std::to_string(index->second);
            ++index;
            if (index != freqTable.end()) outFile.put(' ');
        } while (index != freqTable.end());
        outFile << fileExten;
        outFile.close();
    }//end of if
    else {
        std::cout << "Unable to open file\n";
        std::exit(EXIT_FAILURE);
    }//end of else
}
/*------------------COMPRESSOR_FUNCTIONS_END------------------*/

/*-----------------DECOMPRESSOR_FUNCTIONS_START-----------------*/
void Huffman::inner::readKey(Table& freqTable,
                             std::string& fileExten,
                             const std::string keyPath,
                             std::string& fileContent) {
    char buffer{};
    std::string freq{};
    readFile(keyPath, fileContent);
    for (std::size_t index{}; index < fileContent.length(); ++index) {
        buffer = fileContent[index];
        index += 2;
        do {
            freq += fileContent[index];
            ++index;
        } while ((fileContent[index] != ' ') && (fileContent[index] != '.'));
        if (fileContent[index] == '.') {
            fileExten = fileContent.substr(index, (fileContent.length() - 1));
            index = fileContent.length();
        }//end of if
        else {
            freqTable[buffer] = static_cast<unsigned int>(std::stoi(freq));
            freq.clear();
        }//end of else
    }//end of for
    freqTable[buffer] = static_cast<unsigned int>(std::stoi(freq));
    fileContent.clear();
    fileContent.shrink_to_fit();
}

std::size_t Huffman::inner::decodedContentSize(const Table& freqTable) {
    std::size_t size{};
    for (const auto& index : freqTable) size += index.second;                   
    return size;
}

void Huffman::inner::decode(const std::string& filePath,
                            std::string& decodedContent,
                            smartNode root,
                            std::string& fileName,
                            std::string& fileContent) {
    node* temp = root.get();
    int offSet{}; int inBuff{};
    std::size_t foundName{ filePath.find_last_of("/\\") };
    fileName = filePath.substr(foundName + 1, filePath.find_last_of('C') - foundName - 1);      
    readFile(filePath, fileContent);
    for (const auto& data : fileContent) {
        inBuff = data;
        while (offSet < 8) {
            if (inBuff & (1 << (7 - offSet))) temp = temp->m_right.get();
            else                              temp = temp->m_left.get();
            if (temp->m_data) {
                decodedContent += temp->m_data;
                temp = root.get();
            }//end of if 
            ++offSet;
        }//end of while
        offSet = 0;
    }//end of for
}

void Huffman::inner::createFile(const std::string& decodedContent,
                                const std::string& locToDecompress,
                                const std::string& fileName,
                                const std::string& fileExten) {
    std::ofstream outFile(locToDecompress + fileName + fileExten, std::ios::binary);
    if (outFile.is_open()) {
        outFile.write(&decodedContent[0], static_cast<std::streamsize>(decodedContent.size()));
        outFile.close();
    }//end of if
    else {
        std::cout << "Unable to open file\n";
        std::exit(EXIT_FAILURE);
    }//end of else
}
/*------------------DECOMPRESSOR_FUNCTIONS_END------------------*/

void Huffman::compress(const std::string& filePath,
                       const std::string& locToCreateKey,
                       const std::string& locToCompress) {
    std::string                        fileName;
    std::string                        fileExten;
    Huffman::inner::setNameAndExten(filePath, fileName, fileExten);

    std::string                        fileContent;
    Huffman::inner::readFile(filePath, fileContent);

    Huffman::inner::Table              freqTable;
    Huffman::inner::UpdateFreqTable(freqTable, fileContent);

    Huffman::inner::smartNode root = Huffman::inner::makeTree(Huffman::inner::storeFreqTable(freqTable));

    Huffman::inner::Cypher             key;
    std::vector<bool>                  code;
    encode(root, key, code);
    Huffman::inner::createBinaryFile(locToCompress, fileName, fileContent, key, code);
    Huffman::inner::createKey(locToCreateKey, freqTable, fileName, fileExten);
}

void Huffman::decompress(const std::string& filePath,
                         const std::string& keyPath,
                         const std::string& locToDecompress) {
    Huffman::inner::Table       freqTable;
    std::string                 fileExten;
    std::string                 fileContent;
    Huffman::inner::readKey(freqTable, fileExten, keyPath, fileContent);

    Huffman::inner::smartNode root = Huffman::inner::makeTree(Huffman::inner::storeFreqTable(freqTable));

    std::string                 fileName;
    std::string                 decodedContent;
    decodedContent.reserve(Huffman::inner::decodedContentSize(freqTable));
    decode(filePath, decodedContent, std::move(root), fileName, fileContent);
    Huffman::inner::createFile(decodedContent, locToDecompress, fileName, fileExten);
}
globalturist
  • 71
  • 1
  • 7
  • I don't think that you will get any noticeable performance gain from using a deque. You should use std::unique_ptr for left and right and your root. Then you never have to clenaup. Why is Huffman a class? There is no state at all! Use a namespace for the public functions and a anonymous namespace in the cpp file for the privat ones. –  May 11 '17 at 11:23
  • Those are excellent points! I will use namespaces instead of the class, but the problem with smart pointers is as I pointed out i'm not proficient with, and last time i tried adding them to the code i got a lot of strange errors but i will try again. Thanks for the replay btw! – globalturist May 11 '17 at 16:14

2 Answers2

1

EDIT: fixed function:

decode(const std::string& filePath,
                            std::string& decodedContent,
                            smartNode root,
                            std::string& fileName,
                            std::string& fileContent) {
    node* temp = root.get();
    int offSet{}; int inBuff{};
    std::size_t foundName{ filePath.find_last_of("/\\") };
    fileName = filePath.substr(foundName + 1, filePath.find_last_of('C') - foundName - 1);      
    readFile(filePath, fileContent);
    for (const auto& data : fileContent) {
        inBuff = data;
        while (offSet < 8) {
            if (inBuff & (1 << (7 - offSet))) temp = temp->m_right.get();
            else                              temp = temp->m_left.get();
            if (temp->m_data) {
                decodedContent += temp->m_data;
                temp = root.get();
            }//end of if 
            ++offSet;
        }//end of while
        offSet = 0;
    }//end of for
}
globalturist
  • 71
  • 1
  • 7
  • Um in the original code, you had `temp=root`, and in this code you have `root = std::make_unique(*temp)`. That is a massive change. Root changed sides of the equality! – Yakk - Adam Nevraumont May 13 '17 at 20:03
  • only if i change the line to [code]root = static_cast(temp);[/code] will it not trow errors but the program still crashes i have no idea of how to fix this part apart from making smartNode shared_ptr – globalturist May 14 '17 at 09:07
  • That entire function doesn't make sense. Temp is initialized to root, then later you assign root to/from temp, which seems pointless. I suspect that entire function is nonsense. That static cast is a bad idea. Changing it to a shared pointer might remove our build error and crash, but it won't fix the broken function. The point of unique ptr is that if you actually intend unique ownership, it breaks the build instead of at runtime when you screw up. This is *good*, you want it to break at build time when your logic is broken. – Yakk - Adam Nevraumont May 14 '17 at 12:32
  • well if you use my original code, the function is very similar and it works perfectly, the problem is as i pointed i'm not proficient with smart pointers, i only know the basics and that's why i do it with trial and error method. Now i'm trying to get around the use of temp and eliminate it but no success so far. If you can give me some advice on how to move on i will be greatfull. – globalturist May 14 '17 at 16:32
  • When you rewrote it, you completely changed how `temp` was used. `temp=root` and `root=temp` **are completely different things**. The original code without smart pointers has the assignment going one way; the code with smart pointers **you reversed the assignment**. I don't know which one is a conceptual error and which is not, but they both cannot make sense. – Yakk - Adam Nevraumont May 14 '17 at 20:48
  • Well i cant just get rid of it completely otherwise decompress wont work at all. – globalturist May 15 '17 at 11:26
  • When I say "X does not make sense" (in particular, you reversing the order of `temp` and `root`), this is me asking "why are you reversing the order". As yet I have heard zero explanation why you are reversing the order of `temp` and `root` on the sides of the `=` sign. Until I get an explanation, I am done. – Yakk - Adam Nevraumont May 15 '17 at 11:48
  • I didn't understand it was a question, i thought it was a statement sorry about that. i'm reserving the order because i need to iterate through the tree. Why do that ? Lets say i read a byte that consists from 3 encoded characters 01 0010 10 now i need to decode all 3 we do that by iterating through the tree and once we get to the bottom of the tree (i.e m_data = some character) we add that character to decoded content and than we need to go back to the top of the tree and continue the process untill all bits are decoded. – globalturist May 15 '17 at 14:23
  • This is my last comment unless I get a clear answer. Your answer does not answer my question. In the original post, you had `temp=root`; and `node* temp{root};` In your "using smart pointers" version you have `node* temp = root.get();` and `root = std::make_unique(*temp);` The order in the original post in the *second assignment* in the functions (within the loop) **is reversed** compared to the other. You "explained", but from reading your explanation I have no idea which is "the right order" or not, or **why the OP and the "smart pointer version" disagree**. – Yakk - Adam Nevraumont May 15 '17 at 18:49
  • When your problem is "I cannot translate this code to smart pointers" and your smart pointer version *IS USING A FUNDAMENTALLY DIFFERENT OPERATION* on the line you are having problems with, and you are not extremely crystal clear WHY YOU CHANGED IT, I cannot help you. Your explaination doesn't even touch why smart pointers would be part of the reason why the reversal is required. I cannot imagine a situation where both versions would be "correct", or how smart pointers could make reversing it correct compared to dumb while leaving everything else the same. – Yakk - Adam Nevraumont May 15 '17 at 18:51
  • Again, if your answer within 24 hours completely and clearly describes the situation, I may come back and help. If it does not, I am not going to return to this question. – Yakk - Adam Nevraumont May 15 '17 at 18:52
  • After a while of messing around with the function i made it work. A) I revisited the previous order B) i abused the get() function to get the functionality i wanted. I updated the code above to the one that works and the question i have now is it the proper way to do the task? (go through a binary tree) – globalturist May 15 '17 at 20:57
0

Tweak node:

using upnode = std::unique_ptr<node>;
struct node {
  upnode       m_left;
  upnode       m_right;
  std::size_t m_frequency{};
  char        m_data{};

  node()=default;
  node(upnode left, upnode right) :
    m_left{ std::move(left) }, m_right{ std::move(right) }
  {
    m_frequency = m_left->m_frequency + m_right->m_frequency;
  }
};

delete this API:

void Huffman::deleteTree(node* root)

code that uses stuff:

// std::move(nodeData) into `makeTree`:
upnode Huffman::makeTree(std::deque<upnode> nodeData) {
  while (nodeData.size() > 1) {
    // functor must take upnode const&:
    std::sort(nodeData.begin(), nodeData.end(), functor());
    upnode leftSon{ std::move(nodeData.back()) };
    nodeData.pop_back();
    upnode rightSon{ std::move(nodeData.back()) };
    nodeData.pop_back();
    upnode parent = std::make_unique<node>(std::move(leftSon), std::move(rightSon));
    nodeData.emplace_back(std::move(parent));
  }//end of while loop
  return std::move(nodeData.front());
}
// return the deque here, instead of return-by-reference
std::deque<upnode> Huffman::storeFreqTable(const table& table) {
  std::deque<upnode> nodeData;
  for (const auto& index : table) {
    upnode leaf = std::make_unique<node>();
    leaf->m_data = index.first;
    leaf->m_frequency = index.second;
    nodeData.emplace_back(std::move(leaf));
  }//end of for loop
  return nodeData; // move is implicit
}
void Huffman::encode(upnode const &root,
                 cypher& key,
                 std::vector<bool>& code) {
  if (root->m_left != nullptr) {
    code.emplace_back(false);
    encode(root->m_left, key, code);
  }//end of if
  if (root->m_right != nullptr) {
    code.emplace_back(true);
    encode(root->m_right, key, code);
  }//end of if 
  if (root->m_data) key[root->m_data] = code;
  if (!code.empty()) code.pop_back();
}

sample use changes. I also moved variable use near to initialization. No point in having whole piles of variables that exist for most of the function yet have garbage data in them.

void Huffman::compress(
  const std::string& filePath,
  const std::string& locToCreateKey,
  const std::string& locToCompress
) {
  std::string                        fileName;
  std::string                        fileExten;
  setNameAndExten(filePath, fileName, fileExten);

  std::string                        fileContent;
  readFile(filePath, fileContent);

  table                              freqTable;
  UpdateFreqTable(freqTable, fileContent);

  // these two lines could become one:
  std::deque<upnode> nodeData = storeFreqTable(freqTable);
  uproot root = makeTree(std::move(nodeData));
  // auto root = makeTree(storeFreqTable(freqTable));

  cypher                             key;
  std::vector<bool>                  code;
  encode(root, key, code);
  createBinaryFile(locToCompress, fileName, fileContent, key, code);
  createKey(locToCreateKey, freqTable, fileName, fileExten);
  /*compressor algorithm*/

  /*memory release*/
  root.reset(); // really, optional, destruction of root var does it
}
Yakk - Adam Nevraumont
  • 262,606
  • 27
  • 330
  • 524
  • Thanks for posting this code! I will first try to implement it myself and use this as a cheat sheet, its simple and to the point. – globalturist May 12 '17 at 05:58
  • I'm struggling on changing the encode function, it doesn't want to cooperate with unique_ptr because it need another pointer to root object which unique_ptr wont allow, is it a good idea to just make it shared_ptr and solve the problem ? – globalturist May 13 '17 at 08:28
  • @globalturist No, `shared_ptr` should only be used if you have objects of lifetime that must be complex and managed in more than one spot at a time. I wrote an `encode` above; it takes a `unique_ptr` by reference and doesn't make a copy? – Yakk - Adam Nevraumont May 13 '17 at 11:26
  • Sorry, named the wrong function i meant decode i'l post the code i rewritten so far. – globalturist May 13 '17 at 11:50
  • @globalturist Non-owning pointers to objects with sufficient lifetime can be raw pointers. So `temp` can be a raw pointer. Use `temp=root.get()` to set it. – Yakk - Adam Nevraumont May 13 '17 at 12:47
  • in the code i posted as an answer (here below) i did make it a raw pointer but still get an error: "Severity Code Description Project File Line Suppression State Error C2280 'node::node(const node &)': attempting to reference a deleted function compressortest c:\program files (x86)\microsoft visual studio 14.0\vc\include\memory 1630 " – globalturist May 13 '17 at 13:12