-2

I have to read two text files and then compare the words from the second file with those in the first one. Then I have to display the KnownWords, which are the words that appear in both files; the remaining words, which do not match, are the UnknownWords. The next step is to display the most frequent known words in DisplayMostFreqKnownWords() and the most frequent unknown words in DisplayMostFreqUnknownWords(). I have successfully completed DisplayMostFreqKnownWords(), and so far its output is all right. I copied the same code from DisplayMostFreqKnownWords() to DisplayMostFreqUnknownWords(), but that function does not show anything in the output. I don't know what is wrong. Can someone figure this one out?

Output is:

Displaying most frequent known words
       Word      Count
        the        19
          a        14
         of        11
 artificial        11
       that        10
         to         7
     signal         7
        and         7
         in         6
       they         5
Displaying most frequent unknown words
       Word      Count

Header file:

// Maps a word to a vector<int>. The DisplayMostFreq*Words() functions use the
// size of that vector as the word's occurrence count. What each int stores is
// not visible in this excerpt (presumably a position or paragraph number --
// confirm against ReadTxtFile()).
typedef map<string, vector<int> > WordMap;
typedef WordMap::iterator WordMapIter;

// Collects word statistics for a text file checked against a dictionary.
// Only DisplayMostFreqKnownWords() and DisplayMostFreqUnknownWords() are
// defined in this excerpt; the other members are declared here only.
class WordStats
{
public:
    WordStats();
    void ReadDictionary();
    void DisplayDictionary();
    void ReadTxtFile();
    void DisplayKnownWordStats();
    void DisplayUnknownWordStats();
    // Print the most frequent entries of KnownWords, highest count first.
    void DisplayMostFreqKnownWords();
    // Print the most frequent entries of UnknownWords, highest count first.
    void DisplayMostFreqUnknownWords();

private:
    WordMap KnownWords;      // words treated as known (presumably those found in Dictionary -- confirm)
    WordMap UnknownWords;    // words treated as unknown (presumably those not in Dictionary -- confirm)
    WordMapIter Paragraph;   // shared iterator; the DisplayMostFreq* functions use it as their loop cursor
    set<string> Dictionary;
    char Filename[256];
};  // a class definition must be terminated with ';'

My program:

// Displays the 10 most frequent words in KnownWords, highest count first.
//
// A word's frequency is the size of the vector stored for it in KnownWords.
// Writes a header line to cout followed by at most 10 "word count" rows.
// Words with equal counts appear in the multimap's reverse iteration order.
void WordStats::DisplayMostFreqKnownWords(){
    const int maxRows = 10;

    // Re-key the words by count; a multimap is needed because several words
    // can share the same count.
    multimap<int,string > displayFreqWords;

    // The member iterator Paragraph serves as the loop cursor and is left at
    // KnownWords.end() afterwards.
    for (Paragraph = KnownWords.begin(); Paragraph != KnownWords.end(); ++Paragraph){
        const int cnt = static_cast<int>(Paragraph->second.size());
        displayFreqWords.insert(pair<int,string>(cnt, Paragraph->first));
    }

    cout <<"           Word      Count\n";

    // The row counter must start at 0: reading an uninitialized int is
    // undefined behaviour and can make the loop print nothing at all.
    // The reverse iterator is taken only after the multimap has been filled.
    int count = 0;
    for (multimap<int,string >::reverse_iterator rit = displayFreqWords.rbegin();
         count < maxRows && rit != displayFreqWords.rend(); ++rit, ++count){
        cout << setw(15) << rit->second << setw(10) << rit->first << '\n';
    }
}
// Displays the 10 most frequent words in UnknownWords, highest count first.
//
// A word's frequency is the size of the vector stored for it in UnknownWords.
// Writes a header line to cout followed by at most 10 "word count" rows.
// Words with equal counts appear in the multimap's reverse iteration order.
// If UnknownWords is empty, only the header line is printed.
void WordStats::DisplayMostFreqUnknownWords(){
    const int maxRows = 10;

    // Re-key the words by count; a multimap is needed because several words
    // can share the same count.
    multimap<int,string > displayFreqUnknownWords;

    // The member iterator Paragraph serves as the loop cursor and is left at
    // UnknownWords.end() afterwards.
    for (Paragraph = UnknownWords.begin(); Paragraph != UnknownWords.end(); ++Paragraph){
        const int cnt = static_cast<int>(Paragraph->second.size());
        displayFreqUnknownWords.insert(pair<int,string>(cnt, Paragraph->first));
    }

    cout <<"           Word      Count\n";

    // The row counter must start at 0: reading an uninitialized int is
    // undefined behaviour, and a garbage value above the limit makes the loop
    // print nothing even though the multimap has entries.
    // The reverse iterator is taken only after the multimap has been filled.
    int count = 0;
    for (multimap<int,string >::reverse_iterator rrit = displayFreqUnknownWords.rbegin();
         count < maxRows && rrit != displayFreqUnknownWords.rend(); ++rrit, ++count){
        cout << setw(15) << rrit->second << setw(10) << rrit->first << '\n';
    }
}
muzzi
  • 382
  • 3
  • 10
  • 1
    It is unclear what you are asking. Please [edit] your post to clarify and provide a [mcve]. – Ron May 09 '18 at 11:36
  • The code uses `for(; count<=10` as a loop condition, but it never does `count = 0;` to set a starting value. – Bo Persson May 09 '18 at 11:39
  • 1) You never initialize `count` 2) You should check the contents of `UnknownWords` to see if you really put elements in `displayFreqUnknownWords` – Gaurav Sehgal May 09 '18 at 11:40
  • If these are the mistakes, then why is DisplayMostFreqKnownWords() working perfectly fine when both functions have almost identical code? Also, how do I check UnknownWords to know whether I really put elements into displayFreqUnknownWords? – muzzi May 09 '18 at 16:28

1 Answers1

0

Here's a way to express what I think is your use case. I have used C++17 structured bindings.

I have used unordered_map to deduce which words are known or unknown, and two multimaps to determine known and unknown word frequency.

Hope it's helpful.

#include <algorithm>
#include <fstream>
#include <functional>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <sstream>
#include <string>
#include <tuple>
#include <unordered_map>


// Set this to 1 to run a static test
#define TESTING 0

// Stream type of the two inputs: in-memory strings when TESTING is non-zero,
// files on disk otherwise.
#if TESTING
using input_type = std::istringstream;
#else
using input_type = std::ifstream;
#endif

/// Builds the (left, right) pair of input streams.
///
/// With TESTING enabled the streams read two fixed sentences; otherwise they
/// are opened on "left_file.txt" and "right_file.txt". The streams are
/// returned as constructed -- no check is made here that a file was opened.
std::tuple<input_type, input_type> open_inputs() {
#if TESTING
    const char *const left_source = "the big black cat sat on the grey mat";
    const char *const right_source = "the gold small cat lay on the purple mat";
#else
    const char *const left_source = "left_file.txt";
    const char *const right_source = "right_file.txt";
#endif
    // Braced initialisation keeps the left stream constructed before the right.
    return std::tuple<input_type, input_type>{input_type(left_source),
                                              input_type(right_source)};
}

/// Occurrence tallies of one word, kept separately for the left and the
/// right input.
struct Counts {
    int left_count = 0;   ///< tally for the left input
    int right_count = 0;  ///< tally for the right input

    /// Sum of both tallies.
    int total() const {
        const int sum = left_count + right_count;
        return sum;
    }

    /// True only when both tallies are non-zero.
    bool is_known() const {
        if (left_count == 0) {
            return false;
        }
        return right_count != 0;
    }

};

/// Calls `f` once for every whitespace-separated token read from `is`, in
/// stream order. Each token is passed as a const std::string reference.
/// Reading stops when extraction from the stream fails (e.g. end of input).
template<class F>
void for_each_word_in_file(std::istream &is, F f) {
    using word_iterator = std::istream_iterator<std::string>;

    const word_iterator end_of_input;  // default-constructed == end-of-stream
    for (word_iterator current(is); current != end_of_input; ++current) {
        f(*current);
    }
}

int main() {

    // open files
    auto[left, right] = open_inputs();

    auto known_words = std::unordered_map<std::string, Counts>();

    // count words in each file

    for_each_word_in_file(left, [&known_words](auto &&word) {
        ++known_words[word].left_count;
    });

    for_each_word_in_file(right, [&known_words](auto &&word) {
        ++known_words[word].right_count;
    });

    // map counts to words, in descending order, allowing multiple entries of the same count

    std::multimap<int, std::string, std::greater<>> known_ordered, unknown_ordered;

    // iterate all words seen, putting into appropriate map

    for (auto&&[word, counts] : known_words) {
        (counts.is_known() ? known_ordered : unknown_ordered)
                .emplace(counts.total(), word);
    }

    // emit results

    std::cout << "Known words by frequency\n";
    for (auto&&[freq, word] : known_ordered) {
        std::cout << std::setw(15) << word << " " << freq << '\n';
    }

    std::cout << "\nUmknown words by frequency\n";
    for (auto&&[freq, word] : unknown_ordered) {
        std::cout << std::setw(15) << word << " " << freq << '\n';
    }
}
Richard Hodges
  • 68,278
  • 7
  • 90
  • 142