I have a very strange bug in my code that is a little hard to explain. Let me begin with what the program does: basically, the C++ program takes input text (from a file named "input.txt" in the same directory) and uses Markov Chains to generate some artificial output text that resembles the style of the input text and prints it to the terminal.
It works when I copy and paste the text of 'Alice in Wonderland' (http://paulo-jorente.de/text/alice_oz.txt) directly into "input.txt", but if I add any words or characters to the beginning or end of the contents of the text file, then the code stops running (or runs infinitely). However, this does not happen if I add text anywhere in the middle of the contents of the text file.
If you would to test it yourself, try running the code with Alice in Wonderland copied into "input.txt". Then after it runs successfully, go to input.txt and type some random characters or words after the last of the text from 'Alice' ("...home again!") and try to run it again; it will fail.
Here is the code:
#include <ctime>
#include <iostream>
#include <algorithm>
#include <fstream>
#include <string>
#include <vector>
#include <map>
using namespace std;
class markovTweet{
string fileText;
map<string, vector<string> > dictionary;
public:
void create(unsigned int keyLength, unsigned int words) {
ifstream f("input.txt");
if(f.good()){
fileText.assign((istreambuf_iterator<char>(f)), istreambuf_iterator<char>());
}else{
cout << "File cannot be read. Ensure there is a file called input.txt in this directory." << "\n" << endl;
return;
}
if(fileText.length() < 1){
return;
}
cout << "\n" << "file imported" << "\n";
createDictionary(keyLength);
cout << "\n" << "createDictionary" << "\n" << "\n";
createText(words - keyLength);
cout << "\n" << "text created, done" << endl;
}
private:
void createText(int w) {
string key, first, second;
size_t next;
map<string, vector<string> >::iterator it = dictionary.begin();
advance( it, rand() % dictionary.size() );
key = (*it).first;
cout << key;
while(true) {
vector<string> d = dictionary[key];
if(d.size() < 1) break;
second = d[rand() % d.size()];
if(second.length() < 1) break;
cout << " " << second;
if(--w < 0) break;
next = key.find_first_of( 32, 0 );
first = key.substr( next + 1 );
key = first + " " + second;
}
cout << "\n";
}
void createDictionary(unsigned int kl) {
string w1, key;
size_t wc = 0, pos, next;
next = fileText.find_first_not_of( 32, 0 );
if(next == string::npos) return;
while(wc < kl) {
pos = fileText.find_first_of(' ', next);
w1 = fileText.substr(next, pos - next);
key += w1 + " ";
next = fileText.find_first_not_of(32, pos + 1);
if(next == string::npos) return;
wc++;
}
key = key.substr(0, key.size() - 1);
while(true) {
next = fileText.find_first_not_of(32, pos + 1);
if(next == string::npos) return;
pos = fileText.find_first_of(32, next);
w1 = fileText.substr(next, pos - next);
if(w1.size() < 1) break;
if(find( dictionary[key].begin(), dictionary[key].end(), w1) == dictionary[key].end() )
dictionary[key].push_back(w1);
key = key.substr(key.find_first_of(32) + 1) + " " + w1;
}
}
};
int main() {
markovTweet t;
cout << "\n" << "Artificially generated tweet using Markov Chains based off of input.txt: " << "\n" << "\n";
//lower first number is more random sounding text, second number is how long output is.
t.create(4, 30);
return 0;
}
This is a very strange bug and any help that you can offer is much appreciated! Thanks!