I've been working on this for awhile now, and I haven't been able to solve the issue. Essentially, if a user were to be given a ".txt" file in which the following was written, "tHe@doG#wENt&uP$tHE!hiLL!"
, the end result would need to look like this:
the
hed
edo
dog
ogw
gwe
wen
ent
ntu
...
I could go on but I'm sure it's self-explanatory. The code I've been using to try and accomplish this is shown below.
#include "functions.h"
#include "bigint/bigint.h"
#include <ctype.h>
#include <string>
#include <cstdio>
#include <stdio.h>
int main(int argc, char *argv[]) {
std::vector<std::string> v(17575);
std::string tri1;
std::string tri2;
std::string tri3;
for (int i = 1; i < argc; i++) {
char* filename = argv[i];
std::ifstream infile;
infile.open(filename);
// executes for discoverable files
if(! infile.fail()) {
char ch;
while (infile.get(ch)) {
if (isspace(ch) || ispunct(ch) || isdigit(ch)) {
continue;
}
else if (isupper(ch)) {
ch = tolower(ch);
}
tri1 += ch;
char ch2 = infile.peek();
if (isspace(ch2) || ispunct(ch2) || isdigit(ch2)) {
ch2 = infile.peek() + 1;
}
else if (isupper(ch2)) {
ch2 = islower(ch2);
}
tri2 += ch2;
char ch3 = infile.peek() + 1;
if (isspace(ch3) || ispunct(ch3) || isdigit(ch3)) {
ch3 = infile.peek() + 2;
}
else if (isupper(ch3)) {
ch3 = islower(ch3);
}
tri3 += ch3;
if (tri1.length() == 3) {
std::cout << tri1 << std::endl;
v.push_back(tri1);
tri1 = "";
}
if (tri2.length() == 3) {
std::cout << tri2 << std::endl;
v.push_back(tri2);
tri2 = "";
}
if (tri3.length() == 3) {
std::cout << tri3 << std::endl;
v.push_back(tri3);
tri3 = "";
}
} //while
infile.close();
}
// executes for non-discoverable files
else {
std::cerr << "Unable to open file: " << filename << '\n';
}
}
return 0;
}
This has to do with Frequency Analysis of Trigrams (the vectors I'm going to be using later), and I feel like there is a way to shrink the amount of conditionals I have down, I just don't know how. Any help will be greatly appreciated!