Given that the data sequence you're after may be split across lines, you need to read the data in the smallest "bite" you can - two-character pairs - and ignore whitespace (the space or newline delimiters).
Once you do this, you can keep track of what you've read as you write it to your sub-file. Once you get your "magic sequence", start a new sub-file.
Two complexities that you don't cover:
- Can the "magic sequence" ever occur inside a file as part of its normal data? If so, you're going to split an otherwise-single file.
- I assume you don't want the "magic sequence" at the end of every sub-file. That's going to add a little complexity to your comparison:
- If you start to match, you need to suspend writing to the sub-file.
- If you get halfway through and suddenly stop matching, you're going to have to write out the partial match before writing out the new non-matching entry.
One advantage in doing it this way: if a sub-file, while still inside the main file, started near the end of a line, its output will start on a new line and break after every 16 two-character pairs rather than mimic its position in the main file. Or did you want the sub-files output in true bytes, without space delimiters?
I'm going to go away and write this program: it sounds like fun!
OK, I wrote the following. Hopefully the Usage describes what to do. I didn't particularly want to use streams - I find them horribly inefficient - but you started it...
//
// SubFile.cpp
//
#include <string>
#include <fstream>
#include <iostream>
#include <iomanip>
using namespace std;
// Number of byte values written on one line of a text-mode sub-file.
constexpr unsigned MaxBytesPerLine = 16;

// Byte sequence whose detection in the input starts a new sub-file.
constexpr unsigned char magic[] = {
    0x20, 0x00, 0x20, 0x00,
    0x00, 0x10, 0x00, 0x00
};
// One output sub-file.
// Inherits privately from std::ofstream so that only the operations listed
// below are reachable from outside the class.
class OutFile : private std::ofstream {
public:
    // Opens 'fileName' for output; 'bin' selects binary rather than hex text.
    OutFile(const std::string &fileName, bool bin);
    ~OutFile();

    // Writes a single byte value; returns whether the stream is still good.
    bool Write(unsigned b);

    // Re-exported so callers can check whether the file was opened.
    using std::ofstream::is_open;

private:
    unsigned num;   // Count of values already written on the current text line
    bool bin;       // True when output is binary instead of hex text
}; // OutFile
// Opens 'filename' for writing.
//   filename - path of the sub-file to create
//   bin      - true to write raw bytes, false to write hex text
// In binary mode the stream is opened with ios_base::binary so that the
// runtime performs no newline translation on the bytes written (without it,
// a 0x0A byte can be expanded on platforms that distinguish text and binary
// streams). In text mode hex digits are emitted in upper case.
// Callers check is_open() afterwards to find out whether the open succeeded.
OutFile::OutFile(const std::string &filename, bool bin) :
    std::ofstream(filename,
                  bin ? std::ios_base::out | std::ios_base::binary
                      : std::ios_base::out),
    num(0),
    bin(bin) {
    if (!bin) {
        setf(std::ios_base::uppercase);
    } // if
} // OutFile::OutFile(name, bin)
// Writes one byte value to the sub-file.
//   b - the value to write
// Binary mode: 'b' is converted to a single char and written raw.
// Text mode: 'b' is written in hex, zero-padded to two digits, preceded by a
// space unless it is the first value on the line; a newline follows every
// MaxBytesPerLine values.
// Returns true if the stream is still in a good state after the write.
bool OutFile::Write(unsigned b) {
    if (bin) {
        // Convert to one char first so a single byte is written regardless
        // of how 'unsigned' is laid out in memory.
        const char c = static_cast<char>(b);
        return write(&c, 1).good();
    } // if
    if (num > 0) {
        *this << ' ';
    } // if
    *this << std::setbase(16) << std::setw(2) << std::setfill('0') << b;
    if (++num == MaxBytesPerLine) {
        // '\n' instead of endl: end the line without forcing a flush after
        // every 16 values; the stream is flushed when it is closed.
        *this << '\n';
        num = 0;
    } // if
    return good();
} // OutFile::Write(b)
// Terminates a partially filled last line of a text-mode sub-file.
// Nothing is written in binary mode, when the current line is empty, or when
// the stream is no longer in a good state.
OutFile::~OutFile() {
    const bool lineUnfinished = !bin && num != 0;
    if (lineUnfinished && good()) {
        *this << std::endl;
    } // if
} // OutFile::~OutFile
// Prints the command-line help to standard output.
//   argv0 - program name as invoked, shown in the usage line
// The magic sequence is listed as two-digit hex values. The hex base and '0'
// fill character set on cout here remain in effect after the call.
void Usage(char *argv0) {
    std::cout << "Usage:" << std::endl
              << " " << argv0 << " <filename.txt> [bin]" << std::endl
              << " Read <filename.txt> in hex char pairs, ignoring whitespace." << std::endl
              << " Write pairs out to multiple sub-files, called \"1.txt\", \"2.txt\" etc." << std::endl
              << " New files are started when the following sequence is detected: " << std::endl
              << " ";
    for (const unsigned char magicByte : magic) {
        std::cout << ' ' << std::hex << std::setw(2) << std::setfill('0')
                  << static_cast<int>(magicByte);
    } // for
    std::cout << std::endl
              << " If bin is specified: write out in binary, and files have a '.bin' extension" << std::endl;
} // Usage(argv0)
// Returns the number of leading bytes of 'magic' that are matched once byte
// 'b' arrives after the first 'pos' bytes of 'magic' were already matched.
// Requires pos < sizeof(magic).
// The result is the longest prefix of 'magic' that is a suffix of
// magic[0..pos) followed by 'b'. A mismatch therefore falls back to the
// longest still-possible match instead of restarting from zero, so a magic
// sequence that begins inside a failed partial match is not missed.
static unsigned NextMatchLength(unsigned pos, unsigned b) {
    for (unsigned len = pos + 1; len > 0; --len) {
        if (magic[len - 1] != b) {
            continue;
        } // if
        // The len-1 bytes in front of 'b' are the tail of magic[0..pos);
        // they must equal the head of 'magic' for a match of length 'len'.
        const unsigned shift = pos + 1 - len;
        unsigned i = 0;
        while (i + 1 < len && magic[i] == magic[shift + i]) {
            ++i;
        } // while
        if (i + 1 == len) {
            return len;
        } // if
    } // for
    return 0;
} // NextMatchLength(pos, b)

// Splits the hex-text file named by argv[1] into numbered sub-files, starting
// a new sub-file each time the 'magic' byte sequence is read. The magic
// sequence itself is not written to any sub-file.
// argv[2], if present and starting with 'b', selects binary output.
// Exit codes:
//    0  success (input consumed up to end of file)
//    1  no input file name given
//    2  input file could not be opened
//   N+2 sub-file number N could not be created
//   -1  input contains a value larger than 0xFF
//   -2  input contains something that is not a hex value
//   -3  writing to a sub-file failed
int main(int argc, char *argv[]) {
    if (argc < 2) {
        Usage(argv[0]);
        return 1;
    } // if
    std::ifstream inFile(argv[1]);
    if (!inFile.is_open()) {
        std::cerr << "Could not open '" << argv[1] << "'!" << std::endl;
        Usage(argv[0]);
        return 2;
    } // if
    const bool bin = (argc >= 3) &&
                     (argv[2][0] == 'b'); // Close enough!
    unsigned fileNum = 0; // Current output file number
    inFile >> std::setbase(16); // All inFile accesses will be like this
    while (inFile.good()) { // Let's get started!
        const std::string outFileName =
            std::to_string(++fileNum) + (bin ? ".bin" : ".txt");
        OutFile outFile(outFileName, bin);
        if (!outFile.is_open()) {
            std::cerr << "Could not create " << outFileName << "!" << std::endl;
            return static_cast<int>(fileNum + 2);
        } // if
        unsigned b = 0;        // byte read in
        unsigned pos = 0;      // Number of 'magic' bytes currently withheld
        bool sawMagic = false; // Whether this sub-file ended on the magic
        bool writeOk = true;   // Whether every Write so far succeeded
        while (writeOk && (inFile >> b)) {
            if (b > 0xFF) {
                std::cerr << argv[1] << " contains illegal value: "
                          << std::hex << std::uppercase << std::showbase
                          << b << std::endl;
                return -1;
            } // if
            // The candidate window is magic[0..pos) followed by b. Keep the
            // longest part of it that can still grow into the magic sequence
            // and release everything in front of that to the sub-file.
            const unsigned next = NextMatchLength(pos, b);
            const unsigned released = pos + 1 - next;
            for (unsigned i = 0; i < released; ++i) {
                writeOk = outFile.Write(i < pos ? magic[i] : b) && writeOk;
            } // for
            pos = next;
            if (pos == sizeof(magic)) { // ALL the magic?
                sawMagic = true;
                break; // Leave!
            } // if
        } // while
        if (!sawMagic) {
            // Input ended (or failed) in the middle of a partial match:
            // the withheld bytes were ordinary data, so write them out.
            for (unsigned i = 0; i < pos; ++i) {
                writeOk = outFile.Write(magic[i]) && writeOk;
            } // for
        } // if
        if (!writeOk) {
            std::cerr << "Could not write to " << outFileName << "!" << std::endl;
            return -3;
        } // if
    } // while
    if (inFile.eof()) {
        return 0; // Success!
    } // if
    // Extraction failed before end of file: show the rest of the bad line.
    std::string s;
    inFile.clear();
    std::getline(inFile, s);
    std::cerr << argv[1] << " contains invalid data: " << s << std::endl;
    return -2;
} // main(argc,argv)
Whenever someone posts code, there are invariably comments posted:
"Why didn't you do this?"
"Why did you do that?"
Let the floodgates open!