I am trying to write a parser to read large text file in C++. Similar python code using readtable method is approximately 7 to 8 times faster.
I am wonder why it runs so slow in C++. Most of the time is taken in using istringstream to parse lines to separate table numbers. It will be great if someone can point issue with code or alternative to istringstream. The code is as below:
'''
#include <fstream>
#include <iostream>
#include <string>
#include <sstream>
#include <vector>
#include <algorithm>
#include <chrono>
using namespace std::chrono;
int main()
{
auto start = high_resolution_clock::now();
std::ifstream inf{ "/Users/***/some.bed" };
std::istringstream iss;
int aprox_nlines = 7000000;
std::vector<int>* ptr_st = new std::vector<int>();
std::vector<int>& start_v = *ptr_st;
start_v.reserve(aprox_nlines);
std::vector<int>* ptr_en = new std::vector<int>();
std::vector<int>& end_v = *ptr_en;
end_v.reserve(aprox_nlines);
// If we couldn't open the output file stream for reading
if (!inf)
{
// Print an error and exit
std::cerr << "Uh oh, File could not be opened for reading!" << std::endl;
return 1;
}
int count=0;
std::string line;
int sstart;
int end_val;
std::string val;
if (inf.is_open())
{
while (getline(inf, line))
{
count += 1;
iss.str(line);
iss >> val;
iss >> sstart;
start_v.push_back(sstart);
iss >> end_val;
end_v.push_back(end_val);
}
std::cout << count<<"\n";
inf.close();
}
auto stop = high_resolution_clock::now();
auto duration = duration_cast<microseconds>(stop - start);
std::cout << "Time taken by function: " << duration.count() << " microseconds" <<"\n";
return 0;
}
'''