The test.fq.gz file contains many four-line records like the one below:
@HWI-ST298:420:B08APABXX:3:1101:1244:2212 1:N:0:TCATTC
GGCAAGGCACTTACTTTACAGCTAAAGAAGTGCAGC
+
@@@FDFFDFHCFDACGHC<<CCFEHHFCCFCEE:C?
What I want to do is read the *.fq.gz file four lines at a time, in parallel, with OpenMP. The code below compiles successfully, but it sometimes produces incorrect results. In each iteration of the for loop I call getline() four times to read the file. I'm not sure how OpenMP handles the multiple reads within each iteration, or how the .gz file handle advances between iterations when the loop is parallelized by OpenMP.
I've searched the internet and the OpenMP documentation for help, but I still don't quite get it. Any help would be appreciated.
Thanks,
#include <iostream>
#include <string>
#include <cstdlib>
#include <gzstream.h>
#include <omp.h>
using namespace std;
// Forward declaration; the definition is not in this part of the file.
// Takes a sequence string by value and returns a string. main() calls it on
// the second line of a record whenever the record's third line equals "-".
// NOTE(review): presumably returns the reverse-complement of `seq` -- confirm
// against the definition.
string reverseStrand (string seq);
int main (int argc, char ** argv) {
const char* gzFqFile;
unsigned int nReads;
if (argc == 3) {
gzFqFile = argv[1];
nReads = atoi(argv[2]); }
else {
printf("\n%s <*.fq.gz> <number_of_reads>\n", argv[0]);
return 1; }
igzstream gz(gzFqFile);
string li, bp36, strand, revBp36;
unsigned int i;
#pragma omp parallel shared(gz) private(i,li,bp36,strand,revBp36)
{
#pragma omp for schedule(dynamic)
for(i = 0;i < nReads;++i) {
li = "";
bp36 = "";
strand = "";
revBp36 = "";
getline(gz,li,'\n');
getline(gz,li,'\n');
bp36 = li;
getline(gz,li,'\n');
strand = li;
getline(gz,li,'\n');
if(strand.compare("-") == 0) {
revBp36 = reverseStrand(bp36);
}
cout << bp36 << " " << strand << " " << revBp36 << "\n";
}
}
gz.close();
}