I have a huge csv file of nearly 20k rows with below format:
file,tools,edit,syntax,buffers
a,b,c,perl,d
a,w,c33,java,d
a,e,c,perl,d
a,s,c,python,d1
a,n,c,php,d3
d,r,hhh,cpp,d0
d,m,hhh,c#,d0
a,o,c,pdf,d3
a,f,c,python,dd
a,h,c,perl,dg
a,yb,c,c,ddf
a,b,c,perl,dt
wa,b,c33,java,d
d,buuu,hhh,cpp,d0
d44,b,hhh,nlp,d0
a,be,c,js,d4
wa,b,c33,java,d
wa,b,c33,python,d
wa,b,c33,python,d
wa,b,c33,c#,d
wa,b,c33,python,d
wa,b,c33,php,d
wa,b,c33,python,d
wa,b,c33,php,d
wa,b,c33,python,d
wa,b,c33,perl,d
wa,b,c33,php,d
wa,b,c33,java,d
wa,b,c33,python,d
I need to put 2 lines of pattern with same syntax (i.e 4th column) at the top. And then the rest of the lines will be there as is. That means first two lines with syntax as 'perl', then followed by 'java' , 'python' etc that way.
I have so far written this below code using seek and tell to make it optimized. However, it is not working as expected.
use strict;
use warnings;
open(FP, "+<mycsv.csv");
my %hash = ();
my $cur_pos;
while(<FP>) {
my $line = $_;
chomp $line;
#print "$line aaa\n";
if($line =~ /^file\,tools,/) {next;}
if($line =~ /^\w+\,\w+\,\w+,(\w+)\,.*$/) {
my $type = $1;
#print "type $type\n";
if($hash{$type}->{count} < 2 ) {
#print "--- here type = $type | lastpos = ", $hash{$type}->{lastpos} , "\n";
$cur_pos = tell(FP);
my $pos = tell(FP) - length($line);
if($hash{$type}->{lastpos} ) {
my $lastpos = $hash{$type}->{lastpos};
seek(FP, $lastpos, 1);
print FP $line;
seek(FP, $cur_pos, 1);
}
$hash{$type}->{lastpos} = $pos;
}
if(exists $hash{$type} ) {
$hash{$type}->{count} += 1;
} else {
$hash{$type}->{count} = 1;
}
}
}
close(FP);
The expected output should look like below:
file,tools,edit,syntax,buffers
a,b,c,perl,d
a,e,c,perl,d
a,w,c33,java,d
wa,b,c33,java,d
a,s,c,python,d1
a,f,c,python,dd
a,n,c,php,d3
wa,b,c33,php,d
d,r,hhh,cpp,d0
d,buuu,hhh,cpp,d0
d,m,hhh,c#,d0
wa,b,c33,c#,d
a,o,c,pdf,d3
a,yb,c,c,ddf
d44,b,hhh,nlp,d0
a,be,c,js,d4
a,h,c,perl,dg
a,b,c,perl,dt
wa,b,c33,java,d
wa,b,c33,python,d
wa,b,c33,python,d
wa,b,c33,python,d
wa,b,c33,python,d
wa,b,c33,php,d
wa,b,c33,python,d
wa,b,c33,perl,d
wa,b,c33,php,d
wa,b,c33,java,d
wa,b,c33,python,d
Any help to make it work would be much appreciated.
Thanks.