I'm using Bioperl
to find GOterms
for genes.
I retrieve an html
file, convert it to text, get rid of all extra spaces and newlines, and try to go through the resulting array.
However, I keep getting errors for accessing uninitialized values in the array. I put in many checks to make sure the array is not empty and that I'm not going out of bounds. How can I get rid of this error?
I reposted the code in a more readable format. Thank you for your help.
It seems to successfully parse out the correct data from the html so I don't know what's wrong.
#!/usr/bin/perl -w
use strict;
use LWP::Simple;
use HTML::TreeBuilder;
use HTML::FormatText;
my $URL = get("http://amigo.geneontology.org/amigo/term/GO:0000001");
my $Format = HTML::FormatText->new;
my $TreeBuilder = HTML::TreeBuilder->new;
$TreeBuilder->parse($URL);
my $Parsed = $Format->format($TreeBuilder);
print "$Parsed";
my @parsed = split( /[ ]{2,}|(\n+)|(\r+)/, $Parsed );
if ( @parsed == 1 ) { return; }
my %termhash;
my $count = 0;
while ( $count < @parsed ) {
if ( defined $parsed[$count] && $parsed[$count] eq 'Name' ) {
my $count2 = $count;
while ( ( $parsed[$count2] ne 'Feedback' ) && ( $count2 < @parsed ) ) {
$count2++;
}
$count2--;
@parsed = @parsed[ $count .. $count2 ]; # Gets the slice of the array needed
last;
}
$count++;
}
if ( @parsed <= 1 ) { return; }
print "\n";
print @parsed;
$count = 0;
while ( $count < @parsed ) {
if ( $parsed[$count] eq 'Name' ) {
while ( $parsed[$count] ne 'Ontology' && ( $count < @parsed )) {
$termhash{'Name'} .= $parsed[$count];
$count++;
}
}
if ( $parsed[$count] eq 'Ontology' ) {
while ( $parsed[$count] ne 'Synonyms' && ( $count < @parsed )) {
$termhash{'Category'} .= $parsed[$count];
$count++;
}
}
if ( $parsed[$count] eq 'Synonyms' ) {
while ( $parsed[$count] ne 'Definition' && ( $count < @parsed )) {
$termhash{'Aliases'} .= $parsed[$count];
$count++;
}
}
if ( $parsed[$count] eq 'Definition' ) {
while ( $parsed[$count] ne 'Comment' && ( $count < @parsed )) {
$termhash{'Definition'} .= $parsed[$count];
$count++;
}
}
if ( $parsed[$count] eq 'Comment' ) {
while ( $parsed[$count] ne 'History' && ( $count < @parsed )) {
$termhash{'Comment'} .= $parsed[$count];
$count++;
}
}
if ( $parsed[$count] eq 'History' ) {
while ( $parsed[$count] ne 'Subset' && ( $count < @parsed )) {
$termhash{'Version'} .= $parsed[$count];
$count++;
}
}
if ( $parsed[$count] eq 'Subset' ) {
while ( ( $parsed[$count] ne 'Community' ) && ( $count < @parsed ) ) {
$count++;
}
}
if ( $parsed[$count] eq 'Community' ) {
while ( ( $parsed[$count] ne 'Related' ) && ( $count < @parsed ) ) {
$count++;
}
}
if ( $parsed[$count] eq 'Related' ) {
for ( $count < @parsed ) {
$termhash{'Definition references'} .= $parsed[$count];
$count++;
}
}
}
if ( $termhash{'Definition'} =~ m/OBSOLETE/ ) { $termhash{'Is obsolete'} = 1 }
else { $termhash{'Is obsolete'} = 0 }
#print %termhash;
The main error messages are:
Use of uninitialized value $parsed[127] in string ne at /home/adur/workspace/BI7643/ParseGOhtml.pl line 23.
Use of uninitialized value $parsed[1] in print at /home/adur/workspace/BI7643/ParseGOhtml.pl line 35.
Use of uninitialized value $parsed[1] in string ne at /home/adur/workspace/BI7643/ParseGOhtml.pl line 42.
Use of uninitialized value $parsed[1] in concatenation (.) or string at /home/adur/workspace/BI7643/ParseGOhtml.pl line 41.
Use of uninitialized value $parsed[17] in string ne at /home/adur/workspace/BI7643/ParseGOhtml.pl line 48.
Use of uninitialized value $parsed[17] in concatenation (.) or string at /home/adur/workspace/BI7643/ParseGOhtml.pl line 47.
Use of uninitialized value $parsed[29] in string ne at /home/adur/workspace/BI7643/ParseGOhtml.pl line 54.
Use of uninitialized value $parsed[29] in concatenation (.) or string at /home/adur/workspace/BI7643/ParseGOhtml.pl line 53.
Use of uninitialized value $parsed[41] in string ne at /home/adur/workspace/BI7643/ParseGOhtml.pl line 60.
Use of uninitialized value $parsed[41] in concatenation (.) or string at /home/adur/workspace/BI7643/ParseGOhtml.pl line 59.
Use of uninitialized value $parsed[79] in string ne at /home/adur/workspace/BI7643/ParseGOhtml.pl line 66.
Use of uninitialized value $parsed[79] in concatenation (.) or string at /home/adur/workspace/BI7643/ParseGOhtml.pl line 65.
Use of uninitialized value $parsed[83] in string ne at /home/adur/workspace/BI7643/ParseGOhtml.pl line 72.
Use of uninitialized value $parsed[83] in concatenation (.) or string at /home/adur/workspace/BI7643/ParseGOhtml.pl line 71.
Use of uninitialized value $parsed[95] in string ne at /home/adur/workspace/BI7643/ParseGOhtml.pl line 77.
Use of uninitialized value $parsed[107] in string ne at /home/adur/workspace/BI7643/ParseGOhtml.pl line 82.