Whenever I run the following Perl script I got the errors below
Use of uninitialized value $date in concatenation (.) or string at D:\sagar\toc\Online_TOC.pl line 111, <> line 1.
Use of uninitialized value $first_page in concatenation (.) or string at D:\sagar\toc\Online_TOC.pl line 111, <> line 1.
Use of uninitialized value $last_page in concatenation (.) or string at D:\sagar\toc\Online_TOC.pl line 111, <> line 1.
The following code is run at the command prmpt by giving URL
http://ajpheart.physiology.org/content/309/11
It generates the meta_issue11.xml
file but does not give proper output.
#!/usr/bin/perl
use warnings;
use strict;
use feature qw{ say };
use HTML::Parser;
use WWW::Mechanize;
my ( $date, $first_page, $last_page, @toc );
sub get_date {
my ( $self, $tag, $attr ) = @_;
if ( 'span' eq $tag
and $attr->{class}
and 'highwire-cite-metadata-date' eq $attr->{class}
and not defined $date )
{
$self->handler( text => \&next_text_to_date, 'self, text' );
}
elsif ( 'span' eq $tag
and $attr->{class}
and 'highwire-cite-metadata-pages' eq $attr->{class} )
{
if ( not defined $first_page ) {
$self->handler( text => \&parse_first_page, 'self, text' );
}
else {
$self->handler( text => \&parse_last_page, 'self, text' );
}
}
elsif ( 'span' eq $tag
and $attr->{class}
and 'highwire-cite-metadata-doi' eq $attr->{class} )
{
$self->handler( text => \&retrieve_doi, 'self, text' );
}
elsif ( 'div' eq $tag
and $attr->{class}
and $attr->{class} =~ /\bissue-toc-section\b/ )
{
$self->handler( text => \&next_text_to_toc, 'self, text' );
}
}
sub next_text_to_date {
my ( $self, $text ) = @_;
$text =~ s/^\s+|\s+$//g;
$date = $text;
$self->handler( text => undef );
}
sub parse_first_page {
my ( $self, $text ) = @_;
if ( $text =~ /([A-Z0-9]+)(?:-[0-9A-Z]+)?/ ) {
$first_page = $1;
$self->handler( text => undef );
}
}
sub parse_last_page {
my ( $self, $text ) = @_;
if ( $text =~ /(?:[A-Z0-9]+-)?([0-9A-Z]+)/ ) {
$last_page = $1;
$self->handler( text => undef );
}
}
sub next_text_to_toc {
my ( $self, $text ) = @_;
push @toc, [$text];
$self->handler( text => undef );
}
sub retrieve_doi {
my ( $self, $text ) = @_;
if ( 'DOI:' ne $text ) {
$text =~ s/^\s+|\s+$//g;
push @{ $toc[-1] }, $text;
$self->handler( text => undef );
}
}
print STDERR 'Enter the URL: ';
chomp( my $url = <> );
my ( $volume, $issue ) = ( split m(/), $url )[ -2, -1 ];
my $p = 'HTML::Parser'->new(
api_version => 3,
start_h => [ \&get_date, 'self, tagname, attr' ],
);
my $mech = 'WWW::Mechanize'->new( agent => 'Mozilla' );
$mech->get( $url );
my $contents = $mech->content;
$p->parse( $contents );
$p->eof;
my $toc;
for my $section ( @toc ) {
$toc .= "<TocSection>\n";
$toc .= "<Heading>" . shift( @$section ) . "</Heading>\n";
$toc .= join q(), map "<DOI>$_</DOI>\n", @$section;
$toc .= "</TocSection>\n";
}
open( F6, ">meta_issue_$issue.xml" );
print F6 <<"__HTML__";
<!DOCTYPE MetaIssue SYSTEM "http://schema.highwire.org/public/toc/MetaIssue.pubids.dtd">
<MetaIssue volume="$volume" issue="$issue">
<Provider>Cadmus</Provider>
<IssueDate>$date</IssueDate>
<PageRange>$first_page-$last_page</PageRange>
<TOC>$toc</TOC>
</MetaIssue>
__HTML__