I have that following subroutine, and have appx 20 threads calling it with different URLs (this sub belongs to a package, each thread calls a different instance of that package):
sub get_urls {
my ($self,$url,$depth) = @_;
my $cv = AnyEvent->condvar;
my @data;
my %visited;
my $hostname = URI->new($url)->host();
my $tr_cb;
my ($b,$e) = (0,0);
return unless($depth);
# This code-ref is recursive!
$tr_cb = sub {
my $sitem = shift;
my $depth = shift;
return if (0 == $depth--);
foreach my $site (@$sitem) {
if (exists($visited{$site})) {
next;
}
$b++;
$visited{$site} = 1;
$cv->begin;
AnyEvent::HTTP::http_get ($site, timeout => 1, sub {
my ($body, $hdr) = @_;
if ($hdr->{Status} =~ m/^2/) {
my $extor = HTML::SimpleLinkExtor->new();
my @links;
print "E = $e | B = $b\n";
#print "[REC_DEPTH:$depth]Working on $site\n";
$extor->parse($body);
@links = map { URI->new_abs($_,$site)->as_string }
grep { length > 2 } $extor->links();
push(@data,@links);
$tr_cb->([map { $_->[2] }
grep { $_->[0] eq $_->[1] }
map { [$hostname,URI->new($_)->host(),$_] } @links],$depth);
}
$e++;
$cv->end;
});
}
};
$tr_cb->([$url],$depth);
$cv->recv;
print "Got total of " . @data . " links\n";
}
the ($b,$e)
variables are there for testing only.
the issue is, after some time, it seems like the number of 'begins' does not match the amount of 'ends' hence it never gets pass $cv->recv
...
I'm kinda new to AnyEvent and Event-Programming in general, cant seem to fine my issue.
Thanks,