1

I am using Goutte (https://github.com/FriendsOfPHP/Goutte). I keep getting the wrong URL when clicking the pagination link inside a while loop.

Calling selectLink on the crawler object returns the right URL on the first iteration of the while loop, but on the second iteration it appears to return the wrong link.

Here is the code.

/**
 * Keeps the injected Goutte client on the instance for later requests.
 *
 * @param Goutte\Client $client client assigned to $this->client
 */
public function __construct(Goutte\Client $client)
{
    $this->client = $client;
}

/**
 * Walks the paginated listing and dumps every batch of pagination hrefs.
 *
 * Requests the start URL, then repeatedly: narrows the current page to its
 * pagination block, collects that block's hrefs, and - while
 * getPaginationLinks() reports a next-batch label other than 'false' -
 * clicks the link carrying that label to load the next page.
 * The collected batches are finally handed to dd().
 */
public function parse()
{
    $startUrl = "https://www.nextag.com/Arts-Entertainment--zz2702147z0z1zB6c4z5---html";

    // Load the first page of the listing.
    $page    = $this->client->request('GET', $startUrl);
    $batches = array();

    while (true) {
        // Pagination block of the page currently loaded.
        $pagination = $this->paginationCrawler($page);

        // [0] => hrefs found on this page, [1] => next-batch label ('11+', '21+', ...) or 'false'.
        $result    = $this->getPaginationLinks($pagination);
        $batches[] = $result[0];
        $nextLabel = $result[1];

        if ($nextLabel == 'false') {
            break;
        }

        // NOTE(review): selectLink() looks links up by their text, so this relies on
        // the label identifying exactly the intended link inside #pagination - confirm.
        $nextLink = $pagination->selectLink($nextLabel)->link();
        $page     = $this->client->click($nextLink);
    }

    dd($batches);
}

/**
 * Narrows a page crawler down to its pagination container.
 *
 * @param mixed $crawler crawler for a loaded page; must expose filter()
 * @return mixed whatever filter('#pagination') yields for that crawler
 */
public function paginationCrawler($crawler)
{
    $pagination = $crawler->filter('#pagination');

    return $pagination;
}

/**
 * Collects the pagination hrefs and reports whether a further batch exists.
 *
 * @param mixed $links crawler scoped to the pagination block (parse() passes
 *                     the result of paginationCrawler())
 * @return array two elements:
 *               [0] list of href attribute values of every '#numbers a' node,
 *               [1] trimmed text of the first '#numbers :last-child' match when
 *                   it contains '+', otherwise the string 'false'
 */
public function getPaginationLinks($links)
{
    $linkNodes = $links->filter('#numbers a')->each(function (Crawler $a) {
        return $a->attr('href');
    });

    // NOTE(review): ':last-child' here is a descendant match, and text() reads the
    // first matching node - confirm that is the intended "next batch" element.
    $lastChild = $links->filter('#numbers :last-child');

    // text() throws InvalidArgumentException on an empty node list; a page without
    // a pagination block simply has no further batch, so report the sentinel instead.
    if (count($lastChild) === 0) {
        return array($linkNodes, 'false');
    }

    $lastPage = trim($lastChild->text());

    // Only labels such as '11+' or '21+' announce another batch of pages.
    if (strpos($lastPage, '+') === false) {
        $lastPage = 'false';
    }

    return array($linkNodes, $lastPage);
}

Here is the output:

enter image description here

Sankalp Tambe
  • 400
  • 4
  • 15
  • Solved, but with a workaround: instead of passing the link text **11+**, I am now passing a link object for the URL itself. I still don't understand what went wrong. – Sankalp Tambe Jan 25 '16 at 09:54

0 Answers0