0

I use scrapy-playwright to scrape Zillow, but instead of getting all 40 listings I only get 8. I made sure I used the CSS selector that matches all 40 listings. Please help.

import scrapy
from ..items import ZillowScrapItem
from scrapy_playwright.page import PageMethod
from scrapy.selector import Selector
class ScrapSpider(scrapy.Spider):
    """Scrape listing cards from a Playwright-rendered Zillow search page.

    A single request is issued for the search URL. Playwright waits for the
    first listing cards, scrolls the window once, waits for a specific card
    image, and hands the rendered HTML to :meth:`parse`.
    """

    name = "scrap"

    def start_requests(self):
        """Yield the Playwright-enabled request for the search results page."""
        yield scrapy.Request(
            url="https://www.zillow.com/san-jose-ca/scroll",
            meta=dict(
                playwright=True,
                # The page object is exposed as meta["playwright_page"]; the
                # callback and errback are then responsible for closing it.
                playwright_include_page=True,
                playwright_page_methods=[
                    PageMethod("wait_for_selector", '.property-card-data a'),
                    # NOTE(review): this scrolls the window once. If the
                    # results live in their own scrollable container, or are
                    # lazy-loaded incrementally, one window scroll may not
                    # load every card -- confirm against the live page.
                    PageMethod('evaluate', "window.scrollBy(0, document.body.scrollHeight)"),
                    # NOTE(review): this selector is tied to one listing id
                    # and a generated class name; presumably it times out
                    # once that listing is gone -- verify.
                    PageMethod("wait_for_selector", '#zpid_19470263 .Image-c11n-8-84-0__sc-1rtmhsc-0'),
                ],
            ),
            callback=self.parse,
            # errback must be a Request argument; inside ``meta`` it is just
            # an arbitrary value and Scrapy never calls it on failure.
            errback=self.errback,
        )

    async def parse(self, response):
        """Extract listing fields from the rendered page and yield one item.

        Each field of the yielded ``ZillowScrapItem`` holds the list of all
        matches for its selector on the page.
        """
        # The HTML is already captured in ``response``, so the page can be
        # released right away; leaving it open leaks one page per response.
        page = response.meta.get("playwright_page")
        if page is not None:
            await page.close()

        items = ZillowScrapItem()
        items['price'] = response.css('.bqsBln span::text').getall()
        items['address'] = response.css('address::text').getall()
        items['bed'] = response.css('.dmDolk li:nth-child(1) b::text').getall()
        items['bath'] = response.css('.dmDolk li:nth-child(2) b::text').getall()
        items['area'] = response.css('.dmDolk li~ li+ li b::text').getall()
        items['link'] = response.css('.property-card-data a::attr(href)').getall()
        items['img_link'] = response.css('.gFiOAr::attr(src)').getall()
        yield items

    async def errback(self, failure):
        """Close the Playwright page attached to a failed request, if any."""
        # The request may have failed before a page was attached to its meta,
        # so do not assume the key exists.
        page = failure.request.meta.get("playwright_page")
        if page is not None:
            await page.close()

I have tried response.meta, but it's not working. I am using Ubuntu on WSL on Windows 10. What is the problem here?

page = response.meta["playwright"]
  • you likely have to scroll the page in order to get the rest – Alexander Jun 03 '23 at 00:27
  • @Alexander Hey, thank you for your answer. I have tried to use await page.evaluate("window.scrollBy(0, document.body.scrollHeight)"), but I get this error: AttributeError: 'bool' object has no attribute 'evaluate'. I watched a YouTube video and it works fine in the video. I don't know how to fix it. Looking forward to your answer. – Sabry Belal Jun 03 '23 at 14:36

0 Answers0