i use scrapy playwright to scrape zillow but instead of getting all 40 listings i just get 8 i made sure i used the css selector for all 40 listings please help
import scrapy
from ..items import ZillowScrapItem
from scrapy_playwright.page import PageMethod
from scrapy.selector import Selector
class ScrapSpider(scrapy.Spider):
name = "scrap"
def start_requests(self):
yield scrapy.Request(
url="https://www.zillow.com/san-jose-ca/scroll",
meta=dict(
playwright=True,
playwright_include_page=True,
playwright_page_methods=[
PageMethod("wait_for_selector", '.property-card-data a'),
PageMethod('evaluate', "window.scrollBy(0, document.body.scrollHeight)"),
PageMethod("wait_for_selector", '#zpid_19470263 .Image-c11n-8-84-0__sc-1rtmhsc-0'),
],
errback=self.errback,
),
callback=self.parse
)
async def parse(self, response):
items = ZillowScrapItem()
price = response.css('.bqsBln span::text').extract()
address = response.css('address::text').extract()
bed = response.css('.dmDolk li:nth-child(1) b::text').extract()
bath = response.css('.dmDolk li:nth-child(2) b::text').extract()
area = response.css('.dmDolk li~ li+ li b::text').extract()
link = response.css('.property-card-data a::attr(href)').extract()
img_link = response.css('.gFiOAr::attr(src)').extract()
items['price'] = price
items['address'] = address
items['bed'] = bed
items['bath'] = bath
items['area'] = area
items['link'] = link
items['img_link'] = img_link
yield items
async def errback(self, failure):
page = failure.request.meta["playwright_page"]
await page.close()
i have tried response.meta but it's not working iam using ubuntu wsl on windows 10 what is the proplem here?
page = response.meta["playwright"]