import asyncio
from playwright.async_api import Playwright, async_playwright, expect
# Get image URLs.
# Output: img_urls.csv (consists of instances of {"property_id": str, "img_urls": []})
async def run(playwright):
    """Open a Zoopla listing, step through its gallery and print each <img> src.

    Args:
        playwright: The object yielded by ``async_playwright()``; its
            ``chromium`` launcher is used to start the browser.

    The browser is launched with ``headless=False``. The context and the
    browser are closed in ``finally`` blocks so they are released even when
    navigation or one of the clicks raises.
    """
    browser = await playwright.chromium.launch(headless=False)
    try:
        context = await browser.new_context()
        try:
            # Open new page
            page = await context.new_page()
            # Go to https://www.zoopla.co.uk/for-sale/details/49240624/
            await page.goto("https://www.zoopla.co.uk/for-sale/details/49240624/")
            # The cookie banner lives inside the privacy-manager iframe, so it
            # has to be reached through a frame locator.
            await page.frame_locator("[aria-label=\"Privacy Manager window\\.\"]").locator("button:has-text(\"Accept all cookies\")").click()
            # Advance the gallery five times.
            # NOTE(review): presumably this makes the page load further
            # gallery images before the DOM is scanned -- confirm.
            for _ in range(5):
                await page.locator("[data-testid=\"arrow_right\"]").click()
            # Fetch img urls: every <img> on the page is included, not only
            # gallery photos.
            imgs = await page.query_selector_all("img")
            for img in imgs:
                src = await img.get_attribute("src")
                print(src)
        finally:
            await context.close()
    finally:
        await browser.close()
async def main() -> None:
    """Start Playwright, hand the driver to ``run`` and shut it down afterwards."""
    async with async_playwright() as pw:
        await run(pw)
# Only start the scrape when executed as a script, so that importing this
# module does not launch a browser as a side effect.
if __name__ == "__main__":
    asyncio.run(main())
The above would return
https://lid.zoocdn.com/u/2400/1800/26d9845a91c7fe21834b531a292533dcf16f6754.jpg
https://lid.zoocdn.com/u/2400/1800/2267900ffd5e795f568bf1305a5eab0b95e59e5f.jpg
https://lid.zoocdn.com/u/2400/1800/75d0a22274ed94c1b33db52f5c5cc1022905df02.jpg
https://lid.zoocdn.com/u/2400/1800/d459c1d0ff8e7a1b52f667a48e593f72f91ea368.jpg
https://lid.zoocdn.com/u/2400/1800/0e21775e213c064fd83916b27e795536c816edb0.jpg
https://maps.googleapis.com/maps/api/staticmap?size=792x398&format=jpg&scale=2&center=51.535651,-0.006482&maptype=roadmap&zoom=15&channel=Lex-LDP&client=gme-zooplapropertygroup&sensor=false&markers=scale:2%7Cicon:https://r.zoocdn.com/assets/map-static-pin-purple-76.png%7C51.535651,-0.006482&signature=EZukT7ugiBKGYFT9F9phLleIXBs=
https://lid.zoocdn.com/u/2400/1800/55e734ae277ee03b2d46f55298acd762727bd727.gif
https://r.zoocdn.com/_next/static/images/natwest-dd532b27dc13112df4f05058c26a990a.svg
https://st.zoocdn.com/zoopla_static_agent_logo_(584439).png
Every time you click the right arrow to move on to the next image, it returns a different number of URLs in the console. How do you specifically get only the URLs that end with ".jpg" or simply contain ".jpg"? (I could check whether a URL contains ".jpg" in Python, but I want to know if there's an official way to achieve this with Playwright Python.)