I am trying to scrape Tokopedia. When I use the raw, hard-coded request, it works and the JSON is returned. However, when I build the request from variables instead, it fails with a read timeout. Website: https://www.tokopedia.com/samudrasembako/regal-marie-roll-230-gram?extParam=ivf%3Dfalse&src=topads Here's the code:
!pip install fake_useragent
!pip install httpx
import requests
from fake_useragent import UserAgent
import httpx
# Product pages whose PDP layout JSON should be fetched.
tokopedia = ['https://www.tokopedia.com/samudrasembako/regal-marie-roll-230-gram?extParam=ivf%3Dfalse&src=topads']

# GraphQL endpoint that receives the PDPGetLayoutQuery operation.
GRAPHQL_ENDPOINT = "https://gql.tokopedia.com/graphql/PDPGetLayoutQuery"

# httpx gives up after 5 seconds by default; allow the request more time
# before a timeout exception is raised.
REQUEST_TIMEOUT_SECONDS = 30.0

# GraphQL document sent as the "query" field of the payload. It is split into
# one literal per fragment purely for readability; adjacent literals are
# concatenated into a single string.
PDP_LAYOUT_QUERY = (
    "fragment ProductVariant on pdpDataProductVariant {\n errorCode\n parentID\n defaultChild\n sizeChart\n totalStockFmt\n variants {\n productVariantID\n variantID\n name\n identifier\n option {\n picture {\n urlOriginal: url\n urlThumbnail: url100\n __typename\n }\n productVariantOptionID\n variantUnitValueID\n value\n hex\n stock\n __typename\n }\n __typename\n }\n children {\n productID\n price\n priceFmt\n optionID\n optionName\n productName\n productURL\n picture {\n urlOriginal: url\n urlThumbnail: url100\n __typename\n }\n stock {\n stock\n isBuyable\n stockWordingHTML\n minimumOrder\n maximumOrder\n __typename\n }\n isCOD\n isWishlist\n campaignInfo {\n campaignID\n campaignType\n campaignTypeName\n campaignIdentifier\n background\n discountPercentage\n originalPrice\n discountPrice\n stock\n stockSoldPercentage\n startDate\n endDate\n endDateUnix\n appLinks\n isAppsOnly\n isActive\n hideGimmick\n isCheckImei\n minOrder\n __typename\n }\n thematicCampaign {\n additionalInfo\n background\n campaignName\n icon\n __typename\n }\n __typename\n }\n __typename\n}\n\n"
    "fragment ProductMedia on pdpDataProductMedia {\n media {\n type\n urlOriginal: URLOriginal\n urlThumbnail: URLThumbnail\n urlMaxRes: URLMaxRes\n videoUrl: videoURLAndroid\n prefix\n suffix\n description\n variantOptionID\n __typename\n }\n videos {\n source\n url\n __typename\n }\n __typename\n}\n\n"
    "fragment ProductHighlight on pdpDataProductContent {\n name\n price {\n value\n currency\n __typename\n }\n campaign {\n campaignID\n campaignType\n campaignTypeName\n campaignIdentifier\n background\n percentageAmount\n originalPrice\n discountedPrice\n originalStock\n stock\n stockSoldPercentage\n threshold\n startDate\n endDate\n endDateUnix\n appLinks\n isAppsOnly\n isActive\n hideGimmick\n __typename\n }\n thematicCampaign {\n additionalInfo\n background\n campaignName\n icon\n __typename\n }\n stock {\n useStock\n value\n stockWording\n __typename\n }\n variant {\n isVariant\n parentID\n __typename\n }\n wholesale {\n minQty\n price {\n value\n currency\n __typename\n }\n __typename\n }\n isCashback {\n percentage\n __typename\n }\n isTradeIn\n isOS\n isPowerMerchant\n isWishlist\n isCOD\n isFreeOngkir {\n isActive\n __typename\n }\n preorder {\n duration\n timeUnit\n isActive\n preorderInDays\n __typename\n }\n __typename\n}\n\n"
    "fragment ProductCustomInfo on pdpDataCustomInfo {\n icon\n title\n isApplink\n applink\n separator\n description\n __typename\n}\n\n"
    "fragment ProductInfo on pdpDataProductInfo {\n row\n content {\n title\n subtitle\n applink\n __typename\n }\n __typename\n}\n\n"
    "fragment ProductDetail on pdpDataProductDetail {\n content {\n title\n subtitle\n applink\n showAtFront\n isAnnotation\n __typename\n }\n __typename\n}\n\n"
    "fragment ProductDataInfo on pdpDataInfo {\n icon\n title\n isApplink\n applink\n content {\n icon\n text\n __typename\n }\n __typename\n}\n\n"
    "fragment ProductSocial on pdpDataSocialProof {\n row\n content {\n icon\n title\n subtitle\n applink\n type\n rating\n __typename\n }\n __typename\n}\n\n"
    "query PDPGetLayoutQuery($shopDomain: String, $productKey: String, $layoutID: String, $apiVersion: Float, $userLocation: pdpUserLocation, $extParam: String) {\n pdpGetLayout(shopDomain: $shopDomain, productKey: $productKey, layoutID: $layoutID, apiVersion: $apiVersion, userLocation: $userLocation, extParam: $extParam) {\n requestID\n name\n pdpSession\n basicInfo {\n alias\n createdAt\n isQA\n id: productID\n shopID\n shopName\n minOrder\n maxOrder\n weight\n weightUnit\n condition\n status\n url\n needPrescription\n catalogID\n isLeasing\n isBlacklisted\n menu {\n id\n name\n url\n __typename\n }\n category {\n id\n name\n title\n breadcrumbURL\n isAdult\n isKyc\n minAge\n detail {\n id\n name\n breadcrumbURL\n isAdult\n __typename\n }\n __typename\n }\n txStats {\n transactionSuccess\n transactionReject\n countSold\n paymentVerified\n itemSoldFmt\n __typename\n }\n stats {\n countView\n countReview\n countTalk\n rating\n __typename\n }\n __typename\n }\n components {\n name\n type\n position\n data {\n ...ProductMedia\n ...ProductHighlight\n ...ProductInfo\n ...ProductDetail\n ...ProductSocial\n ...ProductDataInfo\n ...ProductCustomInfo\n ...ProductVariant\n __typename\n }\n __typename\n }\n __typename\n }\n}\n"
)


def parse_product_url(url: str) -> tuple:
    """Return ``(shop_domain, product_key)`` taken from a product page URL.

    The first two non-empty path segments are used, so the query string,
    a trailing slash, or a ".com" inside a slug cannot leak into the result.

    Raises:
        ValueError: if the URL path has fewer than two segments.
    """
    from urllib.parse import urlsplit

    segments = [part for part in urlsplit(url).path.split("/") if part]
    if len(segments) < 2:
        raise ValueError(
            f"expected a URL of the form https://host/<shop>/<product>, got {url!r}"
        )
    return segments[0], segments[1]


def build_payload(shop_domain: str, product_key: str) -> dict:
    """Return the JSON body of the PDPGetLayoutQuery request for one product."""
    return {
        "operationName": "PDPGetLayoutQuery",
        "variables": {
            "shopDomain": shop_domain,
            "productKey": product_key,
            "layoutID": "",
            "apiVersion": 1,
            "userLocation": {
                "cityID": "176",
                "addressID": "0",
                "districtID": "2274",
                "postalCode": "",
                "latlon": "",
            },
            "extParam": "",
        },
        "query": PDP_LAYOUT_QUERY,
    }


def build_headers(referer: str) -> dict:
    """Return the request headers, using *referer* as the ``referer`` value."""
    return {
        'origin': 'https://www.tokopedia.com',
        'referer': referer,
        'sec-ch-ua': '"Not?A_Brand";v="8", "Chromium";v="108", "Google Chrome";v="108"',
        'sec-ch-ua-mobile': '?0',
        # Structured-field string: the double quotes are part of the header
        # value, exactly as in the sec-ch-ua entry above.
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-site',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
        'x-device': 'desktop',
        'x-source': 'tokopedia-lite',
        'x-tkpd-akamai': 'pdpGetLayout',
        'x-tkpd-lite-service': 'zeus',
        'x-version': '53ac990',
    }


# Notebook cells and directly executed scripts run with __name__ == "__main__",
# so the requests are still sent there; merely importing the helpers is not
# enough to trigger network traffic.
if __name__ == "__main__":
    # Decoded JSON responses, one per entry of `tokopedia`, in the same order.
    results = []
    # A single client is shared by every URL and closed by the `with` block.
    with httpx.Client(timeout=REQUEST_TIMEOUT_SECONDS) as client:
        for url in tokopedia:
            shopdomain, product_key = parse_product_url(url)
            payload = build_payload(shopdomain, product_key)
            headers = build_headers(url)
            resp = client.post(GRAPHQL_ENDPOINT, json=payload, headers=headers)
            # Fail loudly on a 4xx/5xx reply instead of decoding an error page.
            resp.raise_for_status()
            results.append(resp.json())
Can someone please help me get the above code to run and return the JSON?