I'm trying to scrape an API. I found the URL, and I can make a POST request that retrieves the data by using the session_id I got from opening the site manually in a browser. This is the code:
import requests
import json
from cred import session_id, origin, referer, website, api_url
cookies = {
    '_ga': 'GA1.2.1463518167.1654244215',
    'tz': 'Africa/Lagos',
    'frontend_lang': 'en_US',
    'session_id': session_id,
    'cids': '1',
    '_gid': 'GA1.2.402550658.1654629813',
}
headers = {
    'authority': website,
    'accept': '*/*',
    'accept-language': 'en-GB,en;q=0.9,en-US;q=0.8',
    'origin': origin,
    'referer': referer,
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36 Edg/102.0.1245.33',
}
json_data = {
    'id': 22,
    'jsonrpc': '2.0',
    'method': 'call',
    'params': {
        'args': [],
        'model': 'report.pos.order',
        'method': 'read_group',
        'kwargs': {
            'context': {
                'pivot_column_groupby': [
                    'date:month',
                ],
                'pivot_row_groupby': [
                    'product_id',
                ],
            },
            'domain': [],
            'fields': [
                'product_qty:sum',
                'average_price:avg',
                'price_total:sum',
            ],
            'groupby': [
                'product_id',
            ],
            'lazy': False,
        },
    },
}
response = requests.post(api_url, cookies=cookies, headers=headers, json=json_data)
print(response.json())
However, when I attempt to obtain the session_id programmatically with requests, I'm told the session has expired. This is that script:
import requests
import json
from cred import origin, referer, website, login, password, api_url
from bs4 import BeautifulSoup as bs
headers = {
    'authority': website,
    'accept': '*/*',
    'accept-language': 'en-GB,en;q=0.9,en-US;q=0.8',
    'origin': origin,
    'referer': referer,
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36 Edg/102.0.1245.33',
}
json_data = {
    'id': 22,
    'jsonrpc': '2.0',
    'method': 'call',
    'params': {
        'args': [],
        'model': 'report.pos.order',
        'method': 'read_group',
        'kwargs': {
            'context': {
                'pivot_column_groupby': [
                    'date:month',
                ],
                'pivot_row_groupby': [
                    'product_id',
                ],
            },
            'domain': [],
            'fields': [
                'product_qty:sum',
                'average_price:avg',
                'price_total:sum',
            ],
            'groupby': [
                'product_id',
            ],
            'lazy': False,
        },
    },
}
def get_csrf():
    # get the csrf_token from the login page
    r = s.get('https://www.odoo.com/web/login')
    soup = bs(r.content, 'html.parser')
    g = soup.head.script.text
    g = g.split('\"')
    csrf_token = g[1]
    return csrf_token

with requests.Session() as s:
    csrf_token = get_csrf()
    data = {
        'csrf_token': csrf_token,
        'login': login,
        'password': password,
        'redirect': ''
    }
    r = s.post('https://www.odoo.com/web/login', headers=headers, data=data)
    re = s.post(api_url, cookies=r.cookies, headers=headers, json=json_data)
    print(re.json())
I suspected that my script wasn't giving me a 'good' session_id to scrape the API, so I wrote another script (below) using the Playwright module to print my session_id:
from cred import password, login
from playwright.sync_api import sync_playwright
import time
with sync_playwright() as p:
    browser = p.webkit.launch(headless=False)
    baseurl = "https://www.odoo.com/web/login"
    context = browser.new_context()
    page = context.new_page()
    page.goto(baseurl)
    cookie = context.cookies()
    print(cookie)  # show me what cookies you have
    page.fill('#login', login)
    page.fill('#password', password)
    page.click("button[type='submit']")
    time.sleep(2000.2)  # just to give me time to check what the session_id is on the network tab
    browser.close()
I gave the script a long sleep so I could go into the element tab of WebKit's devtools and compare the cookie value printed in my terminal with what the browser showed. They were different.
If I use the cookie from the element tab of WebKit in the original script, it works, while the one from the terminal doesn't. Why is that, and how can I get it to work programmatically?
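For what it's worth, this is roughly the flow I'm hoping to end up with: log in with Playwright, read the cookies again only after the login has actually completed, and pull out session_id to reuse in the first script. This is just a sketch of what I'm aiming for, not something I have working; the wait_for_load_state call and the assumption that the cookie is literally named session_id come from what I saw in devtools:
from cred import login, password
from playwright.sync_api import sync_playwright

with sync_playwright() as p:
    browser = p.webkit.launch(headless=True)
    context = browser.new_context()
    page = context.new_page()
    page.goto("https://www.odoo.com/web/login")
    page.fill('#login', login)
    page.fill('#password', password)
    page.click("button[type='submit']")
    # wait for the post-login navigation to settle so the server has had a
    # chance to set the authenticated cookie before reading it
    page.wait_for_load_state("networkidle")
    # read the cookies again *after* logging in; the value printed before
    # the click is a different, pre-login cookie
    cookies = {c["name"]: c["value"] for c in context.cookies()}
    session_id = cookies.get("session_id")  # assumed cookie name, from devtools
    print(session_id)
    browser.close()
The idea would then be to drop that session_id into the cookies dict of the first script.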