I'm trying to scrape my Odoo inventory pages to retrieve all the pictures already in the database (more than 9,000), so I wrote a Python script using Selenium. It works like a charm, but not on my Odoo pages: I think the source of Odoo's inventory pages is generated by AJAX, JavaScript or something similar, so when I scrape a page I don't get the rendered source and cannot find the elements. I really don't want to download the 9,000+ images and names by hand. If someone knows how to capture the rendered source code, could you please tell me?
Python code:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup as bs
import time
import urllib.request
import requests
import ssl
# --- Module-level settings and shared state ---
# Root URL of the Odoo instance; every path below is appended to it.
base_url = "https://yourodoourlhere.com"
# Collected <img> src values (filled by CaptureImages).
img_list = list()
# Characters removed from an image URL before it is used as a file name.
symbols = list(':/"?<>\\*|')
# One shared Chrome session; the matching chromedriver is installed on demand.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
gcontext = ssl.SSLContext()
def Login():
    """Log in through the Odoo web login form, then open the inventory.

    Fills the ``login`` and ``password`` inputs, clicks the submit button,
    waits until the browser has left the login page and finally calls
    ``GetInventory()``.

    Raises:
        selenium.common.exceptions.NoSuchElementException: if a form
            element cannot be located.
        selenium.common.exceptions.TimeoutException: if the browser is
            still on the login page after 30 seconds (for example because
            the credentials were rejected).
    """
    # Function-scope import keeps this block self-contained.
    from selenium.webdriver.support.ui import WebDriverWait

    driver.get(base_url + "/web/login")

    login_form = driver.find_element(By.ID, "login")
    login_form.send_keys('your@odoologinhere.com')

    password_form = driver.find_element(By.ID, "password")
    password_form.send_keys('yourodoopasswordhere')

    # NOTE(review): absolute XPath kept from the original; it depends on the
    # exact page layout and will need updating if the login template changes.
    log_button = driver.find_element(
        By.XPATH, "/html/body/div[1]/main/div/form/div[3]/button"
    )
    log_button.click()

    # Fail loudly here instead of scraping an unauthenticated page later.
    WebDriverWait(driver, 30).until(
        lambda d: "/web/login" not in d.current_url
    )

    GetInventory()
def GetInventory():
    """Navigate to the product kanban view, then start the image capture."""
    inventory_path = "/web#action=375&model=product.template&view_type=kanban&cids=1&menu_id=198"
    driver.get(base_url + inventory_path)
    CaptureImages()
def _image_sources(html):
    """Return the ``src`` of every ``<img>`` in *html* that can be fetched."""
    sources = []
    for tag in bs(html, 'html.parser').find_all('img'):
        src = tag.get('src')
        # An <img> without src would raise KeyError with tag['src'], and an
        # inline "data:" URI cannot be requested over HTTP.
        if src and not src.startswith('data:'):
            sources.append(src)
    return sources


def _safe_filename(img_url):
    """Return *img_url* with every character listed in ``symbols`` removed."""
    return img_url.translate(str.maketrans('', '', ''.join(symbols)))


def CaptureImages():
    """Download every image found on the page the browser currently shows.

    The page is built by JavaScript after navigation, so the DOM is read
    only once a kanban record image is present (or after a 30 s timeout, in
    which case whatever has been rendered is parsed). Each image ``src`` is
    appended to the module-level ``img_list`` and saved in the current
    working directory under the URL with the ``symbols`` characters removed.
    The browser is shut down when the function finishes, even on error.

    NOTE(review): only images present in the current DOM are captured; if
    the view is paginated this has to run once per page -- TODO confirm.

    Raises:
        requests.HTTPError: if the server answers an image request with an
            error status.
    """
    # Function-scope imports keep this block self-contained.
    from urllib.parse import urljoin

    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait

    try:
        try:
            # NOTE(review): ".o_kanban_record" is assumed to be the class of
            # a rendered kanban card -- confirm against the actual page.
            WebDriverWait(driver, 30).until(
                EC.presence_of_element_located(
                    (By.CSS_SELECTOR, ".o_kanban_record img")
                )
            )
        except TimeoutException:
            print("No kanban record appeared within 30 s; parsing the page as it is")

        html = driver.execute_script("return document.documentElement.innerHTML;")
        img_list.extend(_image_sources(html))
        print(img_list)

        # urlretrieve sends no browser cookies, so a server that requires a
        # login would not serve the images; reuse the Selenium session.
        session = requests.Session()
        for cookie in driver.get_cookies():
            session.cookies.set(cookie['name'], cookie['value'])
        # NOTE(review): certificate verification stays disabled, as in the
        # original code; enable it if the server has a valid certificate.
        session.verify = False

        # dict.fromkeys drops repeated URLs while keeping their order.
        for img_url in dict.fromkeys(img_list):
            img_name = _safe_filename(img_url)
            if not img_name:
                continue
            # urljoin handles both relative and already-absolute src values.
            response = session.get(urljoin(base_url + "/", img_url), timeout=60)
            response.raise_for_status()
            with open(img_name, 'wb') as out_file:
                out_file.write(response.content)
    finally:
        # quit() ends the whole WebDriver session; close() only closes the
        # current window.
        driver.quit()
#CaptureImages()
# Script entry point: Login() chains into GetInventory(), which in turn
# calls CaptureImages().
Login()
From what I've found in the Chrome inspector, the request to https://myodoourl.com/web/dataset/search_read shows the following data in its "Preview" section:
{jsonrpc: "2.0", id: 815011021, result: {length: 8120,…}}
id: 815011021
jsonrpc: "2.0"
result: {length: 8120,…}
length: 8120
records: [{id: 13968, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…},…]
0: {id: 13968, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
1: {id: 13965, product_variant_count: 2, currency_id: [1, "EUR"], activity_state: false,…}
2: {id: 13961, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
3: {id: 13442, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
4: {id: 13966, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
5: {id: 10820, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
6: {id: 10635, product_variant_count: 16, currency_id: [1, "EUR"], activity_state: false,…}
7: {id: 4348, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
8: {id: 12785, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
9: {id: 12797, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
10: {id: 12782, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
11: {id: 12781, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
12: {id: 10721, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
13: {id: 588, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false, name: "001 Misc",…}
14: {id: 13551, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
15: {id: 589, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false, name: "002 Misc",…}
16: {id: 590, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false, name: "003 Misc",…}
17: {id: 591, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false, name: "004 Misc",…}
18: {id: 950, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false, name: "005 Misc",…}
19: {id: 951, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false, name: "006 Misc",…}
20: {id: 10814, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
21: {id: 952, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false, name: "007 Misc",…}
22: {id: 953, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false, name: "008 Misc",…}
23: {id: 954, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false, name: "009 Misc",…}
24: {id: 11755, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
25: {id: 11756, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
26: {id: 11757, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
27: {id: 11744, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
28: {id: 4470, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
29: {id: 13623, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
30: {id: 5004, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
31: {id: 5134, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
32: {id: 5005, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
33: {id: 10158, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
34: {id: 10337, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
35: {id: 4712, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
36: {id: 11702, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
37: {id: 12504, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
38: {id: 10212, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
39: {id: 1134, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
40: {id: 1133, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
41: {id: 1132, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
42: {id: 1131, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
43: {id: 1130, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
44: {id: 1129, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
45: {id: 1128, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
46: {id: 1126, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
47: {id: 13331, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
48: {id: 1127, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
49: {id: 1124, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
50: {id: 1125, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
51: {id: 1123, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
52: {id: 979, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
53: {id: 978, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
54: {id: 1120, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
55: {id: 216, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
56: {id: 4045, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
57: {id: 1119, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
58: {id: 4044, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
59: {id: 10947, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
60: {id: 2369, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
61: {id: 2404, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
62: {id: 2405, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
63: {id: 1990, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
64: {id: 1559, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
65: {id: 2406, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
66: {id: 1558, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
67: {id: 1171, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
68: {id: 2930, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
69: {id: 5082, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
70: {id: 259, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
71: {id: 2551, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
72: {id: 5208, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
73: {id: 1170, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
74: {id: 11217, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
75: {id: 1138, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
76: {id: 969, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
77: {id: 1140, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
78: {id: 1999, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
79: {id: 403, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
and each record contains:
0: {id: 13968, product_variant_count: 1, currency_id: [1, "EUR"], activity_state: false,…}
activity_state: false
currency_id: [1, "EUR"]
default_code: "12121212121212"
id: 13968
lst_price: 10
name: "\"Monkey's fist\" key ring Red"
product_variant_count: 1
qty_available: 0
type: "product"
uom_id: [1, "Units"]
Is there a way I could get the "id" and "name" fields from Python?