0

I want to capture a full-page screenshot of https://world.taobao.com/ in English. The URL can differ, and the web page can be in any language. I have tried

    options.add_experimental_option('prefs', {'intl.accept_languages': 'en,en_US'})

and

    options.add_argument('--lang=en')

But neither of them works. I am getting a screenshot that is in Chinese and has the wrong dimensions. I want a full-page screenshot in English.

Here is my code:

    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    import time
    from selenium.webdriver.common.keys import Keys
    from PIL import Image
    from io import BytesIO

    def save_screenshot(browser, file_name):
        """Capture the current page of *browser* and write it to *file_name*.

        The page is first walked with ``scroll_down``, which reports the page
        size as ``(height, width)``. The browser window is then resized to
        that size before the PNG screenshot is taken, decoded with PIL and
        saved under *file_name*.
        """
        page_height, page_width = scroll_down(browser)
        # set_window_size takes width first, the reverse of scroll_down's order.
        browser.set_window_size(page_width, page_height)

        png_stream = BytesIO(browser.get_screenshot_as_png())
        Image.open(png_stream).save(file_name)
        print(" screenshot saved ")

    def scroll_down(browser):
        """Scroll through the page one viewport at a time and return its size.

        The page is split into viewport-sized tiles, visited row by row from
        the top-left corner. Every tile except the first is scrolled to with
        ``window.scrollTo`` followed by a 0.5 s pause (presumably so lazily
        loaded content has time to render; nothing is captured here).

        Args:
            browser: a WebDriver-like object exposing ``execute_script``.

        Returns:
            A ``(total_height, total_width)`` tuple; height comes first.
        """
        total_width = browser.execute_script("return document.body.offsetWidth")
        total_height = browser.execute_script("return document.body.parentNode.scrollHeight")
        viewport_width = browser.execute_script("return document.body.clientWidth")
        viewport_height = browser.execute_script("return window.innerHeight")

        # A zero-sized viewport would never advance the tiling loops below,
        # so skip the scrolling pass and just report the page size.
        if viewport_width <= 0 or viewport_height <= 0:
            return (total_height, total_width)

        is_first_tile = True
        top = 0
        while top < total_height:
            left = 0
            while left < total_width:
                # The page already shows the first tile; only later tiles
                # need an explicit scroll and a pause.
                if not is_first_tile:
                    browser.execute_script("window.scrollTo({0}, {1})".format(left, top))
                    time.sleep(0.5)
                is_first_tile = False
                left += viewport_width
            top += viewport_height

        return (total_height, total_width)

    # Configure a headless Chrome session that asks for English.
    options = Options()
    options.headless = True
    # Alternative way of requesting English that was also tried:
    #options.add_experimental_option('prefs', {'intl.accept_languages': 'en,en_US'})
    options.add_argument('--lang=en')

    browser = webdriver.Chrome(executable_path="chromedriver.exe",options=options)

    # Always shut the driver down, even if loading the page or taking the
    # screenshot raises, so no browser/driver process is left running.
    try:
        browser.maximize_window()

        browser.get('https://world.taobao.com/')
        save_screenshot(browser, 'baidu.png')
    finally:
        browser.quit()
Sachin Gupta
  • 186
  • 1
  • 14
  • 1
    Have you looked at https://stackoverflow.com/questions/53717431/chromedriver-how-to-translate-a-page-using-selenium – Booboo Jul 04 '20 at 13:14
  • I saw that question, but in that case the language has to be known in advance. I don't know the language of the URLs I am targeting. I want to translate any web page to English, whichever language it is in. – Sachin Gupta Jul 04 '20 at 13:17
  • The language preferences you set affect the browser, not the web page. You will likely need to navigate their web UI to change the language preference before scraping pages. – Greg Burghardt Jul 04 '20 at 14:54
  • Can you give a small example? – Sachin Gupta Jul 04 '20 at 16:10

0 Answers0