0

I'm trying to scrape a page with Selenium, Selenium-wire (for auth proxies) and random-user-agent.

The script runs normally, but after a few lines of searching (around 2 or 3) I get the title error. I searched a lot, and saw that a possible solution would be put a sleep right before driver.get(), however, that didn't help. I added this amount of try & except to try to understand what is happening, but this error does not fall on any of the excepts

from colorama import Fore, Back, Style, init
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium import webdriver
from multiprocessing import freeze_support
from time import sleep
import os
from seleniumwire import webdriver
import numpy as np
from threading import Thread
import threading
import webbrowser
import sys
from sys import exit
import json
from fake_useragent import UserAgent

# clear and disable debug mode
os.environ["WDM_LOG_LEVEL"] = "0"

# colorama startup
init(autoreset=True)

# chromedriver startup
WEBDRIVER_SVC = ChromeService(ChromeDriverManager().install())

# driver
def spawn_driver(line: str):
    while True:
        try:
            try:
                ua = UserAgent()
                user_agent = ua.chrome
                CHROME_OPTIONS = Options()
                CHROME_OPTIONS.add_argument(f"user-agent={user_agent}")
                CHROME_OPTIONS.add_argument("--headless")
                CHROME_OPTIONS.add_argument("--disable-gpu")
                CHROME_OPTIONS.add_argument("--window-size=1920,1080")
                CHROME_OPTIONS.add_argument("--disable-extensions")
                CHROME_OPTIONS.add_argument("--start-maximized")
                CHROME_OPTIONS.add_experimental_option(
                    "excludeSwitches", ["enable-logging"]
                )
                WIRE_OPTIONS = {
                    "proxy": {
                        "http": f"http://myuser:myhost@domainproxy.com",
                        "https": f"https://myuser:myhost@domainproxy.com",
                    }
                }
                driver = webdriver.Chrome(
                    seleniumwire_options=WIRE_OPTIONS,
                    service=WEBDRIVER_SVC,
                    options=CHROME_OPTIONS,
                )

                sleep(30)

                driver.get("https://www.google.com/")

                sleep(3)

                WebDriverWait(driver, 30).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, "#search"))
                ).click()

                # search for every line on the filename.txt

                sleep(10)

            except Exception as e:
                driver.close()
                continue

            try:
                not_found = WebDriverWait(driver, 5).until(
                    EC.presence_of_element_located(
                        (By.XPATH, '//*[@id="alertError"]/div/div[1]')
                    )
                )
                print(not_found.text)
                driver.close()
                break

            except Exception as e:
                try:
                    # validation point
                    found_msg = WebDriverWait(driver, 30).until(
                        EC.presence_of_element_located(
                            (By.CSS_SELECTOR, "#lnk-accountRecovery > span")
                        )
                    )

                    print(found_msg.text)
                    break

                except Exception as E:
                    try:

                        blocked_msg = WebDriverWait(driver, 30).until(
                            EC.presence_of_element_located(
                                (
                                    By.CSS_SELECTOR,
                                    "#__next > div > form > div.sc-fFTYTi.hNKDnZ > div.sc-gQNndl.bcmdmD > div.sc-jOBXIr.ivZFJS > div.sc-kIWQTW.gVwKbm > h5",
                                )
                            )
                        )

                        print()
                        continue

                    except:
                        continue

                finally:
                    driver.close()
        except:
            driver.close()
            continue


def parse_list(unique_list):
    for line in unique_list:
        spawn_driver(line)


def parse_file(filename: str, threads: int):
    with open(filename, "r") as f:
        lines = f.readlines()
        splited_file = np.array_split(lines, int(threads))
        for unique_list in splited_file:
            try:
                t = Thread(target=parse_list, args=(unique_list,))
                t.start()
            except:
                sleep(1)


def start():
    filename = "myfile.txt"
    threads = 5
    output = "results.txt"
    # execute script
    parse_file(filename, threads, output)


def main():
    start()


if __name__ == "__main__":
    freeze_support()
    main()

0 Answers0