I have two scripts that scrape a page that is basically a search engine. They read client codes from a Google Sheet, search for each one on the site, collect some information, and then write it back to the sheet.
The problem is that the work is split across two scripts, and the second one is the one that writes the information into the Google Sheet.
The first script runs every search, and only after all the searches have finished does the second one start writing the fetched information into the sheet.
What I want to do is search for one code and write its result, then search for the second one and write it, and so on. I've tried different approaches, but this is my first program and my first time programming, so I'm struggling with it.
k_bot.py (Web Scraper)
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import UnexpectedAlertPresentException
import re
import time
class BOT(object):
    """Selenium-driven scraper for the 3kplus client search page.

    Creating an instance opens Firefox, loads the site and submits the login
    form. ``search_cpfs`` then looks up every client code that was handed to
    the constructor.
    """

    # Fields scraped per client, in order: name, age, benefit, concession,
    # salary, loan banks, card banks, consig, card.
    _FIELD_COUNT = 9

    def __init__(self, cpfs):
        """Open the browser and log in.

        Args:
            cpfs: Iterable of client codes that ``search_cpfs`` will look up.
        """
        # SETUP FOR URL
        self.bot_url = 'http://www.3kplus.net/'
        self.cpfs = cpfs
        self.profile = webdriver.FirefoxProfile()
        self.options = Options()
        # Raw string: the backslashes in this Windows path are not escapes.
        self.driver = webdriver.Firefox(firefox_profile=self.profile,
                                        executable_path=r'C:\Users\MOISA\Documents\geckodriver.exe',
                                        options=self.options)
        # NAVIGATE TO URL
        self.driver.get(self.bot_url)
        # NOTE(review): the credentials below are hard-coded in source;
        # consider loading them from configuration instead.
        login_box = self.driver.find_element_by_xpath('//*[@id="login"]/div[3]/div[2]/div[2]/input')
        login_box.send_keys('daiane')
        pass_box = self.driver.find_element_by_xpath('//*[@id="login"]/div[3]/div[2]/div[3]/input')
        pass_box.send_keys('789456')
        login_btn = self.driver.find_element_by_xpath('//*[@id="login"]/div[3]/div[2]/button')
        login_btn.click()

    def search_cpfs(self):
        """Look up every client code in ``self.cpfs``.

        Returns:
            A tuple of nine parallel lists (names, ages, benefits,
            concessions, salaries, loan banks, card banks, consigs, cards)
            with one entry per client code. A code that could not be found
            contributes an empty string to every list.
        """
        columns = tuple([] for _ in range(self._FIELD_COUNT))
        for cpf in self.cpfs:
            record = self._search_one(cpf)
            for column, value in zip(columns, record):
                column.append(value)
        return columns

    def _search_one(self, cpf):
        """Search a single client code and return its nine scraped fields."""
        print(f"Procurando {cpf}.")
        self.driver.get(self.bot_url)
        self.delay = 3  # seconds
        # SEARCH CLIENT CODE
        try:
            cpf_input = self.driver.find_element_by_xpath('//*[@id="search"]/div/div[1]/input')
            cpf_input.send_keys(cpf)
            cpf_btn = self.driver.find_element_by_xpath('//*[@id="search"]/div/div[2]/button')
            cpf_btn.click()
            cpf_btn.click()
            time.sleep(2)
            # When the notification box is displayed, the header and details
            # blocks sit one <div> further down the page.
            has_notification = self.driver.find_element_by_xpath(
                '//*[@id="notification"]').is_displayed()
            record = self._read_record(2 if has_notification else 1)
        # IF THE CLIENT CODE IS WRONG
        except (NoSuchElementException, UnexpectedAlertPresentException):
            print('CPF Invalido')
            # Every field, including the benefit, gets a blank value so the
            # result never carries data over from a previous client.
            return self._blank_record()
        print('CPF Valido')
        if has_notification:
            print('NOTIFICACAO')
        print(*record)
        return record

    def _read_record(self, header_div):
        """Read the nine fields from the result page.

        Args:
            header_div: 1-based index of the <div> holding the name and age;
                the benefit/concession block is the <div> right after it.
        """
        panel = "/html/body/main[1]/div[1]/div[1]/div[1]"
        details_div = header_div + 1
        nome = self._text(f"{panel}/div[{header_div}]/h2")
        age = self._parse_age(self._text(f"{panel}/div[{header_div}]/ul/li[2]"))
        beneficio = self._text(f"{panel}/div[{details_div}]/div[5]/span/b")
        concessao = self._text(f"{panel}/div[{details_div}]/div[2]/span")
        salario = self._text("/html/body/main[1]/div[1]/div[2]/div/div[3]/div[1]/div[1]/span")
        bankslist = self._bank_names(self._text('//*[@id="loans"]'))
        bcardlist = self._bank_names(self._text('//*[@id="cards"]'))
        consig = self._text("/html/body/main[1]/div[1]/div[1]/div[3]/div[2]/span")
        card = self._text("/html/body/main[1]/div[1]/div[1]/div[3]/div[3]/span")
        return (nome, age, beneficio, concessao, salario,
                bankslist, bcardlist, consig, card)

    def _text(self, xpath):
        """Return the visible text of the element located by ``xpath``."""
        return self.driver.find_element_by_xpath(xpath).text

    @staticmethod
    def _parse_age(text):
        """Return the text between "(" and "Anos", or '' when absent."""
        match = re.search(r'\((.*?)Anos', text)
        return match.group(1) if match else ''

    @staticmethod
    def _bank_names(text):
        """Return the words that follow "Banco " in ``text``, comma-joined."""
        return ', '.join(re.findall(r'(?<=Banco )(\w+)', text))

    @classmethod
    def _blank_record(cls):
        """Return the all-empty record used for a client code not found."""
        return ('',) * cls._FIELD_COUNT
cpf_updater.py (Google Sheets writer)
from k_bot import BOT
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import time
from gspread.exceptions import APIError
class CpfSearch(object):
    """Reads client codes from a Google Sheet and writes scraped data back.

    The first worksheet of the spreadsheet is used. Column 1 holds the
    client codes (below a heading row); the scraped fields go to the columns
    named by the ``*_col`` attributes.
    """

    # 1-based sheet column of each field.
    cpf_col = 1
    nome_col = 2
    age_col = 3
    beneficio_col = 4
    concessao_col = 5
    salario_col = 6
    bancos_col = 7
    bancocard_col = 9
    consig_col = 10
    card_col = 16

    def __init__(self, spreadsheet_name):
        """Authorize with the service-account key file and open the sheet.

        Args:
            spreadsheet_name: Title of the spreadsheet to open.
        """
        scope = ['https://www.googleapis.com/auth/spreadsheets',
                 'https://www.googleapis.com/auth/drive.readonly']
        creds = ServiceAccountCredentials.from_json_keyfile_name('CONSULTAS.json', scope)
        client = gspread.authorize(creds)
        self.sheet = client.open(spreadsheet_name).sheet1

    def process_cpf_list(self):
        """Search every client code in the sheet, then write all results."""
        # SKIP OVER COLUMN HEADING IN THE SPREADSHEET
        cpfs = self.sheet.col_values(self.cpf_col)[1:]
        bot = BOT(cpfs)
        columns = bot.search_cpfs()
        # UPDATE THE SHEET
        print("Atualizando...")
        # Row 1 is the heading, so the first client lives on row 2.
        for row, values in enumerate(zip(*columns), start=2):
            if self._write_row(row, values):
                print('Cliente atualizado!')

    def _result_columns(self):
        """Return the target columns in the order the scraper yields fields."""
        return (self.nome_col, self.age_col, self.beneficio_col,
                self.concessao_col, self.salario_col, self.bancos_col,
                self.bancocard_col, self.consig_col, self.card_col)

    def _write_row(self, row, values, *, max_attempts=5, wait_seconds=100):
        """Write one client's fields to ``row``, retrying on ``APIError``.

        A failed cell is retried after waiting instead of being skipped, so
        a row is never left half-written because of a temporary API error.

        Args:
            row: 1-based sheet row to update.
            values: The nine field values, in scraper order.
            max_attempts: How many times each cell is tried before giving up.
            wait_seconds: Pause between attempts after an ``APIError``.

        Returns:
            True when every cell was written, False when a cell still
            failed after ``max_attempts`` tries.
        """
        for col, value in zip(self._result_columns(), values):
            for _ in range(max_attempts):
                try:
                    self.sheet.update_cell(row, col, value)
                    break
                except APIError:
                    print('Esperando para atualizar...')
                    time.sleep(wait_seconds)
            else:
                print('Nao foi possivel atualizar o cliente.')
                return False
        return True
# Script entry point: process the spreadsheet titled "TESTE".
cpf_updater = CpfSearch(spreadsheet_name='TESTE')
cpf_updater.process_cpf_list()
EDIT k_bot.py
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import UnexpectedAlertPresentException
import re
import time
class BOT(object):
    """Selenium-driven scraper for the 3kplus client search page.

    Creating an instance opens Firefox, loads the site and submits the login
    form. ``search_cpfs`` then looks up one client code per call.
    """

    # Fields scraped per client, in order: name, age, benefit, concession,
    # salary, loan banks, card banks, consig, card.
    _FIELD_COUNT = 9

    def __init__(self, cpfs=None):
        """Open the browser and log in.

        Args:
            cpfs: Optional collection of client codes. It is only stored on
                the instance; ``search_cpfs`` receives its code per call.
        """
        # SETUP FOR URL
        self.bot_url = 'http://www.3kplus.net/'
        self.cpfs = cpfs
        self.profile = webdriver.FirefoxProfile()
        self.options = Options()
        # Raw string: the backslashes in this Windows path are not escapes.
        self.driver = webdriver.Firefox(firefox_profile=self.profile,
                                        executable_path=r'C:\Users\MOISA\Documents\geckodriver.exe',
                                        options=self.options)
        # NAVIGATE TO URL
        self.driver.get(self.bot_url)
        # NOTE(review): the credentials below are hard-coded in source;
        # consider loading them from configuration instead.
        login_box = self.driver.find_element_by_xpath('//*[@id="login"]/div[3]/div[2]/div[2]/input')
        login_box.send_keys('daiane')
        pass_box = self.driver.find_element_by_xpath('//*[@id="login"]/div[3]/div[2]/div[3]/input')
        pass_box.send_keys('789456')
        login_btn = self.driver.find_element_by_xpath('//*[@id="login"]/div[3]/div[2]/button')
        login_btn.click()

    def search_cpfs(self, cpf):
        """Look up a single client code.

        Args:
            cpf: The client code to type into the search box.

        Returns:
            A tuple of nine single-element lists (name, age, benefit,
            concession, salary, loan banks, card banks, consig, card). When
            the code cannot be found every list holds an empty string.
        """
        print(f"Procurando {cpf}.")
        self.driver.get(self.bot_url)
        self.delay = 3  # seconds
        # SEARCH CLIENT CODE
        try:
            cpf_input = self.driver.find_element_by_xpath('//*[@id="search"]/div/div[1]/input')
            cpf_input.send_keys(cpf)
            cpf_btn = self.driver.find_element_by_xpath('//*[@id="search"]/div/div[2]/button')
            cpf_btn.click()
            cpf_btn.click()
            time.sleep(2)
            # When the notification box is displayed, the header and details
            # blocks sit one <div> further down the page.
            has_notification = self.driver.find_element_by_xpath(
                '//*[@id="notification"]').is_displayed()
            record = self._read_record(2 if has_notification else 1)
        # IF THE CLIENT CODE IS WRONG
        except (NoSuchElementException, UnexpectedAlertPresentException):
            print('CPF Invalido')
            # Every field, including the benefit, gets a blank value; the
            # benefit used to be left unassigned on this path.
            return self._as_columns(self._blank_record())
        print('CPF Valido')
        if has_notification:
            print('NOTIFICACAO')
        print(*record)
        return self._as_columns(record)

    def _read_record(self, header_div):
        """Read the nine fields from the result page.

        Args:
            header_div: 1-based index of the <div> holding the name and age;
                the benefit/concession block is the <div> right after it.
        """
        panel = "/html/body/main[1]/div[1]/div[1]/div[1]"
        details_div = header_div + 1
        nome = self._text(f"{panel}/div[{header_div}]/h2")
        age = self._parse_age(self._text(f"{panel}/div[{header_div}]/ul/li[2]"))
        beneficio = self._text(f"{panel}/div[{details_div}]/div[5]/span/b")
        concessao = self._text(f"{panel}/div[{details_div}]/div[2]/span")
        salario = self._text("/html/body/main[1]/div[1]/div[2]/div/div[3]/div[1]/div[1]/span")
        bankslist = self._bank_names(self._text('//*[@id="loans"]'))
        bcardlist = self._bank_names(self._text('//*[@id="cards"]'))
        consig = self._text("/html/body/main[1]/div[1]/div[1]/div[3]/div[2]/span")
        card = self._text("/html/body/main[1]/div[1]/div[1]/div[3]/div[3]/span")
        return (nome, age, beneficio, concessao, salario,
                bankslist, bcardlist, consig, card)

    def _text(self, xpath):
        """Return the visible text of the element located by ``xpath``."""
        return self.driver.find_element_by_xpath(xpath).text

    @staticmethod
    def _parse_age(text):
        """Return the text between "(" and "Anos", or '' when absent."""
        match = re.search(r'\((.*?)Anos', text)
        return match.group(1) if match else ''

    @staticmethod
    def _bank_names(text):
        """Return the words that follow "Banco " in ``text``, comma-joined."""
        return ', '.join(re.findall(r'(?<=Banco )(\w+)', text))

    @classmethod
    def _blank_record(cls):
        """Return the all-empty record used for a client code not found."""
        return ('',) * cls._FIELD_COUNT

    @staticmethod
    def _as_columns(record):
        """Wrap each field in its own list, the shape callers unpack."""
        return tuple([value] for value in record)
EDIT cpf_updater.py
from k_bot import BOT
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import time
from gspread.exceptions import APIError
class CpfSearch(object):
    """Searches client codes one at a time and writes each result at once.

    The first worksheet of the spreadsheet is used. Column 1 holds the
    client codes (below a heading row); the scraped fields go to the columns
    named by the ``*_col`` attributes.
    """

    # 1-based sheet column of each field.
    cpf_col = 1
    nome_col = 2
    age_col = 3
    beneficio_col = 4
    concessao_col = 5
    salario_col = 6
    bancos_col = 7
    bancocard_col = 9
    consig_col = 10
    card_col = 16

    def __init__(self, spreadsheet_name):
        """Authorize with the service-account key file and open the sheet.

        Args:
            spreadsheet_name: Title of the spreadsheet to open.
        """
        scope = ['https://www.googleapis.com/auth/spreadsheets',
                 'https://www.googleapis.com/auth/drive.readonly']
        creds = ServiceAccountCredentials.from_json_keyfile_name('CONSULTAS.json', scope)
        client = gspread.authorize(creds)
        self.sheet = client.open(spreadsheet_name).sheet1

    def process_cpf_list(self):
        """Search each client code and write its row before the next search."""
        # SKIP OVER COLUMN HEADING IN THE SPREADSHEET
        cpfs = self.sheet.col_values(self.cpf_col)[1:]
        bot = BOT(cpfs)
        # Row 1 is the heading, so the first client code lives on row 2.
        for row, cpf in enumerate(cpfs, start=2):
            columns = bot.search_cpfs(cpf)
            # UPDATE THE SHEET
            print("Atualizando...")
            # search_cpfs returns parallel lists; zip turns them back into
            # one tuple of field values per client found.
            for values in zip(*columns):
                if self._write_row(row, values):
                    print('Cliente atualizado!')

    def _result_columns(self):
        """Return the target columns in the order the scraper yields fields."""
        return (self.nome_col, self.age_col, self.beneficio_col,
                self.concessao_col, self.salario_col, self.bancos_col,
                self.bancocard_col, self.consig_col, self.card_col)

    def _write_row(self, row, values, *, max_attempts=5, wait_seconds=100):
        """Write one client's fields to ``row``, retrying on ``APIError``.

        A failed cell is retried after waiting instead of being skipped, so
        a row is never left half-written because of a temporary API error.

        Args:
            row: 1-based sheet row to update.
            values: The nine field values, in scraper order.
            max_attempts: How many times each cell is tried before giving up.
            wait_seconds: Pause between attempts after an ``APIError``.

        Returns:
            True when every cell was written, False when a cell still
            failed after ``max_attempts`` tries.
        """
        for col, value in zip(self._result_columns(), values):
            for _ in range(max_attempts):
                try:
                    self.sheet.update_cell(row, col, value)
                    break
                except APIError:
                    print('Esperando para atualizar...')
                    time.sleep(wait_seconds)
            else:
                print('Nao foi possivel atualizar o cliente.')
                return False
        return True
# Script entry point: process the spreadsheet titled "TESTE".
cpf_updater = CpfSearch(spreadsheet_name='TESTE')
cpf_updater.process_cpf_list()