I'm trying to run a code to do some web scraping in Twitter but I keep having an error that I don't manage to solve. I guess it is related to selenium webdriver, but I am not very familiar with it and I haven't found any solutions so far.
This is the code:
import pandas
import numpy
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
class SeleniumClient(object):
def __init__(self):
#Initialization method.
self.chrome_options = webdriver.ChromeOptions('D:/chromedriver_win32/chromedriver', options=self.chrome_options)
self.chrome_options.add_argument('--headless')
self.chrome_options.add_argument('--no-sandbox')
self.chrome_options.add_argument('--disable-setuid-sandbox')
# you need to provide the path of chromdriver in your system
self.browser = webdriver.Chrome('C:\Program Files\Google\chromedriver.exe')
self.base_url = 'https://twitter.com/search?q='
def get_tweets(self, query):
'''
Function to fetch tweets.
'''
try:
self.browser.get(self.base_url+query)
time.sleep(2)
body = self.browser.find_element_by_tag_name('body')
for _ in range(3000):
body.send_keys(Keys.PAGE_DOWN)
time.sleep(0.3)
timeline = self.browser.find_element_by_id('timeline')
tweet_nodes = timeline.find_elements_by_css_selector('.tweet-text')
return pd.DataFrame({'tweets': [tweet_node.text for tweet_node in tweet_nodes]})
except:
print("Selenium - An error occured while fetching tweets.")
#this next line is the one that gives the error, but it must be located in the class
selenium_client = SeleniumClient()
tweets_df = selenium_client.get_tweets('AI and Deep learning')
This is what the error says:
AttributeError Traceback (most recent call last)
<ipython-input-5-3bd40446c1fd> in <module>
----> 1 selenium_client = SeleniumClient()
2 #tweets_df = selenium_client.get_tweets('AI and Deep learning')
<ipython-input-3-f0c81bf234aa> in __init__(self)
4 def __init__(self):
5 #Initialization method.
----> 6 self.chrome_options = webdriver.ChromeOptions('D:/chromedriver_win32/chromedriver', options=self.chrome_options)
7 self.chrome_options.add_argument('--headless')
8 self.chrome_options.add_argument('--no-sandbox')
AttributeError: 'SeleniumClient' object has no attribute 'chrome_options'