I'm trying to run a twint search to retrieve a list of tweets, on which I perform sentiment analysis on. I've created a for loop that iterates through a pandas dataframe of dates and runs the twint search with given date parameters.
Here's my code:
import twint
import pandas
from textblob import TextBlob
# Functions
def twint_to_pandas(columns): #Creds to Favio Vazques
return twint.output.panda.Tweets_df[columns]
def getTweets(st, startDate, endDate): #runs a twint search and returns a pandas df
c = twint.Config()
c.Search= str(st)
c.Limit = 20
c.Lang = "en"
c.Since = startDate
c.Until = endDate
#c.Verified = True
c.Hide_output = True
c.Pandas = True
twint.run.Search(c)
df = twint_to_pandas(["date", "username", "tweet"])
return df
def getSentiScore(string):
t = TextBlob(str(string)) #create a textblob class instance
score = t.sentiment.polarity # get sentiment
return score #pass it to next function
def getAverageScore(st, startDate, endDate):
df = getTweets(st, startDate, endDate) #establish a variable for the fetched tweets
results = [getSentiScore(str(x)) for x in df['tweet']] #list comprehension
resultsDf = pandas.DataFrame(results, columns=['sentiScore']).dropna() #create dataframe for it
mean = resultsDf['sentiScore'].mean() #get a mean sentiment score
#median = resultsDf['sentiScore'].median()
#mode = resultsDf['sentiScore'].mode()
print("Mean" + str(mean)) # print the mean
#print("Median" + str(median))
#print("Mode" + str(mode))
def weeklyScoreToCSV(st, startDate, days):
datetime = pandas.date_range(start=(str(startDate)), freq='D', periods=days, closed='left')
datetimeDf = datetime.to_frame(index=False, name='date')
datesDf = [i for i in (datetimeDf['date'])]
dateLength = int(len(datesDf)-1)
for i in range(0, dateLength):
sentiScore = getAverageScore(st, str(datesDf[i]), str(datesDf[i+1]))
#print(str(datesDf[i]) + str(datesDf[i+1]))
# Execution
#getAverageScore("Obama")
weeklyScoreToCSV("a", '01/01/2019', 10)
In the weeklyScoreToCSV function, whenever I input the date parameters for the getAverageScore function call manually, the function works perfectly. However when I attempt to use the given code,
I am given the following error
KeyError: "None of [Index(['date', 'username', 'tweet'], dtype='object')] are in the [columns]"
I cannot figure out where I'm going wrong.