I am working with Python attempting to store tweets (more precisely only their date, user, bio and text) related to a specific keyword in a csv file. As I am working on the free-to-use API of Twitter, I am limited to 450 tweets every 15 minutes. So I have coded something which is supposed to store exactly 450 tweets in 15 minutes.
BUT the problem is something goes wrong when extracting the tweets so that at a specific point the same tweet is stored again and again.
Any help would be much appreciated !! Thanks in advance
import time
from twython import Twython, TwythonError, TwythonStreamer
twitter = Twython(CONSUMER_KEY, CONSUMER_SECRET)
sfile = "tweets_" + keyword + todays_date + ".csv"
id_list = [last_id]
count = 0
while count < 3*60*60*2: #we set the loop to run for 3hours
# tweet extract method with the last list item as the max_id
print("new crawl, max_id:", id_list[-1])
tweets = twitter.search(q=keyword, count=2, max_id=id_list[-1])["statuses"]
time.sleep(2) ## 2 seconds rest between api calls (450 allowed within 15min window)
for status in tweets:
id_list.append(status["id"]) ## append tweet id's
if status==tweets[0]:
continue
if status==tweets[1]:
date = status["created_at"].encode('utf-8')
user = status["user"]["screen_name"].encode('utf-8')
bio = status["user"]["description"].encode('utf-8')
text = status["text"].encode('utf-8')
with open(sfile,'a') as sf:
sf.write(str(status["id"])+ "|||" + str(date) + "|||" + str(user) + "|||" + str(bio) + "|||" + str(text) + "\n")
count += 1
print(count)
print(date, text)