I am trying to scrape tweets using Twint. The goal is to fetch tweets matching a keyword and print them in a custom format that I provide. When I run the search with my Twint configuration, I get the following error:
<ipython-input-19-dc1bb332d2a2> in <module>
5 c.Limit = 1
6 c.Pandas = True
----> 7 twint.run.Search(c)
14 frames
/content/twint/twint/format.py in Tweet(config, t)
21 output = output.replace("{hashtags}", ",".join(t.hashtags))
22 output = output.replace("{cashtags}", ",".join(t.cashtags))
---> 23 output = output.replace("{replies}", t.replies_count)
24 output = output.replace("{retweets}", t.retweets_count)
25 output = output.replace("{likes}", t.likes_count)
TypeError: replace() argument 2 must be str, not int
The code I am running is:
# Imports go first. NOTE: twint itself is assumed to be imported earlier
# in the notebook session; it is not imported in this snippet.
import nest_asyncio

# Patch asyncio so an event loop can be run inside the notebook's
# already-running loop; do this before any search is started.
nest_asyncio.apply()

# Set up TWINT config
c = twint.Config()
c.Search = "football"
# Custom output format
c.Format = "Username: {username} | Tweet: {tweet}"
c.Limit = 1
c.Pandas = True
twint.run.Search(c)
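I checked format.py, and the chain of replace() calls in the Tweet function looks like this
(note that the {replies} line flagged in the traceback passes t.replies_count directly, without str(), unlike e.g. {video}):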
def Tweet(config, t):
if config.Format:
logme.debug(__name__+':Tweet:Format')
output = config.Format.replace("{id}", t.id_str)
output = output.replace("{conversation_id}", t.conversation_id)
output = output.replace("{date}", t.datestamp)
output = output.replace("{time}", t.timestamp)
output = output.replace("{user_id}", t.user_id_str)
output = output.replace("{username}", t.username)
output = output.replace("{name}", t.name)
output = output.replace("{place}", t.place)
output = output.replace("{timezone}", t.timezone)
output = output.replace("{urls}", ",".join(t.urls))
output = output.replace("{photos}", ",".join(t.photos))
output = output.replace("{video}", str(t.video))
output = output.replace("{thumbnail}", t.thumbnail)
output = output.replace("{tweet}", t.tweet)
output = output.replace("{language}", t.lang)
output = output.replace("{hashtags}", ",".join(t.hashtags))
output = output.replace("{cashtags}", ",".join(t.cashtags))
output = output.replace("{replies}", t.replies_count)
output = output.replace("{retweets}", t.retweets_count)
output = output.replace("{likes}", t.likes_count)
output = output.replace("{link}", t.link)
output = output.replace("{is_retweet}", str(t.retweet))
output = output.replace("{user_rt_id}", str(t.user_rt_id))
output = output.replace("{quote_url}", t.quote_url)
output = output.replace("{near}", t.near)
output = output.replace("{geo}", t.geo)
output = output.replace("{mentions}", ",".join(t.mentions))
output = output.replace("{translate}", t.translate)
output = output.replace("{trans_src}", t.trans_src)
output = output.replace("{trans_dest}", t.trans_dest)
else:
logme.debug(__name__+':Tweet:notFormat')
output = f"{t.id_str} {t.datestamp} {t.timestamp} {t.timezone}
Any help would be greatly appreciated. If the question needs to be rewritten, please let me know as well; I am not very good at phrasing questions.