I am interested in retrieving tweet streams about the forth-coming Nigeria's general election. I want all tweets from Nigeria, which contains information about 4 major presidential candidates only ("atiku-abubakar-rabiu-kwankwaso-peter-obi-bola-tinubu-inec"
).
However, from what I am retrieving are the moments are tweets, majority of which do not relate to the keywords (the rules), not even about politics or elections.
My code:
import tweepy
import json
import sqlite3
import time
BEARER = "my-bearer-key"
try:
connection = sqlite3.connect('inec-2023-tweets.db')
cursor = connection.cursor()
print(f"Database connection successful! \n")
except sqlite3.Error as error:
print(f'Error while connecting to sqlite {error}')
class MyListener(tweepy.StreamingClient):
def on_data(self, data):
new_data = str(data) #
data_obj = json.loads(data.decode('utf8'))
data_obj = json.dumps(data_obj, indent=2)
print('\nTweet data received, sending to db...\n')
u_timestamp = int(time.time())
query = "INSERT INTO raw_data(timestamp, payload) VALUES(?,?)"
data = (u_timestamp, data_obj)
try:
cursor.execute(query, data)
connection.commit()
print('\nData saved.')
except sqlite3.Error as error:
print(f"Error while working with SQLite: {error}")
return True
def on_connect(self):
print('\n Connected..!')
def on_error(self, status):
print(status)
return True
stream = MyListener(BEARER)
stream.add_rules(tweepy.StreamRule('place_country:NG has:geo', tag="atiku-abubakar-rabiu-kwankwaso-peter-obi-bola-tinubu-inec"))
stream.filter(tweet_fields=["geo","created_at","author_id","context_annotations"],
place_fields=["id","geo","name","country_code","place_type","full_name","country"],
expansions=["geo.place_id","referenced_tweets.id"])
Sample tweets retrieved:
1|1666603722|{
"data": {
"author_id": "3301724376",
"context_annotations": [
{
"domain": {
"id": "46",
"name": "Business Taxonomy",
"description": "Categories within Brand Verticals that narrow down the scope of Brands"
},
"entity": {
"id": "1557193940978135808",
"name": "Gaming Business",
"description": "Brands, companies, advertisers and every non-person handle with the profit intent related to offline and online games such as gaming consoles, tabletop games, video game publishers"
}
},
{
"domain": {
"id": "47",
"name": "Brand",
"description": "Brands and Companies"
},
"entity": {
"id": "1502374025170882561",
"name": "WhatsApp",
"description": "WhatsApp Messenger, or simply WhatsApp, is an internationally available American freeware, cross-platform centralized instant messaging and voice-over-IP service owned by Meta Platforms."
}
}
],
"created_at": "2022-10-24T09:28:36.000Z",
"edit_history_tweet_ids": [
"1584476943512059905"
],
"geo": {
"place_id": "13e62ac32ad46001"
},
"id": "1584476943512059905",
"text": "Good morning \ud83e\udd70\nThis is a great week to shop for new sheets\ud83d\ude4f\ud83c\udffc\u2764\ufe0f\n\nBedsheets and pillowcases only \n6/6 - NGN 6000\n6/7 - NGN 6500\n7/7 - NGN 7500\n \nKindly DM or WhatsApp 08062407473 to order \nLocation is Lagos\nNationwide delivery guaranteed \ud83d\udcaf\n@_DammyB_ @yay_tunes @unclemidetush "
},
"includes": {
"places": [
{
"country": "Nigeria",
"country_code": "NG",
"full_name": "Lagos University Teaching Hospital",
"geo": {
"type": "Feature",
"bbox": [
3.354450897360182,
6.519118684124127,
3.354450897360182,
6.519118684124127
],
"properties": {}
},
"id": "13e62ac32ad46001",
"name": "Lagos University Teaching Hospital",
"place_type": "poi"
}
],
"tweets": [
{
"author_id": "3301724376",
"context_annotations": [
{
"domain": {
"id": "46",
"name": "Business Taxonomy",
"description": "Categories within Brand Verticals that narrow down the scope of Brands"
},
"entity": {
"id": "1557696940178935808",
"name": "Gaming Business",
"description": "Brands, companies, advertisers and every non-person handle with the profit intent related to offline and online games such as gaming consoles, tabletop games, video game publishers"
}
},
{
"domain": {
"id": "47",
"name": "Brand",
"description": "Brands and Companies"
},
"entity": {
"id": "1502374025170882561",
"name": "WhatsApp",
"description": "WhatsApp Messenger, or simply WhatsApp, is an internationally available American freeware, cross-platform centralized instant messaging and voice-over-IP service owned by Meta Platforms."
}
}
],
"created_at": "2022-10-24T09:28:36.000Z",
"edit_history_tweet_ids": [
"1584476943512059905"
],
"geo": {
"place_id": "13e62ac32ad46001"
},
"id": "1584476943512059905",
"text": "Good morning \ud83e\udd70\nThis is a great week to shop for new sheets\ud83d\ude4f\ud83c\udffc\u2764\ufe0f\n\nBedsheets and pillowcases only \n6/6 - NGN 6000\n6/7 - NGN 6500\n7/7 - NGN 7500\n \nKindly DM or WhatsApp 08062407473 to order \nLocation is Lagos\nNationwide delivery guaranteed \ud83d\udcaf\n@_DammyB_ @yay_tunes @unclemidetush "
}
]
},
"matching_rules": [
{
"id": "1575129079472443401",
"tag": "atiku-abubakar-rabiu-kwankwaso-peter-obi-bola-tinubu-inec"
}
]
}
- How do I filter tweets to contain only those keywords
"atiku-abubakar-rabiu-kwankwaso-peter-obi-bola-tinubu-inec"
(or hashtag).