2

I need to track many keywords on Twitter and store the matching tweets in MongoDB. I based my code on this question:

How can I consume tweets from Twitter's streaming api and store them in mongodb

import json
import pymongo
import tweepy

# Twitter application credentials -- fill these in before running.
consumer_key, consumer_secret = "", ""
access_key, access_secret = "", ""

# Build the OAuth handler once; it is used for the API client created here
# and is also handed to the stream further down.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
api = tweepy.API(auth)


class CustomStreamListener(tweepy.StreamListener):
    """Stream listener that stores every raw stream message in MongoDB.

    Each message received from the stream is decoded from JSON and inserted
    into the ``tweets`` collection of the ``test`` database on the default
    local MongoDB instance.
    """

    def __init__(self, api):
        """Initialise the listener and open the MongoDB connection.

        Args:
            api: the ``tweepy.API`` instance to attach to this listener.
        """
        # super() must be given *this* class: passing tweepy.StreamListener
        # starts the MRO lookup after StreamListener and therefore skips
        # StreamListener.__init__ entirely.
        super(CustomStreamListener, self).__init__(api)
        self.api = api

        self.db = pymongo.MongoClient().test

    def on_data(self, tweet):
        """Decode one raw JSON message from the stream and store it.

        Args:
            tweet: the raw JSON string delivered by the stream.

        Returns:
            True, so the stream keeps running (same policy as the error and
            timeout handlers below).
        """
        self.db.tweets.insert(json.loads(tweet))
        return True

    def on_error(self, status_code):
        """Ignore stream errors and keep the connection alive."""
        return True # Don't kill the stream

    def on_timeout(self):
        """Ignore timeouts and keep the connection alive."""
        return True # Don't kill the stream


# Keywords to follow on the streaming endpoint.
to_track = ['keyword1', 'keyword2', 'keyword3']

# Attach the listener to an authenticated stream, then start filtering.
listener = CustomStreamListener(api)
sapi = tweepy.streaming.Stream(auth, listener)
sapi.filter(track=to_track)

Is there a way to tell which keyword matched each incoming tweet, without having to search the text of every tweet for each keyword myself?

Community
  • 1
  • 1
François M.
  • 4,027
  • 11
  • 30
  • 81

1 Answer

1

I'm not sure how the on_data function works, but you could use on_status and do something like the following:

import tweepy
# Twitter application credentials -- fill these in before running.
consumer_key, consumer_secret = '', ''
access_key, access_secret = '', ''

# Build the OAuth handler once; it is used for the API client created here
# and is also handed to the stream further down.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_key, access_secret)
api = tweepy.API(auth)


class CustomStreamListener(tweepy.StreamListener):
    """Stream listener that records which tracked keywords each tweet matched.

    Every status whose text contains at least one entry of ``keywords`` is
    stored once in the ``tweets`` collection of the local ``test`` MongoDB
    database, with the matching keywords saved under ``matched_keywords``.
    """

    # Keywords to attribute tweets to; keep this in sync with the list passed
    # to ``Stream.filter(track=...)``.
    keywords = ('keyword1', 'keyword2', 'keyword3')

    def __init__(self, api=None):
        """Initialise the listener and open the MongoDB connection.

        Args:
            api: optional ``tweepy.API`` instance forwarded to the parent.
        """
        # Imported locally because this snippet's header only imports tweepy.
        import pymongo

        super(CustomStreamListener, self).__init__(api)
        self.db = pymongo.MongoClient().test

    def on_status(self, status):
        """Store ``status`` together with the tracked keywords it contains.

        Args:
            status: the tweepy status object delivered by the stream.
        """
        # Compare lower-cased, whitespace-separated words.
        # NOTE(review): Twitter's own track matching is reportedly
        # case-insensitive and also looks at URLs/hashtags, so a tweet can be
        # delivered without any keyword matching here -- confirm whether such
        # tweets should be stored as well.
        words = set(status.text.lower().split())
        matched = [kw for kw in self.keywords if kw.lower() in words]
        if not matched:
            return

        for kw in matched:
            print("do something with " + kw)

        # status.text is plain text, not JSON, so json.loads() on it fails.
        # Store the decoded tweet payload instead.
        # NOTE(review): assumes tweepy exposes the payload as ``status._json``
        # -- confirm for the installed tweepy version.
        document = dict(status._json)
        document['matched_keywords'] = matched
        # Insert once per tweet, even when several keywords matched.
        self.db.tweets.insert(document)
# Keywords to follow on the streaming endpoint.
to_track = ['keyword1', 'keyword2', 'keyword3']

# Attach the listener to an authenticated stream, then start filtering.
listener = CustomStreamListener(api)
sapi = tweepy.streaming.Stream(auth, listener)
sapi.filter(track=to_track)
Mike C.
  • 1,761
  • 2
  • 22
  • 46