0
    import os
    import re

    import numpy as np
    import pandas as pd
    import textblob
    import tweepy
    from textblob import TextBlob
    from tweepy import OAuthHandler
    from tweepy import Stream
    from tweepy.streaming import StreamListener

    # Twitter API credentials.
    # Read them from the environment so real keys never have to be written
    # into (or published with) this file. The "XXXX" placeholders are kept as
    # defaults so the names are always defined; they are not valid keys.
    ACCESS_TOKEN = os.environ.get("TWITTER_ACCESS_TOKEN", "XXXX")
    ACCESS_SECRET = os.environ.get("TWITTER_ACCESS_SECRET", "XXXX")
    CONSUMER_KEY = os.environ.get("TWITTER_CONSUMER_KEY", "XXXX")
    CONSUMER_SECRET = os.environ.get("TWITTER_CONSUMER_SECRET", "XXXX")

    def twitter_setup():
        """
        Build a Twitter API client authenticated with the module-level keys.

        Uses CONSUMER_KEY / CONSUMER_SECRET to create the OAuth handler and
        ACCESS_TOKEN / ACCESS_SECRET as the access token pair.

        Returns:
            The ``tweepy.API`` object wrapping the authenticated handler.
        """
        # Authentication and access using keys:
        auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
        auth.set_access_token(ACCESS_TOKEN, ACCESS_SECRET)

        # Return API with authentication:
        return tweepy.API(auth)


    # Create the client at module level. This assignment must live outside the
    # function: placed after the ``return`` it would never run, and the code
    # below that uses ``extractor`` would fail with a NameError.
    extractor = twitter_setup()


    # Request the account's timeline from the API client (count=200).
    tweets = extractor.user_timeline(
        screen_name="realDonaldTrump",
        count=200,
    )

    # One row per returned status, starting with only the tweet text.
    tweet_texts = [status.text for status in tweets]
    data = pd.DataFrame(data=tweet_texts, columns=['Tweets'])

# Length of each tweet's text, in characters.
data['len'] = np.array([len(status.text) for status in tweets])

# The remaining columns are straight copies of one status attribute each;
# the tuple order fixes the column order in the DataFrame.
for column, attribute in (
    ('ID', 'id'),
    ('Date', 'created_at'),
    ('Source', 'source'),
    ('Likes', 'favorite_count'),
    ('RTs', 'retweet_count'),
):
    data[column] = np.array([getattr(status, attribute) for status in tweets])

# Matches, tried in this order at each position: an @mention made of ASCII
# letters/digits, any single character that is not an ASCII letter, digit,
# space or tab, or a ``scheme://...`` link up to the next whitespace.
# Written as a raw string so the backslashes reach the regex engine intact
# instead of being treated as (invalid) Python string escapes.
_TWEET_NOISE = re.compile(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+://\S+)")


def clean_tweet(tweet: str) -> str:
    '''
    Utility function to clean the text in a tweet by removing
    mentions, links and special characters using regex.

    Every match of the noise pattern is replaced by a space, then runs of
    whitespace are collapsed to single spaces and the ends are trimmed.

    Args:
        tweet: Raw tweet text.

    Returns:
        The cleaned text; an empty string if nothing is left.
    '''
    return ' '.join(_TWEET_NOISE.sub(" ", tweet).split())

def analize_sentiment(tweet):
    '''
    Label a tweet by the sign of its TextBlob polarity score.

    The tweet is passed through clean_tweet() before being scored.
    Returns 1 for a positive polarity, 0 for a polarity of exactly
    zero, and -1 otherwise.
    '''
    polarity = TextBlob(clean_tweet(tweet)).sentiment.polarity
    if polarity > 0:
        return 1
    if polarity == 0:
        return 0
    return -1

# Score every tweet's text and store the labels in the 'SA' column.
sentiment_labels = [analize_sentiment(text) for text in data['Tweets']]
data['SA'] = np.array(sentiment_labels)


# Show the first 200 rows of the table (fewer if the table is shorter).
display(data.head(200))

I am working on a project in which we extract the tweets of some world leaders and then try to compare their relationships with other countries based on their Twitter comments. So far we have extracted the tweets from Donald Trump's account and categorized them as positive or negative. The problem I am facing is how to separate the tweets country-wise. Is there any way to extract only those tweets in which he/she has tweeted about some country and ignore the rest, so that we get only the tweets related to a country?

Kyle Gunn
  • 23
  • 7

2 Answers

1

I don't have enough reputation to add a comment, but you need to know that you have posted all your access tokens and that is a bad idea.

Kyle Gunn
  • 23
  • 7
0

You might load a list of countries, such as the one in the GitHub repo by marijn. It also contains a list of nationalities (GitHub repo by marijn).

Check, for each tweet, whether a name from the list occurs in it (so you would have to iterate over the list). You might add a counter for each country occurring in a tweet. Add this counter data as a column to your dataframe (similar to your earlier approach to analyzing the sentiment).

This is just an idea; I'm not able to comment yet because I'm new.

romanoww
  • 121
  • 1
  • 1
  • 6