0

I am using Twitter4j Streaming API to collect tweets in real time filtered by certain keywords. I have create a schema for the tweet fields that I would require in MySQL and used JDBC connection to the table where I intend to store the tweets in real time.

The schema is as follows:

CREATE Table tweet(
TweetId long NOT NULL,
Product varchar(15),
sub_product varchar(15) default NULL,
TweetDate Timestamp NOT NULL ,
Tweetmsg varchar(250) default NULL,
tweetsource varchar(15) default NULL,
isTruncated Bool default false,
inReplyToStatusId int default 0, 
inReplyToUserId int default 0, 
isFavorited Bool default false, 
isRetweeted Bool default false, 
favoriteCount int default 0, 
inReplyToScreenName varchar(30) default NULL, 
geoLocation varchar(20) default NULL, 
place varchar(20) default NULL, 
retweetCount int default 0, 
isPossiblySensitive Bool default false, 
isoLanguageCode Bool default false, 
lang varchar(10) , 
retweetedStatus Bool default false, 
hashtagEntities varchar(20) , 
currentUserRetweetId int default 0, 
userid int, 
username varchar(30), 
screenName varchar(30) , 
userlocation varchar(20) , 
userdescription varchar(200) , 
isContributorsEnabled Bool default false, 
profileImageUrl varchar(50) default NULL, 
profileImageUrlHttp varchar(50) default NULL, 
url varchar(50) default NULL, 
isProtected Bool default false, 
followersCount int default 0, 
twitter_status Bool default false, 
profileUseBackgroundImage Bool default false, 
friendsCount int default 0, 
createdAt Date default NULL, 
favouritesCount int default 0, 
utcOffset int default 0, 
timeZone varchar(20), 
statusesCount int, 
isGeoEnabled Bool default false, 
isVerified Bool default false, 
translator Bool default false, 
listedCount int default 0, 
isFollowRequestSent Bool default false,
primary key(TweetDate)
);
ALTER TABLE tweet PARTITION BY RANGE(UNIX_TIMESTAMP(TweetDate))(
PARTITION JAN VALUES LESS THAN (UNIX_TIMESTAMP('2014-02-01')),
PARTITION FEB VALUES LESS THAN (UNIX_TIMESTAMP('2014-03-01')),
PARTITION MAR VALUES LESS THAN (UNIX_TIMESTAMP('2014-04-01')),
PARTITION APR VALUES LESS THAN (UNIX_TIMESTAMP('2014-05-01')),
PARTITION MAY VALUES LESS THAN (UNIX_TIMESTAMP('2014-06-01')),
PARTITION JUN VALUES LESS THAN (UNIX_TIMESTAMP('2014-07-01')),
PARTITION JUL VALUES LESS THAN (UNIX_TIMESTAMP('2014-08-01')),
PARTITION AUG VALUES LESS THAN (UNIX_TIMESTAMP('2014-09-01')),
PARTITION SEP VALUES LESS THAN (UNIX_TIMESTAMP('2014-10-01')),
PARTITION OCTO VALUES LESS THAN (UNIX_TIMESTAMP('2014-11-01')),
PARTITION NOV VALUES LESS THAN (UNIX_TIMESTAMP('2014-12-01')),
PARTITION DECE VALUES LESS THAN (UNIX_TIMESTAMP('2015-01-01'))
);

Note 1: Some of the fields like Product and Sub product will be given hard coded values for my application purpose. Other fields are fields that I get from the Twitter4j Streaming API.

Note 2: The table is partitioned by date in order to handle large volumes of data.

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.SQLException;
    import java.sql.Statement;

    import twitter4j.FilterQuery;
    import twitter4j.StallWarning;
    import twitter4j.Status;
    import twitter4j.StatusDeletionNotice;
    import twitter4j.StatusListener;
    import twitter4j.TwitterStream;
    import twitter4j.TwitterStreamFactory;
    import twitter4j.User;
    import twitter4j.conf.ConfigurationBuilder;

    public class Stream {
        static int stop = 0;
        static int connect = 0;
        static final String JDBC_DRIVER = "com.mysql.jdbc.Driver";
        static final String DB_URL = "jdbc:mysql://localhost/Inxite";

        // Database credentials
        static final String USER = "root";
        static final String PASS = "";

        public static void main(String[] args) {
            ConfigurationBuilder cb = new ConfigurationBuilder();
            cb.setDebugEnabled(true);
            cb.setOAuthConsumerKey("dcY3gq8XwbizayxrERR5P15Ug");
            cb.setOAuthConsumerSecret("ekAxhWRy0hRnSCvDnMd8Z6IeryaqNqyCcJxAUoZnyhGvoSr3LF");
            cb.setOAuthAccessToken("98595421-PPhON5vnNByDIgA2YqCgJ6IGEhmkHoxRudB5010xE");
            cb.setOAuthAccessTokenSecret("AgFwK5Ruqpr1YIfxpxf8k6ChJkfqgWJiya4irdZDdElGB");

            TwitterStream twitterStream = new TwitterStreamFactory(cb.build())
                    .getInstance();

            StatusListener listener = new StatusListener() {

                @Override
                public void onException(Exception arg0) {
                    // TODO Auto-generated method stub

                }

                @Override
                public void onDeletionNotice(StatusDeletionNotice arg0) {
                    // TODO Auto-generated method stub

                }

                @Override
                public void onScrubGeo(long arg0, long arg1) {
                    // TODO Auto-generated method stub

                }

                @Override
                public void onStatus(Status status) {
                    User user = status.getUser();
                    Connection conn = null;
                    Statement stmt = null;

                    try {
                        if (stop != 1) {
                            // STEP 2: Register JDBC driver
                            if (connect != 1) {
                                Class.forName("com.mysql.jdbc.Driver");

                                // STEP 3: Open a connection
                                System.out.println("Connecting to    database...");
                                conn = DriverManager.getConnection(DB_URL, USER,
                                        PASS);
                                // no need to display the fields... directly insert
                                // them and work on it
                                System.out.println(status);
                                // EDIT THIS
                                System.out
                                        .println("Inserting records into the table...");
                                stmt = conn.createStatement();
                                connect = 1;
                            }
 String sql1 =
                              "INSERT INTO tweet(TweetID,Product,sub_product,TweetDate,"
                              +
                              "Tweetmsg,tweetsource,isTruncated,inReplyToStatusId,"
                              +
                              "inReplyToUserId,isFavorited,isRetweeted,favoriteCount,"
                              +
                              "inReplyToScreenName,geoLocation, place,retweetCount,isPossiblySensitive,"
                              +
                              "isoLanguageCode,lang ,retweetedStatus,hashtagEntities,currentUserRetweetId,"
                              +
                              "userid,username,screenName,userlocation,userdescription,isContributorsEnabled,"
                              +
                              "profileImageUrl,profileImageUrlHttp, url,isProtected,followersCount,"
                              +
                              "twitter_status,profileUseBackgroundImage,friendsCount,createdAt,"
                              +
                              "favouritesCount,utcOffset,timeZone,statusesCount,isGeoEnabled ,"
                              +
                              "isVerified,translator,listedCount,isFollowRequestSent)"
                              + " VALUES ( '" + status.getId() + ",'Pepsi'," +
                              ",‘Pepsi’," + status.getCreatedAt() + "," +
                              status.getText() + "," + status.getSource() + "," +
                              ' ' + "," + status.getInReplyToStatusId() + "," +
                              status.getInReplyToUserId() + "," + ' ' + "," + ' ' +
                              "," + ' ' + "," + status.getInReplyToScreenName() +
                              "," + status.getGeoLocation() + "," +
                              status.getPlace() + "," + status.getRetweetCount() +
                              "," + ' ' + "," + ' ' + "," + status.getLang() + ","
                              + status.getRetweetedStatus() + "," +
                              status.getHashtagEntities() + "," +
                              status.getCurrentUserRetweetId() + "," +
                              status.getUser().getId() + "," +
                              status.getUser().getName() + "," +
                              status.getUser().getScreenName() + "," +
                              status.getUser().getLocation() + "," +
                              status.getUser().getDescription() + "," +
                              status.getUser().isContributorsEnabled() + "," +
                              status.getUser().getProfileImageURL() + "," +
                              status.getUser().getProfileImageURLHttps() + "," +
                              status.getUser().getURL() + "," +
                              status.getUser().isProtected() + "," +
                              status.getUser().getFollowersCount() + "," +
                              status.getText() + "," + status.getUser()
                              .getProfileBackgroundImageURL() + "," +
                              status.getUser().getFriendsCount() + "," +
                              status.getUser().getCreatedAt() + "," +
                              status.getUser().getFavouritesCount() + "," +
                              status.getUser().getUtcOffset() + "," +
                              status.getUser().getTimeZone() + "," +
                              status.getUser().getStatusesCount() + "," +
                              status.getUser().isGeoEnabled() + "," +
                              status.getUser().isVerified() + "," +
                              status.getUser().isTranslator() + "," +
                              status.getUser().getListedCount() + "," +
                              status.getUser().isFollowRequestSent() + ")";

                            /*String sql1 = "INSERT INTO tweet(TweetID,Product,sub_product,TweetDate) values("
                                    + status.getId()
                                    + ",'Pepsi'"
                                    + ",‘Pepsi’,"
                                    + "'2015-01-01 12:12:12'"
                                    + ")";*/
                            System.out.println(sql1);
                            stmt.executeUpdate(sql1);
                            System.exit(0);
                            System.out
                                    .println("Inserted records into the table...");
                            if (status.getRateLimitStatus() != null) {
                                System.out
                                        .println("status.getRateLimitStatus().getLimit() = "
                                                + status.getRateLimitStatus()
                                                        .getLimit());
                                System.out
                                        .println("status.getRateLimitStatus().getRemaining() = "
                                                + status.getRateLimitStatus()
                                                        .getRemaining());
                                System.out
                                        .println("status.getRateLimitStatus().getResetTimeInSeconds() = "
                                                + status.getRateLimitStatus()
                                                        .getResetTimeInSeconds());
                                System.out
                                        .println("status.getRateLimitStatus().getSecondsUntilReset() = "
                                                + status.getRateLimitStatus()
                                                        .getSecondsUntilReset());
                                // System.out.println("status.getRateLimitStatus().getRemainingHits() = "
                                // +
                                // status.getRateLimitStatus().getRemainingHits());
                            }
                            stop = 1;
                            System.exit(0);
                            stmt.close();
                            conn.close();
                        }
                    } catch (SQLException se) {
                        // Handle errors for JDBC
                        se.printStackTrace();
                    } catch (Exception e) {
                        // Handle errors for Class.forName
                        e.printStackTrace();
                    } finally {
                        // finally block used to close resources
                        try {
                            if (stmt != null)
                                stmt.close();
                        } catch (SQLException se2) {
                        }// nothing we can do
                        try {
                            if (conn != null)
                                conn.close();
                        } catch (SQLException se) {
                            se.printStackTrace();
                        }// end finally try
                    }// end try

                }

                @Override
                public void onTrackLimitationNotice(int arg0) {
                    // TODO Auto-generated method stub

                }

                @Override
                public void onStallWarning(StallWarning arg0) {
                    // TODO Auto-generated method stub

                }

            };

            // More robust filtering like by location/time zone/etc here itself?
            FilterQuery fq = new FilterQuery();

            String keywords[] = { "pepsi" };

            fq.track(keywords);

            twitterStream.addListener(listener);
            twitterStream.filter(fq);

        }
    }

Issue #1: I am not able to store the captured tweets into MySQL tabs according to the schema I have set up.

Issue #2: I want to limit the number of tweets collected but I am not able to do that and the tweets keep streaming in.

Luiggi Mendoza
  • 85,076
  • 16
  • 154
  • 332
user3451166
  • 189
  • 13
  • Store tweets in list, count and than save that later on db. streamming api is fast thread.. you mast have sam queue, for instance LinkedBlockingQueue – Orbita Jun 03 '14 at 21:06
  • If slow stream is your problem: http://stackoverflow.com/questions/27576904/twitter4j-slow-status-stream/ – Sam Jan 02 '15 at 14:26

0 Answers0