I am using Twitter4j Streaming API to collect tweets in real time filtered by certain keywords. I have create a schema for the tweet fields that I would require in MySQL and used JDBC connection to the table where I intend to store the tweets in real time.
The schema is as follows:
CREATE Table tweet(
TweetId long NOT NULL,
Product varchar(15),
sub_product varchar(15) default NULL,
TweetDate Timestamp NOT NULL ,
Tweetmsg varchar(250) default NULL,
tweetsource varchar(15) default NULL,
isTruncated Bool default false,
inReplyToStatusId int default 0,
inReplyToUserId int default 0,
isFavorited Bool default false,
isRetweeted Bool default false,
favoriteCount int default 0,
inReplyToScreenName varchar(30) default NULL,
geoLocation varchar(20) default NULL,
place varchar(20) default NULL,
retweetCount int default 0,
isPossiblySensitive Bool default false,
isoLanguageCode Bool default false,
lang varchar(10) ,
retweetedStatus Bool default false,
hashtagEntities varchar(20) ,
currentUserRetweetId int default 0,
userid int,
username varchar(30),
screenName varchar(30) ,
userlocation varchar(20) ,
userdescription varchar(200) ,
isContributorsEnabled Bool default false,
profileImageUrl varchar(50) default NULL,
profileImageUrlHttp varchar(50) default NULL,
url varchar(50) default NULL,
isProtected Bool default false,
followersCount int default 0,
twitter_status Bool default false,
profileUseBackgroundImage Bool default false,
friendsCount int default 0,
createdAt Date default NULL,
favouritesCount int default 0,
utcOffset int default 0,
timeZone varchar(20),
statusesCount int,
isGeoEnabled Bool default false,
isVerified Bool default false,
translator Bool default false,
listedCount int default 0,
isFollowRequestSent Bool default false,
primary key(TweetDate)
);
ALTER TABLE tweet PARTITION BY RANGE(UNIX_TIMESTAMP(TweetDate))(
PARTITION JAN VALUES LESS THAN (UNIX_TIMESTAMP('2014-02-01')),
PARTITION FEB VALUES LESS THAN (UNIX_TIMESTAMP('2014-03-01')),
PARTITION MAR VALUES LESS THAN (UNIX_TIMESTAMP('2014-04-01')),
PARTITION APR VALUES LESS THAN (UNIX_TIMESTAMP('2014-05-01')),
PARTITION MAY VALUES LESS THAN (UNIX_TIMESTAMP('2014-06-01')),
PARTITION JUN VALUES LESS THAN (UNIX_TIMESTAMP('2014-07-01')),
PARTITION JUL VALUES LESS THAN (UNIX_TIMESTAMP('2014-08-01')),
PARTITION AUG VALUES LESS THAN (UNIX_TIMESTAMP('2014-09-01')),
PARTITION SEP VALUES LESS THAN (UNIX_TIMESTAMP('2014-10-01')),
PARTITION OCTO VALUES LESS THAN (UNIX_TIMESTAMP('2014-11-01')),
PARTITION NOV VALUES LESS THAN (UNIX_TIMESTAMP('2014-12-01')),
PARTITION DECE VALUES LESS THAN (UNIX_TIMESTAMP('2015-01-01'))
);
Note 1: Some of the fields like Product and Sub product will be given hard coded values for my application purpose. Other fields are fields that I get from the Twitter4j Streaming API.
Note 2: The table is partitioned by date in order to handle large volumes of data.
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;
import twitter4j.FilterQuery;
import twitter4j.StallWarning;
import twitter4j.Status;
import twitter4j.StatusDeletionNotice;
import twitter4j.StatusListener;
import twitter4j.TwitterStream;
import twitter4j.TwitterStreamFactory;
import twitter4j.User;
import twitter4j.conf.ConfigurationBuilder;
public class Stream {
static int stop = 0;
static int connect = 0;
static final String JDBC_DRIVER = "com.mysql.jdbc.Driver";
static final String DB_URL = "jdbc:mysql://localhost/Inxite";
// Database credentials
static final String USER = "root";
static final String PASS = "";
public static void main(String[] args) {
ConfigurationBuilder cb = new ConfigurationBuilder();
cb.setDebugEnabled(true);
cb.setOAuthConsumerKey("dcY3gq8XwbizayxrERR5P15Ug");
cb.setOAuthConsumerSecret("ekAxhWRy0hRnSCvDnMd8Z6IeryaqNqyCcJxAUoZnyhGvoSr3LF");
cb.setOAuthAccessToken("98595421-PPhON5vnNByDIgA2YqCgJ6IGEhmkHoxRudB5010xE");
cb.setOAuthAccessTokenSecret("AgFwK5Ruqpr1YIfxpxf8k6ChJkfqgWJiya4irdZDdElGB");
TwitterStream twitterStream = new TwitterStreamFactory(cb.build())
.getInstance();
StatusListener listener = new StatusListener() {
@Override
public void onException(Exception arg0) {
// TODO Auto-generated method stub
}
@Override
public void onDeletionNotice(StatusDeletionNotice arg0) {
// TODO Auto-generated method stub
}
@Override
public void onScrubGeo(long arg0, long arg1) {
// TODO Auto-generated method stub
}
@Override
public void onStatus(Status status) {
User user = status.getUser();
Connection conn = null;
Statement stmt = null;
try {
if (stop != 1) {
// STEP 2: Register JDBC driver
if (connect != 1) {
Class.forName("com.mysql.jdbc.Driver");
// STEP 3: Open a connection
System.out.println("Connecting to database...");
conn = DriverManager.getConnection(DB_URL, USER,
PASS);
// no need to display the fields... directly insert
// them and work on it
System.out.println(status);
// EDIT THIS
System.out
.println("Inserting records into the table...");
stmt = conn.createStatement();
connect = 1;
}
String sql1 =
"INSERT INTO tweet(TweetID,Product,sub_product,TweetDate,"
+
"Tweetmsg,tweetsource,isTruncated,inReplyToStatusId,"
+
"inReplyToUserId,isFavorited,isRetweeted,favoriteCount,"
+
"inReplyToScreenName,geoLocation, place,retweetCount,isPossiblySensitive,"
+
"isoLanguageCode,lang ,retweetedStatus,hashtagEntities,currentUserRetweetId,"
+
"userid,username,screenName,userlocation,userdescription,isContributorsEnabled,"
+
"profileImageUrl,profileImageUrlHttp, url,isProtected,followersCount,"
+
"twitter_status,profileUseBackgroundImage,friendsCount,createdAt,"
+
"favouritesCount,utcOffset,timeZone,statusesCount,isGeoEnabled ,"
+
"isVerified,translator,listedCount,isFollowRequestSent)"
+ " VALUES ( '" + status.getId() + ",'Pepsi'," +
",‘Pepsi’," + status.getCreatedAt() + "," +
status.getText() + "," + status.getSource() + "," +
' ' + "," + status.getInReplyToStatusId() + "," +
status.getInReplyToUserId() + "," + ' ' + "," + ' ' +
"," + ' ' + "," + status.getInReplyToScreenName() +
"," + status.getGeoLocation() + "," +
status.getPlace() + "," + status.getRetweetCount() +
"," + ' ' + "," + ' ' + "," + status.getLang() + ","
+ status.getRetweetedStatus() + "," +
status.getHashtagEntities() + "," +
status.getCurrentUserRetweetId() + "," +
status.getUser().getId() + "," +
status.getUser().getName() + "," +
status.getUser().getScreenName() + "," +
status.getUser().getLocation() + "," +
status.getUser().getDescription() + "," +
status.getUser().isContributorsEnabled() + "," +
status.getUser().getProfileImageURL() + "," +
status.getUser().getProfileImageURLHttps() + "," +
status.getUser().getURL() + "," +
status.getUser().isProtected() + "," +
status.getUser().getFollowersCount() + "," +
status.getText() + "," + status.getUser()
.getProfileBackgroundImageURL() + "," +
status.getUser().getFriendsCount() + "," +
status.getUser().getCreatedAt() + "," +
status.getUser().getFavouritesCount() + "," +
status.getUser().getUtcOffset() + "," +
status.getUser().getTimeZone() + "," +
status.getUser().getStatusesCount() + "," +
status.getUser().isGeoEnabled() + "," +
status.getUser().isVerified() + "," +
status.getUser().isTranslator() + "," +
status.getUser().getListedCount() + "," +
status.getUser().isFollowRequestSent() + ")";
/*String sql1 = "INSERT INTO tweet(TweetID,Product,sub_product,TweetDate) values("
+ status.getId()
+ ",'Pepsi'"
+ ",‘Pepsi’,"
+ "'2015-01-01 12:12:12'"
+ ")";*/
System.out.println(sql1);
stmt.executeUpdate(sql1);
System.exit(0);
System.out
.println("Inserted records into the table...");
if (status.getRateLimitStatus() != null) {
System.out
.println("status.getRateLimitStatus().getLimit() = "
+ status.getRateLimitStatus()
.getLimit());
System.out
.println("status.getRateLimitStatus().getRemaining() = "
+ status.getRateLimitStatus()
.getRemaining());
System.out
.println("status.getRateLimitStatus().getResetTimeInSeconds() = "
+ status.getRateLimitStatus()
.getResetTimeInSeconds());
System.out
.println("status.getRateLimitStatus().getSecondsUntilReset() = "
+ status.getRateLimitStatus()
.getSecondsUntilReset());
// System.out.println("status.getRateLimitStatus().getRemainingHits() = "
// +
// status.getRateLimitStatus().getRemainingHits());
}
stop = 1;
System.exit(0);
stmt.close();
conn.close();
}
} catch (SQLException se) {
// Handle errors for JDBC
se.printStackTrace();
} catch (Exception e) {
// Handle errors for Class.forName
e.printStackTrace();
} finally {
// finally block used to close resources
try {
if (stmt != null)
stmt.close();
} catch (SQLException se2) {
}// nothing we can do
try {
if (conn != null)
conn.close();
} catch (SQLException se) {
se.printStackTrace();
}// end finally try
}// end try
}
@Override
public void onTrackLimitationNotice(int arg0) {
// TODO Auto-generated method stub
}
@Override
public void onStallWarning(StallWarning arg0) {
// TODO Auto-generated method stub
}
};
// More robust filtering like by location/time zone/etc here itself?
FilterQuery fq = new FilterQuery();
String keywords[] = { "pepsi" };
fq.track(keywords);
twitterStream.addListener(listener);
twitterStream.filter(fq);
}
}
Issue #1: I am not able to store the captured tweets into MySQL tabs according to the schema I have set up.
Issue #2: I want to limit the number of tweets collected but I am not able to do that and the tweets keep streaming in.