my problem statement
i have a set of health data related to new york dataset.
file structure such as
longitude lattitude day cases '
40.00 -73.25 1 1
so created a file have records around 17699997. so i have to create a stream using stream package extracting a 1000 data from a file.and then i m going to apply Denstram() algorithm for online cluster and same 1000 point get and apply offline algorithm such K-means, DBSACN.
i want to take 1000 points from the file apply online and offline cluster. the aging take 1000 until the stream is going to end.
so i have created the following code. but in which i unable to located the end of stream.
but i want to notify user tha stream is end.
two process
online process
offline process
Denstream_Dbscan<-function(Wrapper_Data_Stream)
{
i<-1;
while(TRUE)
{
# online process(micro cluster)
start_time<-Sys.time();
print(paste("Start online Process ",i," iteration",Sys.time()));
dstream_micro<-DSC_DenStream(epsilon=0.01,initPoints=200,minPoints=500);
system.time({
cluster(dstream_micro,Wrapper_Data_Stream,n=1000,verbose=TRUE);
})
# print(paste("center of cluster=",get_center(dstream_micro)));
print(paste("no of micro cluster=",nclusters(dstream_micro,type="auto")));
jpeg(paste("Micro Cluster(Online)_",i,".jpeg"));
plot(dstream_micro,dsd=Wrapper_Data_Stream,pch=c(1,2),col_points="red",col_clusters="green",xlab="latitude",ylab="longitude", main=paste("TimeTaken=",start_time-end_time),sub=paste("micro cluster:",Sys.time()),col.main="red",col.sub="black");
print(paste("End online Process for ",i," iteration",Sys.time()));
dev.off();
# offline process (macro cluster)
print(paste("Start offline Process ",i," iteration",Sys.time()));
start_time<-Sys.time();
dbscan_macro<-DSC_DBSCAN(eps=0.1)
system.time({
recluster(dbscan_macro,dstream_micro);
})
print(paste("no of macro cluster=",nclusters(dbscan_macro)));
# print(paste("center of cluster=",get_center(dstream_micro)));
end_time<-Sys.time();
jpeg(paste("Macro Cluster(offline)_",i,".jpeg"));
plot(dbscan_macro,Wrapper_Data_Stream,pch=c(1,2),col_points="grey",col_clusters="blue",xlab="latitude",ylab="longitude",main=paste("TimeTaken=",start_time-end_time),sub=paste("macro cluster:",Sys.time()),col.main="red",col.sub="black");
dev.off();
print(paste("End Offline Process for ",i," iteration",Sys.time()));
i<-i+1;
}
}