I have a topic model script which I would like to make available as a Shiny app.
I have no experience with Shiny, and this will be my first attempt. The input will always be an Excel sheet with the same format, and I want the output to be an Excel sheet containing the topic model's topics and sub-topics. The script currently works fine, but I want it to remain available when I move on to a different team.
# Packages whose functions are used in this script: stringr
# (str_replace_all), tm (text cleaning, stop words, document-term matrix),
# wordcloud and RColorBrewer (word cloud plot), topicmodels (LDA).
library(stringr)
library(tm)
library(wordcloud)
library(RColorBrewer)
library(topicmodels)

# Read the feedback export. The free-text column must be called `Text`.
# NOTE: the data frame is named `F`, which masks the built-in shorthand for
# FALSE from here on, so TRUE/FALSE must always be spelled out in full.
F <- read.csv("SourceandText2.csv", stringsAsFactors = FALSE)
if (!"Text" %in% names(F)) {
  stop("Input file must contain a 'Text' column.", call. = FALSE)
}

# Per-comment size measures. nchar() is already vectorised, and lengths()
# counts the whitespace-separated pieces of each comment, so no sapply()
# is needed (sapply() also attached the full comment text as element names).
F$charsinfeedback <- nchar(F$Text)
F$wordsinfeedback <- lengths(strsplit(F$Text, "\\s+"))
head(F$charsinfeedback)

# Build the cleaned text used for modelling: drop "@", stem, strip
# punctuation, lower-case and collapse repeated whitespace.
# NOTE(review): stemming runs before lower-casing and punctuation removal;
# presumably tokens such as "Running," are therefore not stemmed - confirm
# whether the order should be changed before relying on the stems.
F_stem <- str_replace_all(F$Text, "@", "")
F_stem <- stemDocument(F_stem)
F_stem <- removePunctuation(F_stem)
F_stem <- tolower(F_stem)
F_stem <- stripWhitespace(F_stem)

# Keep the cleaned text next to the raw text for inspection.
F <- cbind(F, F_stem)
# Corpus-specific stop words, including a few mis-encoded character
# sequences seen in the data. The original list contained one entry that
# was split across two source lines and therefore ended in a newline; a
# token can never contain a newline, so that entry is written without it.
extendedstopwords <- c(
  "a", "amp", "hark", "day", "via", "harkiv", "music",
  "â€", "–", "it’s", "â·â", "don’t"
)

# Add apostrophe-free variants ("it’s" -> "its", "don’t" -> "dont").
# Both the straight and the typographic apostrophe are matched: the entries
# above use the typographic one, which a plain "'" pattern never finds.
apostrophe_pattern <- "['’]"
extendedstopwords <- c(
  extendedstopwords,
  gsub(
    apostrophe_pattern, "",
    grep(apostrophe_pattern, extendedstopwords, value = TRUE)
  )
)
# Options used when turning the cleaned text into a document-term matrix.
dtm.control <- list(
  tolower = TRUE,
  removePunctuation = TRUE,
  removeNumbers = TRUE,
  # Standard English stop words plus the corpus-specific `extendedstopwords`
  # list, which was previously built but never passed to the matrix builder.
  stopwords = c(stopwords("english"), extendedstopwords),
  # The text in `F_stem` is already stemmed, so stemming stays off here.
  # This must be the literal FALSE: `F` is the feedback data frame by this
  # point in the script, not the logical shorthand.
  stemming = FALSE,
  # Keep only terms of at least three characters.
  wordLengths = c(3, Inf),
  # Plain term-frequency counts.
  weighting = weightTf
)

# One row per element of `F_stem`, one column per retained term.
dtm <- DocumentTermMatrix(
  Corpus(VectorSource(F_stem)),
  control = dtm.control
)
# Matrix size (documents x terms) before pruning.
dim(dtm)

# Remove very rare terms: only terms whose sparsity is below 0.999 are kept.
dtm <- removeSparseTerms(dtm, 0.999)
dim(dtm)

# Dense copy of the pruned matrix, converted once and reused for the column
# totals. The variable name `matrix` is kept as an existing top-level name,
# although it shares its name with base::matrix().
matrix <- as.matrix(dtm)
freq <- colSums(matrix)
length(freq)

# Inspect the most and least frequent terms.
ord <- order(freq, decreasing = TRUE)
freq[head(ord)]
freq[tail(ord)]

# Terms that occur at least 50 times overall.
findFreqTerms(dtm, lowfreq = 50)

# Fix the random seed before drawing the word cloud of terms occurring at
# least 10 times.
set.seed(42)
wordcloud(
  names(freq), freq,
  min.freq = 10,
  colors = brewer.pal(6, "Dark2")
)
# Settings for the Gibbs-sampled LDA fit further down the script.

# Number of topics to extract.
K <- 10

# Sampler schedule: iterations discarded at the start, iterations run after
# that, and the thinning interval between retained draws.
burnin <- 4000
iter <- 2000
thin <- 500

# Number of independent runs and one seed per run; with `best` set, only the
# best run is returned.
nstart <- 5
seed <- as.list(c(2003, 5, 63, 10001, 765))
best <- TRUE
# Documents left without any term after cleaning and pruning have to be
# removed before fitting the model.
rowTotals <- rowSums(as.matrix(dtm))
is_empty_doc <- rowTotals == 0

# Ids and cleaned text of the dropped documents, kept for inspection. The
# matrix has one row per element of `F_stem`, so the logical mask indexes
# the text directly instead of converting document-name strings to numbers.
empty.rows <- dimnames(dtm)[[1]][is_empty_doc]
corpus3 <- F_stem[is_empty_doc]

if (all(is_empty_doc)) {
  stop(
    "No document contains any term after preprocessing; ",
    "cannot fit the topic model.",
    call. = FALSE
  )
}

dtm <- dtm[!is_empty_doc, ]
dim(dtm)

# Fit the K-topic LDA model by Gibbs sampling with the settings defined
# earlier in the script.
ldaOut3 <- LDA(
  dtm,
  k = K,
  method = "Gibbs",
  control = list(
    nstart = nstart,
    seed = seed,
    best = best,
    burnin = burnin,
    iter = iter,
    thin = thin
  )
)
# Note: paste() joins its arguments with spaces, so the files are named
# e.g. "LDAGibbs 10 TopicsToTerms.csv". The names are left unchanged so that
# anything reading these files keeps working.

# Most likely topic for every document, with row names set to the document
# ids. topics() gives the per-document assignment; the original call used
# terms(), which wrote the single top term of each topic into the
# "DocsToTopics" file instead.
ldaOut3.topics <- as.matrix(topics(ldaOut3))
write.csv(ldaOut3.topics, file = paste("LDAGibbs", K, "K3DocsToTopics.csv"))

# Ten highest-ranked terms for each topic.
ldaOut3.terms <- as.matrix(terms(ldaOut3, 10))
write.csv(ldaOut3.terms, file = paste("LDAGibbs", K, "TopicsToTerms.csv"))

# Per-document topic probabilities (one row per document, one column per
# topic). Empty documents were dropped before fitting, so rows are labelled
# with the model's document ids rather than left as 1..n, which would no
# longer line up with the rows of the input file.
topicProbabilities3 <- as.data.frame(ldaOut3@gamma)
doc_ids <- ldaOut3@documents
if (length(doc_ids) == nrow(topicProbabilities3) &&
      anyDuplicated(doc_ids) == 0) {
  rownames(topicProbabilities3) <- doc_ids
}
write.csv(
  topicProbabilities3,
  file = paste("LDAGibbs", K, "TopicProbabilities.csv")
)