# Load the required libraries
library(tm)
library(e1071)
library(plyr)
# Training set ----
# Titles of the papers to be categorized; each one becomes a document in the
# training corpus.
sample <- c(
  "An Inductive Inference Machine",
  "Computing Machinery and Intelligence",
  "On the translation of languages from left to right",
  "First Draft of a Report on the EDVAC",
  "The Rendering Equation"
)

# Build the corpus and normalise the text: drop digits and punctuation,
# lower-case, remove English stop words, stem, and collapse whitespace.
corpus <- Corpus(VectorSource(sample))
corpus <- tm_map(corpus, removeNumbers)
corpus <- tm_map(corpus, removePunctuation)
# tolower() is a plain character function rather than a tm transformation;
# wrapping it in content_transformer() keeps the documents as tm text
# documents so that DocumentTermMatrix() can consume the corpus.
corpus <- tm_map(corpus, content_transformer(tolower))
corpus <- tm_map(corpus, removeWords, stopwords("english"))
corpus <- tm_map(corpus, stemDocument, language = "english")
corpus <- tm_map(corpus, stripWhitespace)

# Document-term matrix used as the training set.
dtm <- DocumentTermMatrix(corpus)
inspect(dtm)

# Declaration of the categories: one label per training title, in the same
# order as the entries of `sample` above.
Category <- c(
  "Machine learning",
  "Artificial intelligence",
  "Compilers",
  "Computer architecture",
  "Computer graphics"
)

# Combine the term counts with their category labels, one row per document.
my.data <- data.frame(as.matrix(dtm), Category)
my.data
# Testing set ----
# Titles used as test documents. This overwrites the `sample` vector defined
# for the training set.
sample <- c(
  "gprof: A Call Graph Execution Profiler",
  "Architecture of the IBM System/360",
  "A Case for Redundant Arrays of Inexpensive Disks (RAID)",
  "Determining Optical Flow",
  "A relational model for large shared data banks",
  "some complementarity problems of z and lyoponov like transformations on edclidean jordan algebra"
)

# Apply the same cleaning steps as for the training corpus: drop digits and
# punctuation, lower-case, remove English stop words, stem, and collapse
# whitespace.
corpus <- Corpus(VectorSource(sample))
corpus <- tm_map(corpus, removeNumbers)
corpus <- tm_map(corpus, removePunctuation)
# tolower() is a plain character function rather than a tm transformation;
# wrapping it in content_transformer() keeps the documents as tm text
# documents so that DocumentTermMatrix() can consume the corpus.
corpus <- tm_map(corpus, content_transformer(tolower))
corpus <- tm_map(corpus, removeWords, stopwords("english"))
corpus <- tm_map(corpus, stemDocument, language = "english")
corpus <- tm_map(corpus, stripWhitespace)

# Document-term matrix used as the testing set.
# NOTE(review): dtm1 is built from the test titles' own vocabulary, so its
# columns will generally not match those of the training matrix. If it is
# later fed to a model fitted on the training data, consider passing
# control = list(dictionary = Terms(dtm)) -- confirm against downstream code.
dtm1 <- DocumentTermMatrix(corpus)
inspect(dtm1)