I'm trying to recreate the statistics program from the web page Analyzing the US elections with Facebook and R.
I've had to stray from the original code to get it to work, but I can't get past the following error:
> # Apply Dictionary
> fb_liwc <- dfm(corpus, dictionary=myDi .... [TRUNCATED]
Error in UseMethod("dfm") :
no applicable method for 'dfm' applied to an object of class "c('VCorpus','Corpus')"
What do I need to do to make the dfm() call run correctly?
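From the error message I gather that quanteda's dfm() simply has no method for a tm VCorpus, so presumably it needs a quanteda corpus (or tokens) object instead. My best guess is the sketch below, building the corpus with quanteda's corpus() from the cleaned character vector dat_r$dat_r defined further down, and counting the dictionary matches with tokens_lookup() (qcorp and toks are just placeholder names of mine), but I haven't been able to confirm that this is the right approach:
# Guess: build the corpus with quanteda instead of tm, then count dictionary matches
qcorp <- corpus(dat_r$dat_r)                              # quanteda corpus from the cleaned character vector
toks <- tokens(qcorp)                                     # tokenize the comments
fb_liwc <- dfm(tokens_lookup(toks, dictionary = myDict))  # one column per dictionary key (positive/negative)
Is that the intended route, or can quanteda's corpus() convert the existing tm object directly?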
My code is as follows:
library(Rfacebook)
library(stringr)
library(reshape2)
library(tm)
library(quanteda)
#######################################################################################
# Request Access token (!!!! access token will only be valid for two hours !!!!) via:
# https://developers.facebook.com/tools/explorer/
# Requires Facebook Account
token <- "XXXX" # access token should be inserted here
#######################################################################################
# Request posts
# Simple Example: Hillary Clinton (posts for one day: 2016-07-07)
fb_page <- getPage(page= "889307941125736", token=token, since='2016/07/07',until='2016/07/08')
fb_page$order <- 1:nrow(fb_page)
# Function to download the comments
download.maybe <- function(i, refetch=FALSE, path=".")
{
post <- getPost(post=fb_page$id[i], comments = TRUE, likes = TRUE, token=token)
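# melt() flattens the list returned by getPost() (post, comments, likes) into one long data frame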
post1 <- as.data.frame(melt(post))
}
# Apply function to download comments
files <- data.frame(melt(lapply(fb_page$order, download.maybe)))
# Keep only rows that have a message (i.e. the comments)
files_c <- files[complete.cases(files$message),]
# Split ID to abstract POST_ID
files_c$id2 <- lapply(strsplit(as.character(files_c$id), "_"), "[", 1)
files_c$ch <- nchar(files_c$id2)
files_a <- files_c[ which(files_c$ch >12), ]
# Change column name
names(files_a)[11] <- "POST_ID"
# Define date
files_a$date <- lapply(strsplit(as.character(files_a$created_time), "T"), "[", 1)
files_a$date1 <- as.character(files_a$date)
# Define identifier to count comments
files_a$tempID <- 1
#######################################################################################
# Clean Data
dat2 <- gsub("[^[:alnum:]///' ]", "", files_a$message) # keep only letters, digits, slashes, apostrophes and spaces
dat2 <- data.frame(dat2)
dat3 <- gsub("([.-])|[[:punct:]]", " ", dat2$dat2)     # replace punctuation with spaces
dat3 <- data.frame(dat3)
dat4 <- iconv(dat3$dat3, "latin1", "ASCII", sub="")    # drop non-ASCII characters
dat4 <- data.frame(dat4)
dat5 <- gsub('[[:digit:]]+', '', dat4$dat4)            # remove digits
dat5 <- data.frame(dat5)
dat6 <- tolower(dat5$dat5)                             # convert to lower case
dat6 <- data.frame(dat6)
dat7 <- gsub("'", " ", dat6$dat6)                      # replace apostrophes with spaces
dat7 <- data.frame(dat7)
dat8 <- gsub("/", " ", dat7$dat7)                      # replace slashes with spaces
dat8 <- data.frame(dat8)
#######################################################################################
# Steps to replace empty entries
# Function to replace blanks with missing NA
blank2na <- function(x)
{
z <- gsub("\\s+", "", x)
x[z==""] <- NA
return(x)
}
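# e.g. blank2na(c("a", "  ", "b")) returns c("a", NA, "b")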
# Replace blanks with 'NA'
dat10 <- data.frame(sapply(dat8, blank2na))
dat10 <- data.frame(dat10)
# Define the relevant column as numeric
dat12 <- as.numeric(dat10$dat8)
dat12 <- data.frame(dat12)
# Define function to flag entries that parsed as numbers (numeric and not NA)
f <- function(x) is.numeric(x) & !is.na(x)
dat14 <- f(dat12$dat12)
dat14 <- data.frame(dat14)
# Replace purely numeric entries with a placeholder, keep the original text otherwise
dat16 <- as.character(ifelse(dat14$dat14 == "FALSE", dat8$dat8, 1010101010101010))
dat16 <- data.frame(dat16)
dat16 <- as.character(dat16$dat16)
# Combine NA and real value !!!! Select an individual term (here: "HALLO") !!!!
dat8 <- as.character(dat8$dat8)
dat17 <- ifelse(dat16 != 1010101010101010, "HALLO", dat8)
dat17 <- data.frame(dat17)
######################################################################################
# Define Corpus
dat17$ch <- nchar(as.character(dat17$dat17))
dat17$bb <- ifelse(dat17$ch<4, "HALLO", as.character(dat17$dat17))
dat18 <- as.data.frame(dat17$bb [grep("nchar", dat17$bb ) ])
dat17$dat17 <- as.character(dat17$dat17)
dat_r <- as.data.frame(dat17)
colnames(dat_r)[1] <- "dat_r"
dat_r$dat_r <- as.character(dat_r$dat_r)
corpus <- Corpus(VectorSource(dat_r$dat_r))
######################################################################################
# Load Dictionary (https://www.cs.uic.edu/~liub/FBS/sentiment-analysis.html#lexicon)
# Negative/Positive Words
hu.liu.pos=scan('~/Documents/Project/positive-words.txt', what='character', comment.char = ';')
hu.liu.neg=scan('~/Documents/Project/negative-words.txt', what='character', comment.char = ';')
# Optional: Add Words to List
pos.words=c(hu.liu.pos, 'like')
neg.words=c(hu.liu.neg, 'bad')
# Combine Dictionaries
myDict <- dictionary(list(positive = pos.words, negative = neg.words))
######################################################################################
# Apply Dictionary
fb_liwc <- dfm(corpus, dictionary=myDict)
fb1 <- as.data.frame(fb_liwc)
######################################################################################
# Combine Analysis Data and Original Data
ALL <- cbind(files_a, fb1)