First, get the list of your docx files; this yields the path of each file. Then run the char_segment function on each of them using lapply(), a for loop, or purrr::map().
The following code assumes that your target documents are stored in a directory called "docx" within your working directory.
library(quanteda)
library(readtext) ## Remember to include in your posts the libraries required to replicate the code.
## Build the vector of paths to every Word document found in ./docx.
list_of_docx <- list.files(
  "./docx",             ## directory to search, relative to the working directory
  pattern = "[.]docx$", ## keep only the files whose names end in ".docx"
  ignore.case = TRUE,   ## match the extension regardless of letter case
  full.names = TRUE     ## prepend the directory to each name instead of returning bare file names
)
Next, prepare and run the for loop over the documents:
## Segment every document on commas and collect the pieces in `df_docx`, a
## data frame with two character columns:
##   title    - path of the source document (as listed in `list_of_docx`)
##   segments - one comma-delimited segment of that document's text per row
##
## Each document's rows are stored in a preallocated list and bound together
## once after the loop. Growing a data frame with rbind() inside the loop
## copies everything accumulated so far on every iteration.
##
## The work runs inside local() so that the loop's temporary objects do not
## linger in the workspace; only `df_docx` is created.
df_docx <- local({
  ## Typed, zero-row template: guarantees the result always has both columns,
  ## even when `list_of_docx` is empty or no document yields any segment.
  empty_segments <- data.frame(
    title = character(),
    segments = character(),
    stringsAsFactors = FALSE
  )

  per_document <- vector("list", length(list_of_docx)) ## one slot per document

  for (d in seq_along(list_of_docx)) { ## Iterate over the positions in the list of document paths
    doc_path <- as.character(list_of_docx[d])
    doc <- readtext(doc_path) ## Read the text of the current document
    segments <- char_segment(doc$text, pattern = ",", remove_pattern = TRUE)

    ## rep() keeps both columns the same length, so a document that produces
    ## zero segments contributes zero rows instead of raising an error.
    ## unname() drops any element names so the row names stay sequential.
    per_document[[d]] <- data.frame(
      title = rep(doc_path, length(segments)),
      segments = unname(segments),
      stringsAsFactors = FALSE
    )
  }

  ## Bind all per-document data frames in a single call.
  do.call(rbind, c(list(empty_segments), per_document))
})

head(df_docx)