Here is another approach, based on the R package RDCOMClient:
library(RDCOMClient)
####################################################################
#### Step 1 : We use the OCR of Word to convert the PDF to Word ####
####################################################################
# Start a Word instance through COM. It is made visible so the conversion can
# be followed on screen, and alerts are switched off (0 is wdAlertsNone in the
# Word object model) so that prompts do not interrupt the script.
wordApp <- COMCreate("Word.Application")
wordApp[["Visible"]] <- TRUE
wordApp[["DisplayAlerts"]] <- 0

# Input PDF and the Word file that the converted document is saved to.
path_To_PDF_File <- "C:\\PlantTraitAsia.pdf"
path_To_Word_File <- "C:\\PlantTraitAsia.docx"

# mustWork = TRUE makes a missing or mistyped PDF path fail right here with a
# clear R error, instead of a warning followed by an opaque COM error from
# Word. ConfirmConversions = FALSE is passed so that Word does not ask for
# confirmation before converting the PDF.
doc <- wordApp[["Documents"]]$Open(
  normalizePath(path_To_PDF_File, mustWork = TRUE),
  ConfirmConversions = FALSE
)

# Save the converted document; `doc` stays open because the table extraction
# that follows reads from it.
doc$SaveAs2(path_To_Word_File)
###############################################################
#### Step 2 : We extract the tables from the word document ####
###############################################################
# Number of tables found in the opened document.
nb_Tables <- doc$tables()$count()
if (nb_Tables < 1) {
  # Fail with a readable message rather than an out-of-range COM error.
  stop("No table was found in the converted document.", call. = FALSE)
}

# One character matrix per table, preallocated instead of grown in the loop.
list_Table <- vector("list", nb_Tables)

for (l in seq_len(nb_Tables)) {
  print(l) # progress indicator: index of the table being read

  # Fetch the table object once; every COM lookup is a round trip to Word,
  # so it is not repeated for each cell.
  table_l <- doc$tables(l)
  nb_Row <- table_l$Rows()$Count()
  nb_Col <- table_l$Columns()$Count()

  mat_Temp <- matrix(NA_character_, nrow = nb_Row, ncol = nb_Col)

  # seq_len() keeps the loops empty when a table reports 0 rows or columns
  # (1 : 0 would iterate over c(1, 0)).
  for (i in seq_len(nb_Row)) {
    for (j in seq_len(nb_Col)) {
      # The raw cell text is kept as returned by Word; it can be cleaned
      # afterwards, e.g. with gsub("[\r\a]", "", final_Mat).
      mat_Temp[i, j] <- table_l$cell(i, j)$range()$text()
    }
  }

  list_Table[[l]] <- mat_Temp
}

# Stack the per-table matrices; rbind requires that all tables have the same
# number of columns.
final_Mat <- do.call("rbind", list_Table)
final_Mat[, 1 : 4]
We obtain the following matrix (the trailing "\r\a" in every cell is Word's end-of-cell marker):
[,1] [,2] [,3]
[1,] "ID\r\a" "Category of permissio\r\a" "Species\r\a"
[2,] "83\r\a" "A\r\a" "Abies mariesii Masters\r\a"
[3,] "155\r\a" "A\r\a" "Abies mariesii Masters\r\a"
[4,] "225\r\a" "A\r\a" "Abies mariesii Masters\r\a"
[5,] "297\r\a" "A\r\a" "Abies mariesii Masters\r\a"
[6,] "369\r\a" "A\r\a" "Abies mariesii Masters\r\a"
[7,] "706\r\a" "B\r\a" "Abies sachalinensis (Schmidt) Masters\r\a"
[8,] "750\r\a" "B\r\a" "Abies sachalinensis (Schmidt) Masters\r\a"
[9,] "794\r\a" "B\r\a" "Abies sachalinensis (Schmidt) Masters\r\a"
[10,] "837\r\a" "B\r\a" "Abies sachalinensis (Schmidt) Masters\r\a"
[11,] "881\r\a" "B\r\a" "Abies sachalinensis (Schmidt) Masters\r\a"
[12,] "6\r\a" "A\r\a" "Abies spectabilis (D. Don) Mirbel\r\a"
[13,] "10\r\a" "A\r\a" "Abies spectabilis (D. Don) Mirbel\r\a"
[14,] "82\r\a" "A\r\a" "Abies veitchii Lindley\r\a"
[15,] "154\r\a" "A\r\a" "Abies veitchii Lindley\r\a"
[16,] "224\r\a" "A\r\a" "Abies veitchii Lindley\r\a"
[17,] "296\r\a" "A\r\a" "Abies veitchii Lindley\r\a"
[18,] "368\r\a" "A\r\a" "Abies veitchii Lindley\r\a"
[19,] "707\r\a" "B\r\a" "Acanthopanax senticosus (Rupr. et Maxim.) Harms.\r\a"
[20,] "751\r\a" "B\r\a" "Acanthopanax senticosus (Rupr. et Maxim.) Harms.\r\a"
[21,] "795\r\a" "B\r\a" "Acanthopanax senticosus (Rupr. et Maxim.) Harms.\r\a"
[22,] "838\r\a" "B\r\a" "Acanthopanax senticosus (Rupr. et Maxim.) Harms.\r\a"
[23,] "882\r\a" "B\r\a" "Acanthopanax senticosus (Rupr. et Maxim.) Harms.\r\a"
[24,] "708\r\a" "B\r\a" "Acer japonicum Thunb.\r\a"
[25,] "752\r\a" "B\r\a" "Acer japonicum Thunb.\r\a"
[26,] "796\r\a" "B\r\a" "Acer japonicum Thunb.\r\a"
[27,] "839\r\a" "B\r\a" "Acer japonicum Thunb.\r\a"
[28,] "883\r\a" "B\r\a" "Acer japonicum Thunb.\r\a"
[29,] "97\r\a" "A\r\a" "Acer micranthum Sieb. et Zucc.\r\a"
[30,] "169\r\a" "A\r\a" "Acer micranthum Sieb. et Zucc.\r\a"
[31,] "239\r\a" "A\r\a" "Acer micranthum Sieb. et Zucc.\r\a"
[32,] "311\r\a" "A\r\a" "Acer micranthum Sieb. et Zucc.\r\a"
[33,] "383\r\a" "A\r\a" "Acer micranthum Sieb. et Zucc.\r\a"
[34,] "99\r\a" "A\r\a" "Acer mono var. mayrii Sugimoto\r\a"
[35,] "171\r\a" "A\r\a" "Acer mono var. mayrii Sugimoto\r\a"
[36,] "241\r\a" "A\r\a" "Acer mono var. mayrii Sugimoto\r\a"
[37,] "313\r\a" "A\r\a" "Acer mono var. mayrii Sugimoto\r\a"
[38,] "385\r\a" "A\r\a" "Acer mono var. mayrii Sugimoto\r\a"
[39,] "130\r\a" "A\r\a" "Acer morifolium Koidz.\r\a"
[40,] "200\r\a" "A\r\a" "Acer morifolium Koidz.\r\a"
[41,] "272\r\a" "A\r\a" "Acer morifolium Koidz.\r\a"
[42,] "344\r\a" "A\r\a" "Acer morifolium Koidz.\r\a"
[43,] "416\r\a" "A\r\a" "Acer morifolium Koidz.\r\a"
[44,] "472\r\a" "B\r\a" "Acer morifolium Koidz.\r\a"
[45,] "522\r\a" "B\r\a" "Acer morifolium Koidz.\r\a"
[46,] "662\r\a" "B\r\a" "Acer morifolium Koidz.\r\a"
[47,] "709\r\a" "B\r\a" "Actaea asiatica Hara.\r\a"
[48,] "753\r\a" "B\r\a" "Actaea asiatica Hara.\r\a"
[49,] "797\r\a" "B\r\a" "Actaea asiatica Hara.\r\a"
[50,] "840\r\a" "B\r\a" "Actaea asiatica Hara.\r\a"
[51,] "884\r\a" "B\r\a" "Actaea asiatica Hara.\r\a"
[52,] "710\r\a" "B\r\a" "Actinidia arguta (Sieb. et Zucc.) Planch. ex Miq.\r\a"
[53,] "754\r\a" "B\r\a" "Actinidia arguta (Sieb. et Zucc.) Planch. ex Miq.\r\a"
[54,] "798\r\a" "B\r\a" "Actinidia arguta (Sieb. et Zucc.) Planch. ex Miq.\r\a"
[55,] "841\r\a" "B\r\a" "Actinidia arguta (Sieb. et Zucc.) Planch. ex Miq.\r\a"
[56,] "885\r\a" "B\r\a" "Actinidia arguta (Sieb. et Zucc.) Planch. ex Miq.\r\a"
[57,] "51\r\a" "A\r\a" "Alangium javanicum\r\a"
[58,] "473\r\a" "B\r\a" "Aleurites cordata (thunb.) R. Br. ex Steudel.\r\a"
[59,] "523\r\a" "B\r\a" "Aleurites cordata (thunb.) R. Br. ex Steudel.\r\a"
[60,] "580\r\a" "B\r\a" "Aleurites cordata (thunb.) R. Br. ex Steudel.\r\a"
[61,] "622\r\a" "B\r\a" "Aleurites cordata (thunb.) R. Br. ex Steudel.\r\a"
[62,] "663\r\a" "B\r\a" "Aleurites cordata (thunb.) R. Br. ex Steudel.\r\a"
[,4] [,5] [,6]
[1,] "Traits\r\a" "Value\r\a" "Notes\r\a"
[2,] "Maximum heighyt (m)\r\a" "18.17\r\a" "\r\a"
[3,] "Shade tolerance (min. relative light intensity, %), Anderson 1964. J. Ecol.\r\a" "1.15\r\a" "\r\a"
[4,] "Length of fruit (mm)\r\a" "8\r\a" "\r\a"
[5,] "Pollination mode\r\a" "Anemophily\r\a" "\r\a"
[6,] "Type of fruit\r\a" "Wing-hair\r\a" "\r\a"
[7,] "Stem type\r\a" "Woody\r\a" "\r\a"
[8,] "Shade tolerance (min. relative light intensity, %), Anderson 1964. J. Ecol.\r\a" "3.816887653\r\a" "\r\a"
[9,] "Maximum height (m)\r\a" "24.7\r\a" "\r\a"
[10,] "Vegetative spread distance (m)\r\a" "0\r\a" "\r\a"
[11,] "Vegetative reproduction\r\a" "None\r\a" "\r\a"
[12,] "Maximum heighyt (m)\r\a" "24\r\a" "\r\a"
[13,] "Maximum heighyt (m)\r\a" "45.7\r\a" "\r\a"
[14,] "Maximum heighyt (m)\r\a" "18\r\a" "\r\a"
[15,] "Shade tolerance (min. relative light intensity, %), Anderson 1964. J. Ecol.\r\a" "1.57\r\a" "\r\a"
[16,] "Length of fruit (mm)\r\a" "6\r\a" "\r\a"
[17,] "Pollination mode\r\a" "Anemophily\r\a" "\r\a"
[18,] "Type of fruit\r\a" "Wing-hair\r\a" "\r\a"
[19,] "Stem type\r\a" "Woody\r\a" "\r\a"
[20,] "Shade tolerance (min. relative light intensity, %), Anderson 1964. J. Ecol.\r\a" "5.629993917\r\a" "\r\a"
[21,] "Maximum height (m)\r\a" "1.5\r\a" "\r\a"
[22,] "Vegetative spread distance (m)\r\a" "1\r\a" "\r\a"
[23,] "Vegetative reproduction\r\a" "Rhyzome\r\a" "\r\a"
[24,] "Stem type\r\a" "Woody\r\a" "\r\a"
[25,] "Shade tolerance (min. relative light intensity, %), Anderson 1964. J. Ecol.\r\a" "5.870242245\r\a" "\r\a"
[26,] "Maximum height (m)\r\a" "12.1\r\a" "\r\a"
[27,] "Vegetative spread distance (m)\r\a" "0\r\a" "\r\a"
[28,] "Vegetative reproduction\r\a" "Sprout at the base of stem\r\a" "\r\a"
[29,] "Maximum heighyt (m)\r\a" "9.2\r\a" "\r\a"
[30,] "Shade tolerance (min. relative light intensity, %), Anderson 1964. J. Ecol.\r\a" "8.86\r\a" "\r\a"
[31,] "Length of fruit (mm)\r\a" "15\r\a" "\r\a"
[32,] "Pollination mode\r\a" "Entomophily\r\a" "\r\a"
[33,] "Type of fruit\r\a" "Wing-hair\r\a" "\r\a"
[34,] "Maximum heighyt (m)\r\a" "18.3\r\a" "\r\a"
[35,] "Shade tolerance (min. relative light intensity, %), Anderson 1964. J. Ecol.\r\a" "7.24\r\a" "\r\a"
[36,] "Length of fruit (mm)\r\a" "23\r\a" "\r\a"
[37,] "Pollination mode\r\a" "Entomophily\r\a" "\r\a"
[38,] "Type of fruit\r\a" "Wing-hair\r\a" "\r\a"
[39,] "Maximum heighyt (m)\r\a" "13.5\r\a" "\r\a"
[40,] "Shade tolerance (min. relative light intensity, %), Anderson 1964. J. Ecol.\r\a" "27.05059776\r\a" "\r\a"
[41,] "Length of fruit (mm)\r\a" "15\r\a" "\r\a"
[42,] "Pollination mode\r\a" "Entomophily\r\a" "\r\a"
[43,] "Type of fruit\r\a" "Wing-hair\r\a" "\r\a"
[44,] "Vegetative spread distance (m)\r\a" "0\r\a" "\r\a"
[45,] "Stem type\r\a" "Woody\r\a" "\r\a"
[46,] "Vegetative reproduction\r\a" "None\r\a" "\r\a"
[47,] "Stem type\r\a" "Herbaceous\r\a" "\r\a"
[48,] "Shade tolerance (min. relative light intensity, %), Anderson 1964. J. Ecol.\r\a" "3.236520779\r\a" "\r\a"
[49,] "Maximum height (m)\r\a" "0.61\r\a" "\r\a"
[50,] "Vegetative spread distance (m)\r\a" "0\r\a" "\r\a"
[51,] "Vegetative reproduction\r\a" "Sprout at the base of stem\r\a" "\r\a"
[52,] "Stem type\r\a" "Liana\r\a" "\r\a"
[53,] "Shade tolerance (min. relative light intensity, %), Anderson 1964. J. Ecol.\r\a" "5.547807614\r\a" "\r\a"
[54,] "Maximum height (m)\r\a" "16.3\r\a" "\r\a"
[55,] "Vegetative spread distance (m)\r\a" "10\r\a" "\r\a"
[56,] "Vegetative reproduction\r\a" "Rooting from vine\r\a" "\r\a"
[57,] "Maximum heighyt (m)\r\a" "16.8\r\a" "\r\a"
[58,] "Vegetative spread distance (m)\r\a" "0\r\a" "\r\a"
[59,] "Stem type\r\a" "Woody\r\a" "\r\a"
[60,] "Shade tolerance (min. relative light intensity, %), Anderson 1964. J. Ecol.\r\a" "12.19244\r\a" "\r\a"
[61,] "Maximum heighyt (m)\r\a" "9.7\r\a" "\r\a"
[62,] "Vegetative reproduction\r\a" "None\r\a" "\r\a"
[ reached getOption("max.print") -- omitted 863 rows ]