I am having trouble getting this code to work. I am trying to download documents from the FAO website in the URL. Please can someone help me? I use MAC OS and my chrome version is Version 106.0.5249.103 (Official Build) (x86_64)
library(rvest)
library(httr2)
library(RSelenium)
library(stringr)
url <- "https://www.fao.org/faolex/country-profiles/general-profile/en/?iso3=NAM"
base_url <- "https://www.fao.org"
ids <- read_html(url) %>%
html_elements(".doclink > a") %>%
html_attr("href") %>%
paste0(base_url, .)
grab_link <- function(page_url, s_ctl) {
# load the target url
s_ctl$navigate(page_url)
# wait for the page load to complete
Sys.sleep(4)
# getPageSource returns a list with html as the first element
page <- s_ctl$getPageSource()[[1]]
# Using rvest
read_html(page) %>%
html_elements(".item-title > a") %>%
html_attr("href") %>%
url_parse() %>%
purrr::pluck("query", "url")
}
selenium_driver <- rsDriver(
browser = "chrome",
chromever = "106.0.5249.61",
port = 4444L, #4545L,
verbose = FALSE,
check = FALSE
)
# control the client browser
ctl_browser <- selenium_driver[["client"]]
links <- purrr::map_chr(ids, grab_link, ctl_browser)
# Stop selenium server and quit browser
selenium_driver[["server"]]$stop()