Answer with Example
astro.comma's answer pointed me to the HERE batch geocoding API -- strictly speaking, that is the answer, which is why it is marked as such. For those who pass by here later, this is the testing script I used to work out how to implement the request in R, based on the help I got from astro.comma.
Sample Data:
Console:
df_locations[1:5,] # Show a sample of the data in the data frame
># A tibble: 5 x 3
> recID country postalCode
> <int> <fct> <chr>
>1 1 CAN L4T1G3
>2 2 USA 62521
>3 3 CAN H9P1K2
>4 4 CAN L6S4K6
>5 5 USA 52632
dput(df_locations[1:5,]) # For ease of reproducibility, here's dput():
structure(list(recID = 1:5, country = structure(c(1L, 2L, 1L,
1L, 2L), .Label = c("CAN", "USA", "MEX"), class = "factor"),
postalCode = c("L4T1G3", "62521", "H9P1K2", "L6S4K6", "52632"
)), row.names = c(NA, -5L), class = c("tbl_df", "tbl", "data.frame"
))
Script:
library(httr)
library(tidyverse)
# HERE API key. Prefer supplying it through the HERE_API_KEY environment
# variable (e.g. in ~/.Renviron) so the secret never lives in the script; the
# placeholder below is only used when that variable is not set.
HERE_API_KEY <- Sys.getenv(
  "HERE_API_KEY",
  unset = "YOU-CANT-HAVE-THIS-BECAUSE-ITS-MINE"
)

# Base endpoint for batch geocoding jobs; the status and result requests
# further down append the request ID to this same url.
url <- "https://batch.geocoder.ls.hereapi.com/6.2/jobs"

# Write df_locations to pipe-delimited text file to prep for POST.
# The "|" separator here matches indelim=%7C in the request below
# (%7C is the URL-encoded pipe character).
write.table(
  df_locations,
  file = "locations.txt",
  quote = FALSE,
  sep = "|",
  row.names = FALSE
)

# Assemble the POST request url to start the job. Delimiters and the commas
# in the outcols list are already percent-encoded (%7C = "|", %2C = ",").
theRequest <-
  paste0(
    url,
    "?&apiKey=",
    HERE_API_KEY,
    "&action=run&header=true",
    "&indelim=%7C&outdelim=%7C",
    "&outcols=recId%2CseqNumber%2CseqLength%2CdisplayLatitude",
    "%2CdisplayLongitude%2Ccity%2CpostalCode%2Ccountry",
    "&outputCombined=true"
  )

# Now submit the POST request along with the location file
theResponse <-
  POST(url = theRequest, body = upload_file("locations.txt"))

# Fail fast on an HTTP error (bad key, malformed request, ...) instead of
# carrying a failed response into the steps that follow.
stop_for_status(theResponse, task = "submit the HERE batch geocoding job")
Console:
>theResponse
Response [https://batch.geocoder.ls.hereapi.com/6.2/jobs?&apiKey=YOU-CANT-HAVE-THIS-BECAUSE-ITS-MINE&action=run&header=true&indelim=%7C&outdelim=%7C&outcols=recId%2CseqNumber%2CseqLength%2CdisplayLatitude%2CdisplayLongitude%2Ccity%2CpostalCode%2Ccountry&outputCombined=true]
Date: 2021-12-27 00:45
Status: 200
Content-Type: application/json;charset=utf-8
Size: 209 B
Script:
# Extract the Request ID so we can check for completion status of the job, and
# use it to identify / download the zip file when complete.
# `[[` is used rather than `$` so the element names are matched exactly.
reqID <- content(theResponse)[["Response"]][["MetaInfo"]][["RequestId"]]

# Without this guard a missing ID would be NULL, and paste0() would silently
# drop it when the status / result urls are built later on.
if (!is.character(reqID) || length(reqID) != 1L || !nzchar(reqID)) {
  stop(
    "No RequestId found in the job submission response; ",
    "inspect content(theResponse) for details.",
    call. = FALSE
  )
}
Console:
>reqID
[1] "XS9wSVt3y0Dch1Q48gX1xohewUKIw595" # or looks like this -- I changed it here.
Script:
# The job is not finished immediately (my test file took about a minute), so
# wait a little and then ask for its status with a GET request built from the
# jobs endpoint, the request ID and the API key.
JOB_status <-
  paste0(url, "/", reqID, "?action=status&apiKey=", HERE_API_KEY) %>%
  GET()
Console:
>content(JOB_status)
$Response
$Response$MetaInfo
$Response$MetaInfo$RequestId
[1] "XS9wSVt3y0Dch1Q48gX1xohewUKIw595"
$Response$Status
[1] "completed" # There are other statuses (statii?), but this one we care about.
$Response$JobStarted
[1] "2021-12-27T00:46:36.000+0000"
$Response$JobFinished
[1] "2021-12-27T00:46:49.000+0000"
$Response$TotalCount
[1] 2080 # Ignore this -- I only provided you with first 5 rows
$Response$ValidCount
[1] 2080
$Response$InvalidCount
[1] 0
$Response$ProcessedCount
[1] 2080
$Response$PendingCount
[1] 0
$Response$SuccessCount
[1] 2076
$Response$ErrorCount
[1] 4
Script:
# I stayed with GET request via httr, but no reason you can't switch to some
# other method for download like cURL
COMPLETED_JOB <-
  GET(paste0(url, "/", reqID, "/result?apiKey=", HERE_API_KEY))

# Stop on an HTTP error so that an error body is never written to disk and
# handed to unzip() as if it were the result archive.
stop_for_status(
  COMPLETED_JOB,
  task = "download the HERE batch geocoding result"
)

# Pull the body out as a raw (byte) vector: this is the zipped content, which
# has to be extracted to be useful.
job_content <- content(x = COMPLETED_JOB, as = "raw")

# Write the binary data to a zip file in the working directory.
writeBin(job_content, con = "Processed_locations.zip")

# Extract the archive's contents (the result text file) into the working
# directory.
unzip(zipfile = "Processed_locations.zip")
End result file:
recId|SeqNumber|seqLength|recId|seqNumber|seqLength|displayLatitude|displayLongitude|city|postalCode|country
1|1|1|1|1|1|43.70924|-79.658|Mississauga|L4T 1G3|CAN
2|1|1|2|1|1|39.83972|-88.92881|Decatur|62521|USA
3|1|1|3|1|1|45.47659|-73.78061|Dorval|H9P 1K2|CAN
4|1|1|4|1|1|43.75666|-79.71021|Brampton|L6S 4K6|CAN
5|1|1|5|1|1|40.4013|-91.3848|Keokuk|52632|USA