0

I want to parallelize my GRASS GIS analysis and calculations using the package rgrass7 from within a targets pipeline. As I am a newbie to GRASS, I am not sure how to correctly set up multiple mapsets in the same GRASS GIS database. So far I tried the following code:

library(future)
library(targets)

tar_script({
  # Keep the tidyverse attach message out of the pipeline output.
  options(tidyverse.quiet = TRUE)

  # Root directory for all GRASS databases created by this pipeline.
  # It is wiped and recreated whenever this script is evaluated, so every
  # evaluation starts from an empty directory.
  GRASS_DIRECTORY <- "grassdata"
  if (fs::dir_exists(GRASS_DIRECTORY)) {
    fs::dir_delete(GRASS_DIRECTORY)
  }
  fs::dir_create(GRASS_DIRECTORY)

  # Build a small demo line layer.
  #
  # Returns an sf object with one LINESTRING per (feature_id, streamorder)
  # pair, integer `feature_id` and `streamorder` columns, and its CRS set
  # from the code 3035.
  generate_lines <- function() {
    # One row per vertex; rows sharing a feature_id form one line.
    vertices <- tribble(
      ~x, ~y, ~feature_id, ~streamorder,
      7, 1, 1, 1,
      7, 3, 1, 1,
      7, 3, 17, 2,
      9, 4, 17, 2
    )
    id_columns <- c("feature_id", "streamorder")

    # Collapse the vertices of each feature into a single WKT string.
    wkt_table <- vertices %>%
      mutate(point = str_c(x, " ", y)) %>%
      group_by(feature_id, streamorder) %>%
      summarise(
        geometry = str_c(
          "LINESTRING (", str_c(point, collapse = ", "), ")"
        ),
        .groups = "drop"
      )

    # Parse the WKT into geometries and store the identifiers as integers.
    line_features <- wkt_table %>%
      st_as_sf(wkt = "geometry") %>%
      mutate(across(all_of(id_columns), as.integer))

    st_crs(line_features) <- 3035

    line_features
  }

  # Create and initialise a GRASS database for one stream order.
  #
  # A database directory `<GRASS_DIRECTORY>/streamorder<streamorder>` is
  # created with a single location named "location". A GRASS session is then
  # started in that location's PERMANENT mapset, the location's projection is
  # set from the CRS of `sf_lines`, and the region is set to the bounding box
  # of `sf_lines` at a resolution of 1.
  #
  # Arguments:
  #   sf_lines     sf object supplying the CRS and the bounding box.
  #   streamorder  Stream order used to name the database directory.
  #   gis_base     Path to the GRASS GIS installation handed to initGRASS().
  #                Defaults to the standalone winGRASS 7.8 install path;
  #                override it on machines where GRASS lives elsewhere.
  #
  # Returns the value of the final execGRASS("g.region") call.
  initiate_grass_db <-
    function(sf_lines, streamorder,
             gis_base = "C:/Program Files/GRASS GIS 7.8") {
      location_name <- "location"

      # Each stream order gets its own database directory below
      # GRASS_DIRECTORY. No separate "mapset" directory is created next to
      # the location: mapsets live inside a location, and this session only
      # uses PERMANENT, which initGRASS() sets up itself.
      database_dir <- file.path(
        GRASS_DIRECTORY,
        paste0("streamorder", streamorder)
      )
      fs::dir_create(file.path(database_dir, location_name))

      # Hand GRASS an absolute path so the session does not depend on the
      # working directory of the R process that happens to run this call.
      database_dir <- normalizePath(
        database_dir,
        winslash = "/",
        mustWork = TRUE
      )

      initGRASS(
        gisBase = gis_base,
        gisDbase = database_dir,
        location = location_name,
        mapset = "PERMANENT",
        override = TRUE
      )

      # Write the location's projection from the CRS of the input lines.
      execGRASS("g.proj",
        flags = c("c", "quiet"),
        proj4 = st_crs(sf_lines)$proj4string
      )

      # Set the computational region to the extent of the input lines.
      b_box <- st_bbox(sf_lines)
      execGRASS("g.region",
        flags = c("quiet"),
        n = as.character(b_box["ymax"]), s = as.character(b_box["ymin"]),
        e = as.character(b_box["xmax"]), w = as.character(b_box["xmin"]),
        res = as.character(1)
      )
    }
  # Pipeline-wide target options: the packages the targets use, plus the
  # memory and garbage-collection settings.
  tar_option_set(
    packages = c(
      "rgrass7",
      "fs",
      "sf",
      "future",
      "tidyverse"
    ),
    memory = "transient",
    garbage_collection = TRUE
  )

  # Select the multisession future backend.
  future::plan(future::multisession)

  # The pipeline: the last expression of the script is the list of targets.
  list(
    # Demo line layer produced by generate_lines().
    tar_target(
      data,
      generate_lines()
    ),
    # Distinct stream-order values found in `data`.
    tar_target(
      streamorders,
      data %>%
        distinct(streamorder) %>%
        pull(streamorder)
    ),
    # One branch per element of `streamorders`; each branch calls
    # initiate_grass_db() for that stream order.
    tar_target(
      init_grass,
      initiate_grass_db(data, streamorders),
      pattern = map(streamorders)
    )
  )
})

# Run the pipeline through the future-based runner, requesting 4 workers.
targets::tar_make_future(workers = 4)
#> * run target data
#> * run target streamorders
#> * run branch init_grass_940ac7e4
#> * run branch init_grass_50f0d53a
#> FEHLER: Unable to open element file <> for <DEFAULT_WIND@PERMANENT>
#> WARNUNG: Datum <Unknown_based_on_GRS80_ellipsoid> von GRASS nicht erkannt
#>          und keine Parameter gefunden.
#> FEHLER: Unable to open element file <> for <DEFAULT_WIND@PERMANENT>
#> FEHLER: Unable to open element file <> for <DEFAULT_WIND@PERMANENT>
#> WARNUNG: Datum <Unknown_based_on_GRS80_ellipsoid> von GRASS nicht erkannt
#>          und keine Parameter gefunden.
#> * end pipeline
#> Warnmeldungen:
#> 1: Ausführung von Kommando 'g.proj.exe -w' ergab Status 1 
#> 2: 1 targets produced warnings. Run tar_meta(fields = warnings) for the messages. 

Created on 2021-03-16 by the reprex package (v0.3.0)

I am using rgrass7_0.2-5 and standalone winGRASS 7.8.5-2 all installed in default directories with

R version 4.0.3 (2020-10-10)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows Server x64 (build 17763)

I would really appreciate some hints on how to do that!

MxNl
  • 371
  • 2
  • 9
  • From what I can tell, it looks like you are using `targets` correctly, but I get "C:/Program Files/GRASS GIS 7.8 not found" when I try to run your code. Can you reproduce the error without `targets`? Maybe with only `future` or just by itself? – landau Mar 16 '21 at 13:25
  • @landau: I cannot reproduce your error without `targets`. If I run the `targets` pipeline sequentially without `future::plan(future::multisession)` it works fine. I think it is related to `future` somehow. Maybe it is for the same reason why it is not possible to make a global connection to a postgres db with `DBI` when using `future::plan(future::multisession)`. There I also need to set the connection in every target. – MxNl Mar 16 '21 at 16:00
  • 1
    Connection objects are [not exportable](https://cran.r-project.org/web/packages/future/vignettes/future-4-non-exportable-objects.html). As you say, each target should create and clean up any DB connection object it uses. – landau Mar 16 '21 at 16:02
  • @Roger Bivand: Do you have any tips here? – MxNl Apr 28 '21 at 08:30
  • I think Pat Schratz summed it up well at https://github.com/ropensci/targets/discussions/429. That sketch is specific to a use case involving time stamps, but the basic concepts generalize. – landau Apr 28 '21 at 14:55
  • @landau: Thanks, that's great, but as this didn't solve the problem I think the problem is that I haven't fully understood the setup of a mapset in GRASS GIS or so. – MxNl May 04 '21 at 16:00

0 Answers0