8

I am using the 'segmented' package to find break points in linear regressions in R

library(tidyverse)
library(segmented)

df <- data.frame(x = c(1:10), y = c(1,1,1,1,1,6:10))
lm_model <- lm(y ~ x, data = df)
seg_model <- segmented(obj = lm_model, seg.Z = ~ x)

But if I run the same model within a purrr:map, segmented fails.

map_test <- df %>% 
  nest() %>%
  mutate(map_lm = map(data, ~lm(y ~ x, data = .)),
         param_map_lm = map(map_lm, tidy))

map_lm_model <- map_test[[2]][[1]]

map_seg_model <- segmented(obj = map_lm_model, seg.Z = ~ x)

"Error in is.data.frame(data) : object '.' not found"

When taking the lm obj from the lm extracted from the map output, segmented fails to find the underlying data.

The two linear model objects, appear, however, identical.

What I actually need to do is a more useful map to run lm over multiple sub-sets of a dataframe, then run 'segmented' on the resulting lm.

Julian
  • 6,586
  • 2
  • 9
  • 33
user73522
  • 97
  • 3

3 Answers3

4

This is basically the same issue as the interaction between glm() and purrr::map().

lm() captures the expression supplied to it, which works well as a stand-alone case. However, when called by map(), the supplied expression is ., which has no meaning outside the immediate context of that map() call and results in the error you are observing.

As with the other question, one workaround is to define a wrapper for lm() that composes a custom call directly on the dataset, which is then captured by lm() as an unevaluated expression.

# Composes a custom lm() expression and evaluates it
lm2 <- function(data, ...)
    eval( rlang::expr(lm(data=!!rlang::enexpr(data), !!!list(...))) )

# Now mapping using lm2, instead of lm
map_test <- nest(df, data=everything()) %>% 
    mutate(map_lm       = map(data, lm2, y ~ x),
           param_map_lm = map(map_lm, broom::tidy))

# The data is stored directly inside the lm object
# segmented() now has no problems accessing it
map_lm_model <- map_test[[2]][[1]]
map_seg_model <- segmented(obj = map_lm_model, seg.Z = ~ x)
# Call: segmented.lm(obj = map_lm_model, seg.Z = ~x)
# 
# Meaningful coefficients of the linear terms:
# (Intercept)            x         U1.x  
#   1.000e+00    6.344e-15    1.607e+00  
# 
# Estimated Break-Point(s):
# psi1.x  
#  3.889  

or as a single mutate() chain:

map_test <- nest(df, data=everything()) %>% 
    mutate(map_lm       = map(data, lm2, y ~ x),
           param_map_lm = map(map_lm, broom::tidy),
           seg_lm       = map(map_lm, segmented, seg.Z=~x))
# # A tibble: 1 x 4
#             data map_lm param_map_lm     seg_lm    
#   <list<df[,2]>> <list> <list>           <list>    
# 1       [10 × 2] <lm>   <tibble [2 × 5]> <segmentd>
Artem Sokolov
  • 13,196
  • 4
  • 43
  • 74
2

You could store model data along with model object, and replace . in model object's data name by stored data name:

map_test <- df %>% 
  nest(data=everything()) %>%
  mutate(map_lm = map(data, ~lm(y ~ x, data = . )),
         data_map_lm = data, # Store model data
         param_map_lm = map(map_lm, broom::tidy))

map_lm_model <- map_test$map_lm[[1]]
map_lm_data <- map_test$data_map_lm[[1]]

# Set stored data name in model
map_lm_model$call$data <- as.name("map_lm_data")

map_seg_model <- segmented(obj = map_lm_model, seg.Z = ~ x)
map_seg_model

# Call: segmented.lm(obj = map_lm_model, seg.Z = ~x)
# 
# Meaningful coefficients of the linear terms:
#   (Intercept)            x         U1.x  
# 1.000e+00   -1.874e-08    1.607e+00  
# 
# Estimated Break-Point(s):
#   psi1.x  
# 3.889  

On a nest with more levels:

df <- data.frame(x = c(c(1:10),c(11:20)),
                 y = rep(c(1,1,1,1,1,6:10),2),
                 level=c(rep('a',10),rep('b',10)))


map_test <- df %>%
  nest(data=c(x,y)) %>%
  mutate(map_lm = map(data, ~lm(y ~ x, data = . )),
         data_map_lm = data,
         param_map_lm = map(map_lm, broom::tidy))


map_test %>% pmap(~with(list(...),{
  map_lm$call$data <- as.name("data_map_lm")
  segmented(obj = map_lm, seg.Z = ~ x)
}))

#> [[1]]
#> Call: segmented.lm(obj = map_lm_model, seg.Z = ~x)
#> 
#> Meaningful coefficients of the linear terms:
#> (Intercept)            x         U1.x  
#>   1.000e+00   -1.874e-08    1.607e+00  
#> 
#> Estimated Break-Point(s):
#> psi1.x  
#>  3.889  
#> 
#> [[2]]
#> Call: segmented.lm(obj = map_lm_model, seg.Z = ~x)
#> 
#> Meaningful coefficients of the linear terms:
#> (Intercept)            x         U1.x  
#>   1.000e+00   -1.937e-08    1.607e+00  
#> 
#> Estimated Break-Point(s):
#> psi1.x  
#>  13.89
Waldi
  • 39,242
  • 6
  • 30
  • 78
2

An option could be using map2 from purrr to immediately use the "map_lm" model on the segmented model. In that case .y is used so that the data keeps the same name as use with the lm model. Here is a reproducible example:

library(dplyr)
library(tidyr)
library(segmented)
library(broom)
library(purrr)

df <- data.frame(x = c(1:10), y = c(1,1,1,1,1,6:10))
lm_model <- lm(y ~ x, data = df)
seg_model <- segmented(obj = lm_model, seg.Z = ~ x)

# Combine in one dataframe
map_test <- df %>% 
  nest(data = everything()) %>%
  mutate(map_lm = purrr::map(data, ~lm(y ~ x, data = .)),
         map_seg = purrr::map2(data, map_lm, ~segmented(.y, seg.Z = ~ x)))

# Show segmented model
map_test[[3]]
#> [[1]]
#> Call: segmented.lm(obj = .y, seg.Z = ~x)
#> 
#> Meaningful coefficients of the linear terms:
#> (Intercept)            x         U1.x  
#>   1.000e+00   -1.874e-08    1.607e+00  
#> 
#> Estimated Break-Point(s):
#> psi1.x  
#>  3.889

# Plot lm and seg model
plot(map_test$data[[1]])
lines(fitted(map_test$map_lm[[1]]), col = "red")
lines(fitted(map_test$map_seg[[1]]), col = "blue")
legend(x = "topleft", 
       legend = c("lm", "seg"),
       pch = 15,
       col = c("red", "blue"))

Created on 2022-09-04 with reprex v2.0.2

Quinten
  • 35,235
  • 5
  • 20
  • 53