1

I am trying to use dplyr::across to find the weighted centroid and variance for Latitude and Longitude for each "Early" and "Late" Periods. My data looks something like this:

dat <- data.frame(Latitude = c(35.8, 35.85, 36.7, 35.2, 36.1, 35.859, 36.0, 37.0, 35.1, 35.2),
                  Longitude = c(-89.4, -89.5, -89.4, -89.8, -90, -89.63, -89.7, -89, -88.9, -89),
                  Period = rep(c("early", "late"), each = 5),
                  ID = c("A", "A", "A", "B", "C", "C", "C", "D", "E", "E"))

Here's a function to calculate the weighted variance, standard deviation, etc.

#function for weighted var and sd
weighted.var <- function(x, w = NULL, na.rm = FALSE) {
  if (na.rm) {
    na <- is.na(x) | is.na(w)
    x <- x[!na]
    w <- w[!na]
  }
  
  sum(w * (x - weighted.mean(x, w)) ^ 2) / (sum(w) - 1)
}
weighted.sd <- function(x, w, na.rm = TRUE) sqrt(weighted.var(x, w, na.rm = TRUE))

How might I include two or more functions in dplyr::across so that I can summarize the 1) Longitude_mean, 2) Longitude_stddev, 3) Latitude_mean, 4) Latitude_stddev, by "Early" and "Late" Periods?

Below is an attempt but throws an error. Any help would be appreciated!

dat <- data.frame(Latitude = c(35.8, 35.85, 36.7, 35.2, 36.1, 35.859, 36.0, 37.0, 35.1, 35.2),
                  Longitude = c(-89.4, -89.5, -89.4, -89.8, -90, -89.63, -89.7, -89, -88.9, -89),
                  Period = rep(c("early", "late"), each = 5),
                  ID = c("A", "A", "A", "B", "C", "C", "C", "D", "E", "E"))

#function for weighted var and sd
weighted.var <- function(x, w = NULL, na.rm = FALSE) {
  if (na.rm) {
    na <- is.na(x) | is.na(w)
    x <- x[!na]
    w <- w[!na]
  }
  
  sum(w * (x - weighted.mean(x, w)) ^ 2) / (sum(w) - 1)
}
weighted.sd <- function(x, w, na.rm = TRUE) sqrt(weighted.var(x, w, na.rm = TRUE))

library(dplyr)
dat %>% 
  group_by(Period, ID) %>% 
  mutate(weight = 1/n()) %>% 
  group_by(Period) %>% 
  summarise(across(c(Longitude, Latitude),
                   ~ weighted.mean(.x, w = weight),
                   ~ weighted.sd(.x, w = weight)))

Thank you for advice. Best,

-nm

Nick Masto
  • 125
  • 8

2 Answers2

3

You have to use ~list and then the functions. This will return a list. Then you can apply unnest():

dat %>% 
    group_by(Period, ID) %>% 
    mutate(weight = 1/n()) %>% 
    group_by(Period) %>% 
    summarise(across(c(Longitude, Latitude), ~list(weighted.mean(.x, w = weight),
                                                   weighted.sd(.x, w = weight)))) %>% 
    unnest(cols = c(Longitude, Latitude))
 Period Longitude Latitude
  <chr>      <dbl>    <dbl>
1 early    -89.7     35.8  
2 early      0.289    0.600
3 late     -89.2     36.0  
4 late       0.401    0.931
TarJae
  • 72,363
  • 6
  • 19
  • 66
3

We could pass the functions inside the list and return a wide dataset

library(dplyr)
dat %>% 
  group_by(Period, ID) %>% 
  mutate(weight = 1/n()) %>% 
  group_by(Period) %>% 
  summarise(across(c(Longitude, Latitude),
                   list(weighted_mean = ~ weighted.mean(.x, w = weight),
                   weighted_sd = ~ weighted.sd(.x, w = weight))))

-output

# A tibble: 2 x 5
  Period Longitude_weighted_mean Longitude_weighted_sd Latitude_weighted_mean Latitude_weighted_sd
  <chr>                    <dbl>                 <dbl>                  <dbl>                <dbl>
1 early                    -89.7                 0.289                   35.8                0.600
2 late                     -89.2                 0.401                   36.0                0.931
akrun
  • 874,273
  • 37
  • 540
  • 662