0

I need to apply a function that takes two tibbles / data.frames as input and returns a list, once per group of a grouped tibble. The resulting lists should then be collected in a list column.

So, I have this tibble

library(dplyr)
library(purrr)

# Example data as dput() output: a rowwise tibble with two rows.
# Columns `session` and `bundle` are character vectors; `x1` and `x2` are
# list columns whose elements are tibbles with 15 columns each.
> out <- structure(list(session = c("0000", "0000"), bundle = c("msajc003", 
"msajc057"), x1 = list(structure(list(labels = "", start = 187.425, 
    end = 2604.425, db_uuid = "0fc618dc-8980-414d-8c7a-144a649ce199", 
    start_item_id = 8L, end_item_id = 8L, level = "Utterance", 
    attribute = "Utterance", start_item_seq_idx = 1L, end_item_seq_idx = 1L, 
    type = "ITEM", sample_start = 3749L, sample_end = 52088L, 
    sample_rate = 20000L, listOfFiles = "/private/var/folders/lr/h3mlkmq540d6xjrh3bpdms600000gn/T/Rtmpb1f0sQ/emuR_demoData/ae_emuDB/0000_ses/msajc003_bndl/msajc003.wav"), row.names = c(NA, 
-1L), class = c("tbl_df", "tbl", "data.frame")), structure(list(
    labels = "", start = 299.975, end = 2794.925, db_uuid = "0fc618dc-8980-414d-8c7a-144a649ce199", 
    start_item_id = 8L, end_item_id = 8L, level = "Utterance", 
    attribute = "Utterance", start_item_seq_idx = 1L, end_item_seq_idx = 1L, 
    type = "ITEM", sample_start = 6000L, sample_end = 55898L, 
    sample_rate = 20000L, listOfFiles = "/private/var/folders/lr/h3mlkmq540d6xjrh3bpdms600000gn/T/Rtmpb1f0sQ/emuR_demoData/ae_emuDB/0000_ses/msajc057_bndl/msajc057.wav"), row.names = c(NA, 
-1L), class = c("tbl_df", "tbl", "data.frame"))), x2 = list(structure(list(
    labels = c("V", "V"), start = c(187.425, 340.175), end = c(256.925, 
    426.675), db_uuid = c("0fc618dc-8980-414d-8c7a-144a649ce199", 
    "0fc618dc-8980-414d-8c7a-144a649ce199"), start_item_id = c(147L, 
    149L), end_item_id = c(147L, 149L), level = c("Phonetic", 
    "Phonetic"), attribute = c("Phonetic", "Phonetic"), start_item_seq_idx = c(1L, 
    3L), end_item_seq_idx = c(1L, 3L), type = c("SEGMENT", "SEGMENT"
    ), sample_start = c(3749L, 6804L), sample_end = c(5138L, 
    8533L), sample_rate = c(20000L, 20000L), listOfFiles = c("/private/var/folders/lr/h3mlkmq540d6xjrh3bpdms600000gn/T/Rtmpb1f0sQ/emuR_demoData/ae_emuDB/0000_ses/msajc003_bndl/msajc003.wav", 
    "/private/var/folders/lr/h3mlkmq540d6xjrh3bpdms600000gn/T/Rtmpb1f0sQ/emuR_demoData/ae_emuDB/0000_ses/msajc003_bndl/msajc003.wav"
    )), row.names = c(NA, -2L), class = c("tbl_df", "tbl", "data.frame"
)), structure(list(labels = "V", start = 1943.175, end = 2037.425, 
    db_uuid = "0fc618dc-8980-414d-8c7a-144a649ce199", start_item_id = 189L, 
    end_item_id = 189L, level = "Phonetic", attribute = "Phonetic", 
    start_item_seq_idx = 28L, end_item_seq_idx = 28L, type = "SEGMENT", 
    sample_start = 38864L, sample_end = 40748L, sample_rate = 20000L, 
    listOfFiles = "/private/var/folders/lr/h3mlkmq540d6xjrh3bpdms600000gn/T/Rtmpb1f0sQ/emuR_demoData/ae_emuDB/0000_ses/msajc057_bndl/msajc057.wav"), row.names = c(NA, 
-1L), class = c("tbl_df", "tbl", "data.frame")))), class = c("rowwise_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -2L), groups = structure(list(
    .rows = structure(list(1L, 2L), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), row.names = c(NA, -2L), class = c("tbl_df", 
"tbl", "data.frame")))

> out
# A tibble: 2 × 4
# Rowwise: 
  session bundle   x1                x2               
  <chr>   <chr>    <list>            <list>           
1 0000    msajc003 <tibble [1 × 15]> <tibble [2 × 15]>
2 0000    msajc057 <tibble [1 × 15]> <tibble [1 × 15]>

and a simple function that takes two data.frames / tibbles as inputs

# Report the dimensions of two data frames / tibbles.
# Returns an unnamed two-element list: dim(x1) first, then dim(x2).
fake_two_df_fun <- function(x1, x2) {
  lapply(list(x1, x2), dim)
}

then I want to apply it to every group in the tibble and collect the output into a list column.

I would prefer pmap because, in the expanded use case, I need to pass many more arguments to the called function in addition to the two tibbles / data.frames. The function I wish to apply is also not known in advance; I only know that it takes two tibbles as its first two arguments (and possibly further arguments, which is why I need pmap).

Fredrik Karlsson
  • 485
  • 8
  • 21

1 Answer

0

Assuming a list of the outputs of fake_two_df_fun is desired:

library(dplyr)

# Evaluate fake_two_df_fun once per row; wrapping the result in list() stores
# it as a single element of the new list column `dims`.
out %>%
  rowwise() %>%
  mutate(dims = list(fake_two_df_fun(x1, x2))) %>%
  ungroup()

giving:

# A tibble: 2 × 5
  session bundle   x1                x2                dims      
  <chr>   <chr>    <list>            <list>            <list>    
1 0000    msajc003 <tibble [1 × 15]> <tibble [2 × 15]> <list [2]>
2 0000    msajc057 <tibble [1 × 15]> <tibble [1 × 15]> <list [2]>
G. Grothendieck
  • 254,981
  • 17
  • 203
  • 341
  • Yes, sorry. The point of me needing, I think, pmap is that I will not know what the exact signature of the function will be. Only that the first two args are tibbles or data.frames. Sorry that I did not make that clearer. And in my edit of the question I also made it clearer now that the `fake_two_df_fun` is just one possible function that I want to apply. They (the applied functions) will take two tibbles, but may also take additional arguments. Which will then be present in other columns. So the hard-coded application of the function call like in your example will not work unfortunately. – Fredrik Karlsson Mar 15 '23 at 19:17