1

I have a list of lists like the one below:

# Example data: one entry per subject, each holding three 2-row tibble-classed
# data frames (Demographics, DiseaseStatus, Visits). In every table, row 1
# repeats the column names and row 2 holds the subject's values.
lst <- local({
  # Wrap named columns into a 2-row data frame carrying the tibble classes.
  as_tbl <- function(...) {
    structure(
      list(...),
      row.names = c(NA, -2L),
      class = c("tbl_df", "tbl", "data.frame")
    )
  }

  list(
    `101-01-101` = list(
      Demographics = as_tbl(
        SubjectID = c("SubjectID", "101-01-101"),
        BRTHDTC = c("BRTHDTC", "1953-07-07"),
        SEX = c("SEX", "Female")
      ),
      DiseaseStatus = as_tbl(
        SubjectID = c("SubjectID", "101-01-101"),
        DSDT = c("DSDT", "2016-03-14"),
        DSDT_P = c("DSDT_P", NA)
      ),
      Visits = as_tbl(
        SubjectID = c("SubjectID", "101-01-101"),
        Visit = c("Visit", "Screening: -28 Days to Day 1"),
        VISND = c("VISND", NA)
      )
    ),
    `101-02-102` = list(
      Demographics = as_tbl(
        SubjectID = c("SubjectID", "101-02-102"),
        BRTHDTC = c("BRTHDTC", "1963-07-02"),
        SEX = c("SEX", "Female")
      ),
      DiseaseStatus = as_tbl(
        SubjectID = c("SubjectID", "101-02-102"),
        DSDT = c("DSDT", "2017-04-04"),
        DSDT_P = c("DSDT_P", NA)
      ),
      Visits = as_tbl(
        SubjectID = c("SubjectID", "101-02-102"),
        Visit = c("Visit", NA),
        VISND = c("VISND", NA)
      )
    ),
    `101-03-103` = list(
      Demographics = as_tbl(
        SubjectID = c("SubjectID", "101-03-103"),
        BRTHDTC = c("BRTHDTC", "1940-09-11"),
        SEX = c("SEX", "Male")
      ),
      DiseaseStatus = as_tbl(
        SubjectID = c("SubjectID", "101-03-103"),
        DSDT = c("DSDT", NA),
        DSDT_P = c("DSDT_P", NA)
      ),
      Visits = as_tbl(
        SubjectID = c("SubjectID", "101-03-103"),
        Visit = c("Visit", "Screening: -28 Days to Day 1"),
        VISND = c("VISND", NA)
      )
    )
  )
})

I would like to change the SubjectID value to NA if a df in the list only has a value in SubjectID and NA for all other variables. What should I do?

I am thinking of counting the non-NA values in each df; if the count equals 1, then remove all records or change SubjectID to NA. I would still like to keep the column names, but how do I do that?

In my example file, 101-02-102 should have Visits as empty df, and 101-03-103 should have DiseaseStatus as empty df.

Stataq
  • 2,237
  • 6
  • 14

2 Answers2

3

Updated Solution

I have updated my solution based on new requirements by OP, so that SubjectID in second row will be NA provided that all other variables are also NAs.

library(purrr)
library(dplyr)  # mutate(), if_else() and row_number() are dplyr functions

# Walk every subject and every table inside it. When all non-SubjectID values
# in the second row are NA, set SubjectID to NA in every row after the first,
# so the header-like first row is kept; otherwise return the table unchanged.
lst %>%
  map(~ .x %>%
        map(~ if (all(is.na(.x[2, -1]))) {
          .x %>%
            mutate(SubjectID = if_else(row_number() == 1, SubjectID, NA_character_))
        } else {
          .x
        }))

$`101-01-101`
$`101-01-101`$Demographics
# A tibble: 2 x 3
  SubjectID  BRTHDTC    SEX   
  <chr>      <chr>      <chr> 
1 SubjectID  BRTHDTC    SEX   
2 101-01-101 1953-07-07 Female

$`101-01-101`$DiseaseStatus
# A tibble: 2 x 3
  SubjectID  DSDT       DSDT_P
  <chr>      <chr>      <chr> 
1 SubjectID  DSDT       DSDT_P
2 101-01-101 2016-03-14 NA    

$`101-01-101`$Visits
# A tibble: 2 x 3
  SubjectID  Visit                        VISND
  <chr>      <chr>                        <chr>
1 SubjectID  Visit                        VISND
2 101-01-101 Screening: -28 Days to Day 1 NA   


$`101-02-102`
$`101-02-102`$Demographics
# A tibble: 2 x 3
  SubjectID  BRTHDTC    SEX   
  <chr>      <chr>      <chr> 
1 SubjectID  BRTHDTC    SEX   
2 101-02-102 1963-07-02 Female

$`101-02-102`$DiseaseStatus
# A tibble: 2 x 3
  SubjectID  DSDT       DSDT_P
  <chr>      <chr>      <chr> 
1 SubjectID  DSDT       DSDT_P
2 101-02-102 2017-04-04 NA    

$`101-02-102`$Visits
# A tibble: 2 x 3
  SubjectID Visit VISND
  <chr>     <chr> <chr>
1 SubjectID Visit VISND
2 NA        NA    NA   


$`101-03-103`
$`101-03-103`$Demographics
# A tibble: 2 x 3
  SubjectID  BRTHDTC    SEX  
  <chr>      <chr>      <chr>
1 SubjectID  BRTHDTC    SEX  
2 101-03-103 1940-09-11 Male 

$`101-03-103`$DiseaseStatus
# A tibble: 2 x 3
  SubjectID DSDT  DSDT_P
  <chr>     <chr> <chr> 
1 SubjectID DSDT  DSDT_P
2 NA        NA    NA    

$`101-03-103`$Visits
# A tibble: 2 x 3
  SubjectID  Visit                        VISND
  <chr>      <chr>                        <chr>
1 SubjectID  Visit                        VISND
2 101-03-103 Screening: -28 Days to Day 1 NA  
Anoushiravan R
  • 21,622
  • 3
  • 18
  • 41
2

As it is a nested list, use a double lapply. On each inner list element, get the colSums of the logical matrix from is.na, check whether they are equal to the number of rows (nrow), then assign NA to SubjectID and return the data.

# For each table of each subject: if every column other than the first is NA
# in all rows, overwrite SubjectID with NA; the table is returned either way.
lst2 <- lapply(lst1, function(subject) {
  lapply(subject, function(tbl) {
    na_per_col <- colSums(is.na(tbl[-1]))
    if (all(na_per_col == nrow(tbl))) {
      tbl$SubjectID <- NA_character_
    }
    tbl
  })
})

-output

lst2
$`101-01-101`
$`101-01-101`$Demographics
# A tibble: 1 x 3
  SubjectID  BRTHDTC    SEX   
  <chr>      <chr>      <chr> 
1 101-01-101 1953-07-07 Female

$`101-01-101`$DiseaseStatus
# A tibble: 1 x 3
  SubjectID  DSDT       DSDT_P
  <chr>      <chr>      <chr> 
1 101-01-101 2016-03-14 <NA>  

$`101-01-101`$Visits
# A tibble: 1 x 3
  SubjectID  Visit                        VISND
  <chr>      <chr>                        <chr>
1 101-01-101 Screening: -28 Days to Day 1 <NA> 


$`101-02-102`
$`101-02-102`$Demographics
# A tibble: 1 x 3
  SubjectID  BRTHDTC    SEX   
  <chr>      <chr>      <chr> 
1 101-02-102 1963-07-02 Female

$`101-02-102`$DiseaseStatus
# A tibble: 1 x 3
  SubjectID  DSDT       DSDT_P
  <chr>      <chr>      <chr> 
1 101-02-102 2017-04-04 <NA>  

$`101-02-102`$Visits
# A tibble: 1 x 3
  SubjectID Visit VISND
  <chr>     <chr> <chr>
1 <NA>      <NA>  <NA> 


$`101-03-103`
$`101-03-103`$Demographics
# A tibble: 1 x 3
  SubjectID  BRTHDTC    SEX  
  <chr>      <chr>      <chr>
1 101-03-103 1940-09-11 Male 

$`101-03-103`$DiseaseStatus
# A tibble: 1 x 3
  SubjectID DSDT  DSDT_P
  <chr>     <chr> <chr> 
1 <NA>      <NA>  <NA>  

$`101-03-103`$Visits
# A tibble: 1 x 3
  SubjectID  Visit                        VISND
  <chr>      <chr>                        <chr>
1 101-03-103 Screening: -28 Days to Day 1 <NA> 

For the updated case

# Updated case: the first row of each table repeats the column names, so it is
# left out of the NA check and dropped from the returned table. SubjectID is
# set to NA when every remaining column is entirely NA.
lapply(lst, function(subject) {
  lapply(subject, function(tbl) {
    na_per_col <- colSums(is.na(tbl[-1, -1]))
    n_data_rows <- nrow(tbl[-1, ])
    if (all(na_per_col == n_data_rows)) {
      tbl$SubjectID <- NA_character_
    }
    tbl[-1, ]
  })
})
akrun
  • 874,273
  • 37
  • 540
  • 662