-1

I have a data frame where I have NAs in one column (B). My goal is to fill in these NAs with the corresponding value of column E where column D has the same value as column C. I want to do that within each ID tier.

my data frame looks like this:

   ID  A  B  C  D  E
1   1  1  1 A1 A1 12
2   1  2 NA B1 E1 11
3   1  3  3 C1 C1 10
4   1  4  4 D1 D1  9
5   1  5 NA E1 B1  8
6   1  6  6 A2 A2  7
7   1  7 NA B2 E2  6
8   1  8  8 C2 C2  5
9   1  9  9 D2 D2  4
10  1 10 NA E2 B2  3
11  2  1  1 A1 A1 12
12  2  2 NA B1 E1 11
13  2  3  3 C1 C1 10
14  2  4  4 D1 D1  9
15  2  5 NA E1 B1  8
16  2  6  6 A2 A2  7
17  2  7 NA B2 E2  6
18  2  8  8 C2 C2  5
19  2  9  9 D2 D2  4
20  2 10 NA E2 B2  3

this is the desired output

   ID  A  B  C  D  E
1   1  1  1 A1 A1 12
2   1  2  8 B1 E1 11
3   1  3  3 C1 C1 10
4   1  4  4 D1 D1  9
5   1  5 11 E1 B1  8
6   1  6  6 A2 A2  7
7   1  7  3 B2 E2  6
8   1  8  8 C2 C2  5
9   1  9  9 D2 D2  4
10  1 10  6 E2 B2  3
11  2  1  1 A1 A1 12
12  2  2  8 B1 E1 11
13  2  3  3 C1 C1 10
14  2  4  4 D1 D1  9
15  2  5 11 E1 B1  8
16  2  6  6 A2 A2  7
17  2  7  3 B2 E2  6
18  2  8  8 C2 C2  5
19  2  9  9 D2 D2  4
20  2 10  6 E2 B2  3

here is the structure of my df:

structure(list(ID = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2), A = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 
10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L), B = c(1L, NA, 
3L, 4L, NA, 6L, NA, 8L, 9L, NA, 1L, NA, 3L, 4L, NA, 6L, NA, 8L, 
9L, NA), C = c("A1", "B1", "C1", "D1", "E1", "A2", "B2", "C2", 
"D2", "E2", "A1", "B1", "C1", "D1", "E1", "A2", "B2", "C2", "D2", 
"E2"), D = c("A1", "E1", "C1", "D1", "B1", "A2", "E2", "C2", 
"D2", "B2", "A1", "E1", "C1", "D1", "B1", "A2", "E2", "C2", "D2", 
"B2"), E = c(12L, 11L, 10L, 9L, 8L, 7L, 6L, 5L, 4L, 3L, 12L, 
11L, 10L, 9L, 8L, 7L, 6L, 5L, 4L, 3L)), row.names = c(NA, -20L
), class = "data.frame")

and of the desired output:

structure(list(ID = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2), A = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 
10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L), B = c(1, 8, 3, 
4, 11, 6, 3, 8, 9, 6, 1, 8, 3, 4, 11, 6, 3, 8, 9, 6), C = c("A1", 
"B1", "C1", "D1", "E1", "A2", "B2", "C2", "D2", "E2", "A1", "B1", 
"C1", "D1", "E1", "A2", "B2", "C2", "D2", "E2"), D = c("A1", 
"E1", "C1", "D1", "B1", "A2", "E2", "C2", "D2", "B2", "A1", "E1", 
"C1", "D1", "B1", "A2", "E2", "C2", "D2", "B2"), E = c(12L, 11L, 
10L, 9L, 8L, 7L, 6L, 5L, 4L, 3L, 12L, 11L, 10L, 9L, 8L, 7L, 6L, 
5L, 4L, 3L)), row.names = c(NA, -20L), class = "data.frame")

thank you in advance!

best, Luise

luise
  • 3
  • 3

2 Answers2

1

After grouping by ID, if the value for B is NA then replace with E indexed by first match of columns C and D:

library(dplyr)

df %>%
  group_by(ID) %>%
  mutate(B = ifelse(is.na(B), E[match(C, D)], B))

Output

      ID     A     B C     D         E
   <dbl> <int> <int> <chr> <chr> <int>
 1     1     1     1 A1    A1       12
 2     1     2     8 B1    E1       11
 3     1     3     3 C1    C1       10
 4     1     4     4 D1    D1        9
 5     1     5    11 E1    B1        8
 6     1     6     6 A2    A2        7
 7     1     7     3 B2    E2        6
 8     1     8     8 C2    C2        5
 9     1     9     9 D2    D2        4
10     1    10     6 E2    B2        3
11     2     1     1 A1    A1       12
12     2     2     8 B1    E1       11
13     2     3     3 C1    C1       10
14     2     4     4 D1    D1        9
15     2     5    11 E1    B1        8
16     2     6     6 A2    A2        7
17     2     7     3 B2    E2        6
18     2     8     8 C2    C2        5
19     2     9     9 D2    D2        4
20     2    10     6 E2    B2        3
Ben
  • 28,684
  • 5
  • 23
  • 45
1

An option is also to use coalesce after grouping by 'ID'

library(dplyr) # 1.1.0
df %>% 
   mutate(B = coalesce(B, E[match(C, D)]), .by = 'ID')

-output

   ID  A  B  C  D  E
1   1  1  1 A1 A1 12
2   1  2  8 B1 E1 11
3   1  3  3 C1 C1 10
4   1  4  4 D1 D1  9
5   1  5 11 E1 B1  8
6   1  6  6 A2 A2  7
7   1  7  3 B2 E2  6
8   1  8  8 C2 C2  5
9   1  9  9 D2 D2  4
10  1 10  6 E2 B2  3
11  2  1  1 A1 A1 12
12  2  2  8 B1 E1 11
13  2  3  3 C1 C1 10
14  2  4  4 D1 D1  9
15  2  5 11 E1 B1  8
16  2  6  6 A2 A2  7
17  2  7  3 B2 E2  6
18  2  8  8 C2 C2  5
19  2  9  9 D2 D2  4
20  2 10  6 E2 B2  3
akrun
  • 874,273
  • 37
  • 540
  • 662