Where to place NA.RM=TRUE in an equation?

Question

I am doing the following:

# Build branch-level columns from the wide index table. A branch made of several
# source columns combines them with each column weighted by its own share of the
# group's total, rounded to one decimal; a single-column branch is copied as is.
# Because `+` and `/` propagate NA, one missing component makes the whole
# combined value NA for that row.
branques_Idescat_EU <- indexIPI_branques_EU_wide %>% 
  mutate(alimentació = round(C10*(C10/(C10+C11+C12)) + C11*(C11/(C10+C11+C12)) + C12*(C12/(C10+C11+C12)), 1),
         tèxtil = round(C13*(C13/(C13+C14+C15)) + C14*(C14/(C13+C14+C15)) + C15*(C15/(C13+C14+C15)), 1), 
         paper = round(C17*(C17/(C17+C18)) + C18*(C18/(C17+C18)), 1),
         químiques = C20,
         farmàcia = C21,
         plàstics = C22,
         minerals = C23,
         metalurgia = round(C24*(C24/(C24+C25)) + C25*(C25/(C24+C25)), 1),
         electrònica = round(C26*(C26/(C26+C27)) + C27*(C27/(C26+C27)), 1),
         maquinària = C28,
         transport = round(C29*(C29/(C29+C30)) + C30*(C30/(C29+C30)), 1),
         altres = round(C16*(C16/(C16+C31+C32+C33)) + C31*(C31/(C16+C31+C32+C33)) + C32*(C32/(C16+C31+C32+C33)) + C33*(C33/(C16+C31+C32+C33)),1),
         energia = D35,
         aigua = E36, 
         # NOTE(review): mutate() treats every named argument as a new column, so
         # this adds a column called `na.rm` (TRUE in every row); it does not
         # switch on NA removal for the expressions above.
         na.rm=TRUE) %>% 
  # Keep time, geo and the new columns from alimentació through aigua.
  select(time, geo, alimentació:aigua)

I need to add na.rm=TRUE to each new variable. For instance, alimentació comes out as NA because the variable C10 contains only NA values. But I don't know where to place na.rm=TRUE in the equation below:

# Combination of C10, C11 and C12 in which each column is weighted by its own
# share of the three-column total; the result is rounded to one decimal place.
alimentació = round(C10*(C10/(C10+C11+C12)) + C11*(C11/(C10+C11+C12)) + C12*(C12/(C10+C11+C12)), 1)

Sample data:

# dput() output of the sample tibble: 21 rows (8 for "Alemanya", 13 for
# "Espanya") with the character columns geo and time and the numeric index
# columns C10-C33, D35 and E36. In this sample C12 is NA for every "Espanya"
# row and E36 is NA in every row.
structure(list(geo = c("Alemanya", "Alemanya", "Alemanya", "Alemanya",

"Alemanya", "Alemanya", "Alemanya", "Alemanya", "Espanya", "Espanya", 
"Espanya", "Espanya", "Espanya", "Espanya", "Espanya", "Espanya", 
"Espanya", "Espanya", "Espanya", "Espanya", "Espanya"), time = c("oct. 2022", 
"nov. 2022", "des. 2022", "gen. 2023", "febr. 2023", "març 2023", 
"abr. 2023", "maig 2023", "gen. 2002", "febr. 2002", "març 2002", 
"abr. 2002", "maig 2002", "juny 2002", "jul. 2002", "ag. 2002", 
"set. 2002", "oct. 2002", "nov. 2002", "des. 2002", "gen. 2003"
), C10 = c(104.2, 111.5, 100.2, 100.7, 97.6, 116.7, 98, 108.7, 
84.1, 82.1, 82.3, 93.3, 93.9, 89.8, 96.2, 88.5, 89.7, 103, 98.9, 
87.2, 87.7), C11 = c(89.7, 100.6, 93.9, 90, 86.5, 103.6, 91.7, 
109.4, 89.8, 88.6, 84.1, 103.8, 114.2, 102.6, 130.8, 104.2, 95.7, 
120.6, 110.9, 93.8, 91.9), C12 = c(56.4, 71.4, 67, 64.3, 63.7, 
69.4, 62, 60.1, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA), C13 = c(86.3, 91.3, 69.1, 88, 84.5, 99.5, 82.3, 89.4, 211.7, 
207, 202.9, 228.5, 237.4, 206.8, 224, 68.5, 200.5, 232, 214.2, 
169.8, 200.2), C14 = c(65.2, 79.3, 74.6, 71.6, 68.8, 76.4, 67.3, 
76.9, 267.9, 284.1, 276.4, 229.6, 228.8, 218.1, 285.6, 201.7, 
283.9, 275, 227.4, 190.3, 243.4), C15 = c(107.5, 114, 84.8, 107.4, 
111.8, 130.8, 101.1, 110.9, 274.6, 281.8, 254.3, 255, 233.8, 
219.8, 295.9, 166.8, 262.9, 268.8, 235.6, 228.4, 265.4), C16 = c(93.5, 
98.9, 75.8, 81.2, 86, 104, 88.5, 93.2, 216, 214.9, 212.5, 244.9, 
246.2, 224.6, 245.4, 98.2, 218.4, 255.7, 252, 212, 222), C17 = c(89.2, 
92.2, 74, 88.3, 81.6, 91, 74.7, 81.8, 99, 94.3, 95.9, 102.8, 
102.5, 95.7, 104.3, 75.6, 94.4, 106.5, 98.6, 86.7, 100.2), C18 = c(70.1, 
75.7, 70, 61.6, 61.3, 71.6, 59.9, 61.9, 134.3, 129.3, 136.7, 
147.2, 146.9, 138.6, 137, 108.1, 140.4, 151.6, 150.3, 139.9, 
127.3), C19 = c(98.5, 98, 100.7, 89.9, 81.7, 92.5, 80.7, 73.1, 
81.5, 72.4, 79, 80.2, 81.1, 86.4, 83.7, 84.3, 77.3, 84.9, 81.4, 
94.4, 85.8), C20 = c(76, 80, 69.2, 84.6, 81.4, 88.3, 76.6, 78.7, 
90.5, 90.1, 90.2, 99.1, 99.2, 91.4, 102.1, 74.6, 97.6, 106.5, 
96.4, 81.2, 97.9), C21 = c(128.5, 127.3, 127.4, 117.8, 112.6, 
133.1, 117.6, 110.3, 72.4, 71.6, 65, 71.6, 72.7, 69.4, 79.1, 
33.7, 68.2, 83.2, 79.3, 60.9, 73.4), C22 = c(92.8, 98.1, 71.1, 
92, 93.4, 105.2, 86.7, 92.3, 113.4, 119, 111.3, 124.5, 123.8, 
113.1, 123.4, 75.4, 120.8, 126.8, 118.1, 91.7, 116.3), C23 = c(104.5, 
107, 79.5, 80.2, 85, 103, 88.9, 94.3, 229.5, 231.7, 235.6, 245.4, 
255.5, 241.3, 253.3, 178.9, 237.8, 266.8, 244.9, 200.2, 224.8
), C24 = c(84.1, 84.7, 68, 87.9, 86.3, 96.7, 84.8, 88.7, 117.1, 
118.2, 119.3, 127.4, 132.1, 124.1, 117.9, 76.2, 126.9, 137.5, 
122, 98.6, 117.7), C25 = c(99.7, 108.5, 86, 94.6, 97.3, 113.7, 
92.7, 99.9, 143, 151.6, 140.5, 166.2, 165.2, 156.7, 173.8, 93.9, 
164.6, 183.8, 163.9, 136.1, 148.4), C26 = c(117.2, 135.3, 129.7, 
114.9, 118.3, 147.4, 115, 124.1, 167, 170.8, 168.9, 165.1, 172.3, 
164.8, 168.1, 83.6, 171.2, 176.7, 176.7, 173.6, 143.4), C27 = c(108, 
119.4, 99.1, 107, 112.3, 125.7, 99.7, 109.7, 132.4, 131.6, 127.3, 
138, 149.8, 139.2, 159.7, 71.6, 143.3, 170.4, 148.2, 128.8, 139.8
), C28 = c(92.8, 108.3, 116, 85.2, 91.3, 109.8, 86, 94.2, 111.1, 
112.7, 111.5, 123.7, 128.1, 116.5, 137.1, 62.5, 114.9, 129.1, 
115.4, 129.8, 94.9), C29 = c(77.5, 97.7, 74, 80.1, 92.7, 106.2, 
79.4, 90.9, 112, 114.9, 107.6, 121.2, 124, 117.7, 117.1, 50.1, 
119.5, 128.1, 122.7, 82.9, 113.3), C30 = c(122, 148.4, 121.6, 
130.5, 133.1, 152.7, 118.1, 132.4, 155.7, 154.7, 144.9, 163.6, 
167.9, 161.7, 160.6, 69.6, 165.7, 177, 160.2, 131.1, 147.6), 
    C31 = c(84.9, 94.7, 74, 75.8, 81, 95.9, 77.9, 86.2, 256.8, 
    257.9, 242.7, 269.3, 285.5, 251.3, 310.5, 121.1, 261.9, 300.9, 
    269.8, 237.6, 253.8), C32 = c(110.6, 123.8, 110.5, 100.1, 
    109.2, 127.1, 101.1, 109.4, 112.5, 115, 109.9, 128.6, 127.5, 
    121.5, 126.8, 57.5, 124.6, 138.1, 121.4, 102.6, 111.7), C33 = c(94.5, 
    117, 149.2, 87, 91.7, 114.1, 94.9, 103.6, 105.4, 88, 85.3, 
    94.9, 93.6, 98.9, 100.4, 75.6, 125.2, 92.4, 81.8, 91.7, 79.9
    ), D35 = c(80.5, 86.7, 89.7, 92.7, 84.4, 86.6, 73.8, 67, 
    115.2, 95.5, 97.9, 94.2, 92.1, 97.1, 102.3, 92, 93.3, 96.1, 
    95.9, 101.1, 111.9), E36 = c(NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
    )), row.names = c(NA, -21L), class = c("tbl_df", "tbl", "data.frame"
))

`na.rm` is an argument to a function, not part of an equation, so you'd pass it as an argument to the relevant function. But `na.rm` really only applies to functions that summarise, like `mean`. It makes no sense for `round` since `round` transforms individual values. You can't "round" an `NA`: it's still `NA`. — Limey, Jul 14 '23 at 08:09
I’m voting to close this question because `round`ing an `NA` makes no sense. — Limey, Jul 14 '23 at 08:10
OK... But then why am I getting NA values when I calculate: alimentació = C10*(C10/(C10+C11+C12)) + C11*(C11/(C10+C11+C12)) + C12*(C12/(C10+C11+C12))? C10 has NA values, but C11 and C12 do not. — Maria, Jul 14 '23 at 08:14
You are adding `NA` with other numbers. For example, what would you expect of the result of `NA + 1`? It will still be `NA`. I am voting to close this question, too. — Liang Zhang, Jul 14 '23 at 08:25
But is there any option so that the equation is still calculated excluding NAs? — Maria, Jul 14 '23 at 08:29
Adding a missing value to any other value results in a missing value. You might want to sum them with `NA` replaced by 0. There is `na.rm` for `sum()`! Try using that instead. — Liang Zhang, Jul 14 '23 at 08:31
What is the expected result of `C10*(C10/(C10+C11+C12)) + C11*(C11/(C10+C11+C12)) + C12*(C12/(C10+C11+C12))` if C10 is `NA` ? — Cettt, Jul 14 '23 at 08:34
The same as C11*(C11/(C10+C11+C12)) + C12*(C12/(C10+C11+C12)). The thing is that I have different groups: for some I have data in C10, but for others I don't. — Maria, Jul 14 '23 at 08:41

Miff · Accepted Answer · 2023-07-14T10:11:26.777

From your comments, it seems like you can replace the NAs with zeros in the relevant columns, then use your existing code:

# Build the branch-level columns while treating a missing component as
# contributing nothing: every NA in the index columns C10:E36 is replaced by 0
# first, then the weighted combinations are computed as in the question.
#
# The question's trailing `na.rm=TRUE` is not carried over: inside mutate() a
# named argument defines a new column, so it only created a column called
# `na.rm` that select() discarded again. The selected output is unchanged.
#
# Caveats of the zero replacement:
#   * pass-through branches (químiques, farmàcia, ..., energia, aigua) also
#     report 0 where the source column was NA;
#   * a row in which every component of a group is missing evaluates 0 / 0 and
#     therefore yields NaN for that branch, not 0.
branques_Idescat_EU <- indexIPI_branques_EU_wide %>%
  # replace() swaps only the NA positions and leaves every other value alone.
  mutate(across(C10:E36, function(x) replace(x, is.na(x), 0))) %>%
  mutate(
    # Each component is multiplied by its share of the group's total.
    alimentació = round(
      C10 * (C10 / (C10 + C11 + C12)) +
        C11 * (C11 / (C10 + C11 + C12)) +
        C12 * (C12 / (C10 + C11 + C12)),
      1
    ),
    tèxtil = round(
      C13 * (C13 / (C13 + C14 + C15)) +
        C14 * (C14 / (C13 + C14 + C15)) +
        C15 * (C15 / (C13 + C14 + C15)),
      1
    ),
    paper = round(
      C17 * (C17 / (C17 + C18)) +
        C18 * (C18 / (C17 + C18)),
      1
    ),
    químiques = C20,
    farmàcia = C21,
    plàstics = C22,
    minerals = C23,
    metalurgia = round(
      C24 * (C24 / (C24 + C25)) +
        C25 * (C25 / (C24 + C25)),
      1
    ),
    electrònica = round(
      C26 * (C26 / (C26 + C27)) +
        C27 * (C27 / (C26 + C27)),
      1
    ),
    maquinària = C28,
    transport = round(
      C29 * (C29 / (C29 + C30)) +
        C30 * (C30 / (C29 + C30)),
      1
    ),
    altres = round(
      C16 * (C16 / (C16 + C31 + C32 + C33)) +
        C31 * (C31 / (C16 + C31 + C32 + C33)) +
        C32 * (C32 / (C16 + C31 + C32 + C33)) +
        C33 * (C33 / (C16 + C31 + C32 + C33)),
      1
    ),
    energia = D35,
    aigua = E36
  ) %>%
  # Keep the identifiers plus the new columns alimentació through aigua.
  select(time, geo, alimentació:aigua)

With this, I get the same result for all observations. I updated my question with sample data. — Maria, Jul 14 '23 at 09:36
Yes, there was a mistake there - I think that the modified approach now works as required — Miff, Jul 14 '23 at 10:14
This works in the sense that it runs without error. But running without error is not the same as giving the correct answer. We cannot judge if the answer is correct without context. I mean no disrespect to @Miff, but I strongly urge OP to consider carefully if this is an *appropriate* solution. — Limey, Jul 14 '23 at 17:24

Where to place NA.RM=TRUE in an equation?

1 Answer