1

I have SAS code similar to the logic below. I would like to convert it to an R script using `if`/`else` or `ifelse()`. Here is the SAS code example:

/* Fill var1 only where it is missing (`var1 =.`); the source of the fill */
/* value depends on cat1, cat2 and cat3.                                  */

/* Branch 1: cat1 = 1. */
if cat1=1 then do;
 /* cat2 = 'U': take the replacement from var3. */
 if cat2 eq 'U' then do;
  if var1 =. then var1 = var3;
 end;
 /* cat2 = 'A': take the replacement from var2. */
 else if cat2='A' then do;
  if var1 =. then var1=var2;
 end;
 end;
/* Branch 2: every other cat1 value. Only rows with cat2 = 'U' and */
/* cat3 = 'NC' are touched.                                        */
else do;
 if cat2 eq 'U' then do;
  if cat3 = 'NC' then do;
   if var1 =. then do;
    /* Replacement is a constant chosen from var2: 1.15 when var2 >= 4.0, */
    /* otherwise 10.0 when var2 <= 4.0, otherwise 6.1.                    */
    /* NOTE(review): SAS treats a missing numeric as lower than any       */
    /* number, so `var2 le 4.0` presumably also matches a missing var2    */
    /* and the final `else` looks unreachable - confirm the intent.       */
    if var2 ge 4.0 then var1 =1.15;
   else if var2 le 4.0 then var1 = 10.0;
   else var1 = 6.1;
end;
end;
end;
end;

Here is the converted R script. However, the last step (df4) overwrites the results of all the previous steps. Is there a better way to convert this logic into R if-else statements?

# Sample data: seven rows; var1 is NA everywhere except row 6 (0.14).
df <- data.frame(cat1 = c(rep(1,3), rep(0,4)), 
             cat2 = c(rep('U',2), rep('A',3), 'U', 'U'), 
             cat3 = c(rep('NC',2), rep('RF',2), 'NC', 'NC', 'NC'),
             var1 = c(rep(NA,5),.14, NA), 
             var2 = c(rep(NA,2), 4.5, 2.3, 8.5, 3.4, 2.4), 
             var3 = c(rep(2.6, 2), 3.9, NA, 4.2, .012, NA))

# Step 1: where var1 is NA, cat1 is 1 and cat2 is 'U', take var3;
# every other row keeps its current var1 (the `no` argument is var1).
df2 <- transform(df,
             var1 = ifelse(is.na(var1) & cat1 ==1 & cat2  == 'U', var3, var1) 
             )
# Step 2: where var1 is still NA, cat1 is 1 and cat2 is 'A', take var2;
# every other row keeps its current var1.
df3 <- transform(df2, 
             var1 = ifelse(is.na(var1) & cat1 ==1 & cat2 == 'A', var2, var1)
             )
# Step 3: rows passing the full test get 1.15. Unlike steps 1 and 2, the `no`
# argument is not var1 but a second ifelse() that depends on var2 alone, so
# every row failing the test receives 10.0, 6.1, or NA (when var2 is NA) and
# the var1 values produced by the earlier steps are discarded.
df4 <- transform(df3,
             var1 = ifelse(is.na(var1) & cat2 == 'U' & cat3 == 'NC' & var2 >= 4.0, 1.15,
                           ifelse(var2 <= 4.0, 10.0, 6.1))
             )

Please help?

Joe
  • 62,789
  • 6
  • 49
  • 67
R. zqkal
  • 57
  • 4
  • There may be a logical problem with the last conditional since all the var1's with cat2=="U" will no longer be missing after the first two steps, so nothing will be replaced. – IRTFM Oct 21 '15 at 17:18

1 Answer

1

I generally try to make "targeted assignments", i.e. assignments only into the locations that match the current criteria. That way I can avoid the problems with overwriting when I modify the entire vector with nested ifelse's that I have not adequately debugged:

# Targeted assignments: each step writes only into the rows picked by its
# selector, and the replacement is subset with the same selector so values stay
# row-aligned. Assigning a whole column (e.g. `<- df$var3`) would instead be
# recycled from its first element and land in the wrong rows.
# which() converts the logical mask to row indices and drops NA entries, which
# could otherwise make the subscripted assignment fail.

# cat1 == 1 and cat2 == 'U': fill missing var1 from var3.
fill_u <- which(with(df, is.na(var1) & cat1 == 1 & cat2 == 'U'))
df$var1[fill_u] <- df$var3[fill_u]

# cat1 == 1 and cat2 == 'A': fill missing var1 from var2.
# With the sample data this puts 4.5 into row 3; recycling df$var2 would have
# written var2[1], which is NA.
fill_a <- which(with(df, is.na(var1) & cat1 == 1 & cat2 == 'A'))
df$var1[fill_a] <- df$var2[fill_a]

# cat2 == 'U' and cat3 == 'NC': var1 that is still missing gets a constant
# chosen from var2. The selector is computed once and reused on both sides.
# NOTE: the SAS original applies this rule only in its `else` branch (cat1 not
# equal to 1); add `cat1 != 1` to the selector if that restriction is required.
fill_nc <- which(with(df, is.na(var1) & cat2 == 'U' & cat3 == 'NC'))
df$var1[fill_nc] <- with(
  df[fill_nc, ],
  ifelse(var2 >= 4.0, 1.15,
         ifelse(var2 <= 4.0, 10.0, 6.1))
)

I'm not sure I got the logic correct. I just copied your R conditionals into the selections.

df

  cat1 cat2 cat3  var1 var2  var3
1    1    U   NC  2.60   NA 2.600
2    1    U   NC  2.60   NA 2.600
3    1    A   RF    NA  4.5 3.900
4    0    A   RF    NA  2.3    NA
5    0    A   NC    NA  8.5 4.200
6    0    U   NC  0.14  3.4 0.012
7    0    U   NC 10.00  2.4    NA
IRTFM
  • 258,963
  • 21
  • 364
  • 487