0

I need to create 2 new columns based on the conditions across 7 columns for over a million rows. Sample data is below.
I need newcol1 to return 1/0 if R ==1 and if any or all of the 6 columns D:N ==1. R==0 regardless of D:N value I need newcol2 to return 0,1,2,3,4,5,6,or 7 if R==1 and depending on which column D:N==1 return a

#R==1 & D==1 ~1  
#R==1 & M==1 ~2  
#R==1 & P==1 ~3  
#R==1 & A==1 ~4  
#R==1 & S==1 ~5  
#R==1 & N==1 ~6  
#R==1 & D:N has more than 1 column with a 1 ~7  
#R==0 & no matter what is in column D:N ~0  
#no matter what R== if all D:N ==0 ~0  

Sample Data

Id  D   M   P   A   S   N   R
1   0   0   0   0   0   0   0
2   1   1   1   1   1   1   0
3   1   1   1   1   1   1   1
4   1   1   0   0   0   0   1
5   1   0   0   0   0   0   1
6   1   0   1   0   0   0   1
7   1   0   0   1   0   0   1
8   1   0   0   0   1   0   1
9   1   0   0   0   0   1   1
10  0   1   0   0   0   0   1
11  0   0   1   0   0   0   1
12  0   0   0   1   0   0   1
13  0   0   0   0   1   0   1
14  0   0   0   0   0   1   1
15  0   1   1   1   1   1   1
16  0   0   0   0   0   0   1
17  1   0   0   0   0   0   1
18  1   0   1   1   0   0   1
19  0   1   0   0   0   1   1
20  0   0   1   0   1   0   1
21  0   0   0   0   0   1   1
22  0   0   0   0   1   0   1
23  0   0   0   1   0   0   1
24  0   0   1   0   0   0   1

I've tried many versions of case_when, across, list,etc. and I can't get the syntax right. Thank you in advance for your help.

    new.df <- test.df %>%
      rowwise(Id) %>% 
      mutate(newcol1 = case_when(
          R ==0 &  D==1 & M==1 & P==1 & A==1 & S==1 & N==1 ~ 0,   #0 regardless of D:N value because R==0
           R ==1 &  for any D==1 & M==1 & P==1 & A==1 & S==1 & N==1 ~ 1,   #1 as long as at least 1 D:N==1
            ),
            newcol1 = factor(
            newcol1,
          level = c( "0", "1")
        )
    )
    new.df2 <- new.df
    rowwise(Id) %>% 
       mutate(newvcol2 = case_when(
    R ==1 &  D==1 & M:N ==0 ~ 1,
    R ==1 &  M==1 & D==0 & P:N ==0 ~ 2,
    R ==1 &  P==1 & D:M==0 & A:N ==0 ~ 3,
    R ==1 &  A==1 & D:P==0 & S:N ==0 ~ 4,
    R ==1 &  S==1 & D:A==0 & N==0 ~ 5, 
    R ==1 &  N==1 & D:S ==0 ~ 6,
    R ==1 & more than 1 of D:N ==1 ~ 7 ,
    TRUE ~ 0), # should include (R ==0 & any value in D:N  ~ 0, if all D:N==0 ~0)
              newcol2 = factor(
          newcol2,
          level = c( "0", "1", "2", "3", "4", "5", "6", "7")
        )
    )                  

The code from the question below looks very close to what I need but I can't expand it because I don't understand all of it. Create column based on multiple conditions on different columns

    for _, sub in df.groupby(['cust_id']):              # test for each cust_id
    for col in ['prod_1', 'prod_2', 'first_act']:   # test columns in sequence
        tmp = sub[sub[col] == 1]                    # try to match
        if len(tmp) != 0:                           # ok found at least one
            df.loc[tmp.index[0], 't0'] = 1          # set t0 to 1 for first index found
            break

Desired Result

Id  D   M   P   A   S   N   R   newcol1 newcol2
1   0   0   0   0   0   0   0   0   0
2   1   1   1   1   1   1   0   0   0
3   1   1   1   1   1   1   1   1   7
4   1   1   0   0   0   0   1   1   7
5   1   0   0   0   0   0   1   1   1
6   1   0   1   0   0   0   1   1   7
7   1   0   0   1   0   0   1   1   7
8   1   0   0   0   1   0   1   1   7
9   1   0   0   0   0   1   1   1   7
10  0   1   0   0   0   0   1   1   2
11  0   0   1   0   0   0   1   1   3
12  0   0   0   1   0   0   1   1   4
13  0   0   0   0   1   0   1   1   5
14  0   0   0   0   0   1   1   1   6
15  0   1   1   1   1   1   1   1   7
16  0   0   0   0   0   0   1   0   0
17  1   0   0   0   0   0   1   1   1
18  1   0   1   1   0   0   1   1   7
19  0   1   0   0   0   1   1   1   7
20  0   0   1   0   1   0   1   1   7
21  0   0   0   0   0   1   1   1   6
22  0   0   0   0   1   0   1   1   5
23  0   0   0   1   0   0   1   1   4
24  0   0   1   0   0   0   1   1   3
Lilly
  • 1
  • 1

0 Answers0