1

Here is a subset of my dataset. Patients had different types of adverse events (variables), each graded by severity (observations). I would like to create one additional variable per severity level ("modéré", "grave", "sévère") holding, for each patient, the number of adverse events of that severity.

# Sample of the adverse-event data: one row per patient (`record_id`) and one
# factor column per adverse-event type giving its severity grade.
# NA means no grade is recorded for that event.
mydata <- local({
  grades <- c("modéré", "grave", "sévère")

  # Build a severity factor from integer codes
  # (1 = modéré, 2 = grave, 3 = sévère).
  as_grade <- function(codes) factor(grades[codes], levels = grades)

  # Codes for event types with no recorded grade for any of the 10 patients.
  no_grade <- rep(NA_integer_, 10L)

  structure(
    list(
      record_id = c("2", "4", "5", "9", "10", "11", "12", "15", "22", "23"),
      `Dégré Cytolyse hep ` = as_grade(
        c(NA, 3L, NA, NA, 1L, NA, 2L, NA, 3L, NA)
      ),
      `Dégré Trble digest` = as_grade(
        c(1L, NA, NA, NA, NA, 2L, 1L, 1L, NA, 3L)
      ),
      `Dégré Erupt cutanées` = as_grade(no_grade),
      `Dégré Ins renale` = as_grade(
        c(NA, NA, NA, 1L, NA, NA, NA, NA, NA, NA)
      ),
      `Dégré Neuropath` = as_grade(no_grade),
      `Dégré Autre 1` = as_grade(
        c(NA, NA, 1L, NA, NA, 1L, NA, 1L, 3L, NA)
      ),
      `Dégré Autre 2` = as_grade(
        c(NA, NA, NA, NA, NA, 1L, NA, 1L, NA, NA)
      ),
      `Dégré Autre 3` = as_grade(no_grade),
      `Dégré Autre 4` = as_grade(no_grade),
      `Dégré Autre 5` = as_grade(no_grade)
    ),
    # Same compact row-name form as the original dput() output.
    row.names = c(NA, 10L),
    class = "data.frame"
  )
})

The expected dataset will be:

   record_id Dégré Cytolyse hep  Dégré Trble digest Dégré Erupt cutanées Dégré Ins renale Dégré Neuropath Dégré Autre 1 Dégré Autre 2 Dégré Autre 3 Dégré Autre 4 Dégré Autre 5 modéré sévère grave
1          2                <NA>             modéré                 <NA>             <NA>            <NA>          <NA>          <NA>          <NA>          <NA>          <NA>      1      0     0
2          4              sévère               <NA>                 <NA>             <NA>            <NA>          <NA>          <NA>          <NA>          <NA>          <NA>      0      1     0
3          5                <NA>               <NA>                 <NA>             <NA>            <NA>        modéré          <NA>          <NA>          <NA>          <NA>      1      0     0
4          9                <NA>               <NA>                 <NA>           modéré            <NA>          <NA>          <NA>          <NA>          <NA>          <NA>      1      0     0
5         10              modéré               <NA>                 <NA>             <NA>            <NA>          <NA>          <NA>          <NA>          <NA>          <NA>      1      0     0
6         11                <NA>              grave                 <NA>             <NA>            <NA>        modéré        modéré          <NA>          <NA>          <NA>      2      0     1
7         12               grave             modéré                 <NA>             <NA>            <NA>          <NA>          <NA>          <NA>          <NA>          <NA>      1      0     1
8         15                <NA>             modéré                 <NA>             <NA>            <NA>        modéré        modéré          <NA>          <NA>          <NA>      3      0     0
9         22              sévère               <NA>                 <NA>             <NA>            <NA>        sévère          <NA>          <NA>          <NA>          <NA>      0      2     0
10        23                <NA>             sévère                 <NA>             <NA>            <NA>          <NA>          <NA>          <NA>          <NA>          <NA>      0      1     0
Seydou GORO
  • 1,147
  • 7
  • 13

2 Answers2

2

Here's a tidyverse approach. It assumes that all columns to be counted start with "Dégré", and then, using rowwise(), sums for each row the number of those columns that match each severity level.

library(tidyverse)

# For each patient (row), count how many of the "Dégré ..." columns hold each
# severity level. Comparisons against NA yield NA, so na.rm = TRUE makes
# missing grades count as zero rather than turning the whole sum into NA.
mydata %>%
  rowwise() %>% # evaluate the sums one row at a time
  mutate(
    sévère = sum(c_across(starts_with("Dégré")) == "sévère", na.rm = TRUE),
    modéré = sum(c_across(starts_with("Dégré")) == "modéré", na.rm = TRUE),
    grave = sum(c_across(starts_with("Dégré")) == "grave", na.rm = TRUE)
  ) %>%
  ungroup() # drop the rowwise grouping so later verbs act on whole columns
benson23
  • 16,369
  • 9
  • 19
  • 38
1

This is very similar to a question I answered about a week ago. Using apply() and a user-written function:

# Count how many entries of `x` equal the severity `level`.
# NA entries are ignored (na.rm = TRUE), so a row with no recorded grade
# yields 0. `level` is passed explicitly instead of being read from a global
# loop variable.
useful.fun <- function(x, level) {
  sum(x == level, na.rm = TRUE)
}

# Restrict the count to the severity columns. Running apply() on the whole
# data frame would also coerce and compare `record_id` and the count columns
# added by earlier iterations.
severity_cols <- startsWith(colnames(mydata), "Dégré")
severity_mat <- as.matrix(mydata[, severity_cols, drop = FALSE])

# One new column per severity level, named after the level itself.
for (level in c("modéré", "sévère", "grave")) {
  mydata[[level]] <- apply(
    severity_mat,
    MARGIN = 1,
    FUN = useful.fun,
    level = level
  )
}
riccardo-df
  • 512
  • 4
  • 9