0

I want to plot, on the X axis, the percentage of patients in each treatment allocation who present each disease. However, with the code below I only get the absolute number of patients presenting each disease in each treatment allocation. For example, 5 patients allocated to TB-Treatment present anemia, whereas what I need to show is that 14% of the patients allocated to TB-Treatment present anemia.

# Dot plot of disease frequency by treatment allocation (axes flipped so the
# disease names read horizontally).
# NOTE: the y value is the per-group row count divided by a fixed 102, i.e. a
# rescaled count -- it is NOT a percentage of each treatment allocation's own
# total, which is what this question is asking how to obtain.
ggplot(
  mydata,
  aes(
    x = disease2,
    # after_stat(count) replaces the deprecated `..count..` notation.
    y = (after_stat(count) / 102) * 100,
    colour = treatment_allocation
  )
) +
  geom_point(
    stat = "count",
    position = "dodge",
    aes(shape = treatment_allocation),
    size = 4
  ) +
  coord_flip() +
  labs(x = "", y = "% of patients") +
  scale_colour_manual(
    values = c("grey", "goldenrod3", "deepskyblue3", "seagreen3")
  ) +
  # Tick marks every 5 units from 0 to 95.
  scale_y_continuous(breaks = seq(0, 95, 5)) +
  scale_shape_manual(values = c(15, 16, 17, 18))

enter image description here

The data is:

# Reproducible example data (dput-style literal).
# Builds `mydata` with class c("data.table", "data.frame") and row.names
# declaring 247 rows. It has three factor columns:
#   * sae_safety_coordinator - levels "AE", "SAE", "AR", "SAR", "UAR", "SUSAR"
#   * treatment_allocation   - four treatment levels; two of the labels are
#                              written with a line break inside the string
#   * disease2               - 48 disease labels; the integer codes include
#                              one NA
mydata <- structure(list(
  sae_safety_coordinator = structure(c(
    2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
    2L, 2L, 2L, 2L, 2L, 2L, 2L
  ), .Label = c(
    "AE", "SAE", "AR", "SAR",
    "UAR", "SUSAR"
  ), class = "factor"), treatment_allocation =
    structure(c(
      2L, 4L, 4L, 1L, 1L, 1L, 2L, 2L, 3L, 3L, 3L, 4L, 1L, 1L,
      1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 3L, 4L, 3L, 4L, 4L, 4L, 4L, 4L,
      4L, 3L, 1L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 3L, 1L, 1L, 1L,
      1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 4L, 3L, 3L, 3L, 1L, 4L, 3L, 2L,
      1L, 1L, 3L, 2L, 4L, 4L, 1L, 1L, 3L, 3L, 3L, 4L, 4L, 4L, 2L, 3L, 3L,
      3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 4L, 4L, 1L, 2L, 1L,
      1L, 1L, 1L, 3L, 3L, 3L, 4L, 4L, 4L, 1L, 2L, 2L, 3L, 3L, 2L, 1L, 4L,
      3L, 4L, 4L, 4L, 4L, 4L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 4L, 4L,
      3L, 3L, 3L, 2L, 3L, 4L, 4L, 4L, 1L, 1L, 1L, 2L, 2L, 2L, 4L, 4L, 3L,
      4L, 4L, 4L, 4L, 4L, 2L, 2L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L,
      3L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 4L, 3L, 2L,
      4L, 2L, 1L, 1L, 4L, 4L, 4L, 4L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 4L,
      4L, 4L, 1L, 3L, 3L, 1L, 1L, 3L, 2L, 2L, 2L, 4L, 4L, 4L, 1L, 1L, 1L,
      1L, 2L, 3L, 4L, 3L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 4L, 4L, 1L, 4L,
      2L, 2L, 2L, 1L, 2L, 2L, 2L, 3L, 4L, 4L, 1L, 1L
    ), .Label = c("Standard
of Care", "TB-Treatment", "Valganciclovir", "TB-Treatment +
Valganciclovir"), class = "factor"),
  disease2 = structure(c(
    41L, 41L, 36L, 42L, 19L, 10L, 11L,
    10L, 4L, 11L, 41L, 28L, 42L, 26L, 41L, 2L, 15L, 28L, 42L,
    19L, 19L, 2L, 11L, 34L, 41L, 42L, 15L, 11L, 12L, 42L, 41L,
    19L, 42L, 41L, 19L, 42L, 41L, 8L, 19L, 4L, 19L, 28L, 26L,
    42L, 42L, 42L, 43L, 48L, 41L, 42L, 37L, 27L, 41L, 33L, 41L,
    28L, 8L, 36L, 12L, 43L, 2L, 41L, 43L, 42L, 41L, 41L, 42L,
    42L, 41L, 33L, 12L, 37L, 42L, 41L, 36L, 41L, 41L, 12L, 28L,
    41L, 38L, 12L, 41L, 37L, 42L, 2L, 41L, 28L, 28L, 11L, 28L,
    1L, 41L, 35L, 36L, 26L, 41L, 41L, 10L, 48L, 42L, 2L, 42L,
    29L, 8L, 4L, 41L, 4L, 42L, 2L, 41L, 42L, 37L, 11L, 41L, 41L,
    42L, 41L, 8L, 11L, 46L, 26L, 41L, 28L, 44L, 2L, 36L, 42L,
    41L, 48L, 41L, 11L, 42L, 10L, 41L, 41L, 28L, 11L, 41L, 25L,
    11L, 27L, 42L, 11L, 42L, 36L, 43L, 42L, 28L, 42L, 42L, 3L,
    12L, 41L, 2L, 36L, 28L, 3L, 42L, 28L, 28L, 41L, 35L, 35L,
    43L, 11L, 33L, 37L, 21L, 41L, 42L, 19L, 28L, 2L, 42L, 9L,
    41L, 47L, 36L, 41L, 41L, 42L, 39L, 2L, 41L, 41L, 40L, 41L,
    26L, 11L, 19L, 4L, 26L, 41L, 13L, 11L, 41L, 47L, 2L, 19L,
    36L, 42L, 38L, 41L, 36L, 41L, 42L, 26L, NA, 42L, 11L, 41L,
    19L, 11L, 3L, 26L, 42L, 37L, 5L, 28L, 28L, 32L, 19L, 41L,
    19L, 13L, 11L, 20L, 33L, 5L, 42L, 4L, 42L, 19L, 41L, 28L,
    19L, 42L, 41L, 11L, 41L, 37L, 42L, 40L, 41L, 24L, 28L
  ), .Label = c(
    "Nosocomial condition",
    "Unattended death", "IRIS", "Malaria", "Acidosis", "Abnormality of albumin",
    "Hypo-osmolality and hyponatremia", "Elevated transaminases",
    "Lymphocytopenia", "Thrombocytopenia", "Anemia", "Neutropenia",
    "Leukopenia", "Sickle-cell disease without crisis", "Skin eruption",
    "Scabies", "Hookworm disease, unspecified", "Injury of kidney",
    "Sepsis", "Urinary tract infection, site not specified",
    "Acute kidney failure, unspecified", "Balanitis", "Unspecified hemorrhoids",
    "Necrotizing ulcerative stomatitis", "Hemorrhagic condition",
    "Nutritional marasmus", "HIV complications", "Gastroenteritis and colitis",
    "Abdominal pain", "Ascariasis", "Candidiasis", "Congenital malformation of heart, unspecified",
    "Cardiac failure and arrhythmia", "Disseminated intravascular coagulation",
    "Other secondary pulmonary hypertension", "Respiratory failure, unspecified",
    "Pneumonia due to COVID-19", "Pneumonitis due to inhalation of food and vomit",
    "Aspiration of fluid complication", "Pneumothorax, unspecified",
    "Pneumonia, unspecified organism", "Tuberculosis", "Upper respiratory infection",
    "Otitis media", "Mucopurulent conjunctivitis", "Unspecified viral encephalitis",
    "Bacterial meningitis, unspecified", "Disorders of nervous system"
  ), class = "factor")
), class = c("data.table", "data.frame"), row.names = c(NA, -247L))
Meisam
  • 601
  • 1
  • 3
  • 16
Sara
  • 185
  • 1
  • 9
  • 2
    Could you please put `mydata` values in your question using the following code `dput(mydata)` so that we can use it to reproduce your graph – Yacine Hajji May 31 '23 at 09:09
  • A little off-topic, but you can replace `c(0,5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95)` with `seq(0, 95, 5)`. – mhovd May 31 '23 at 10:03

1 Answers1

1

Most of the time it is easier to modify your data frame outside the ggplot2 call and then just plot it. Here I have summarised the data to get the count for each disease condition as well as the total count for each treatment allocation. You can then plot it in ggplot2, calculating the percentage as 100 * count / total_count:

# Percentage of each treatment allocation's records that report each disease.
# Counts are taken per (treatment_allocation, disease2) only: also grouping by
# sae_safety_coordinator would split one disease within an arm into several
# rows, so no single point would show the arm-level percentage.
mydata %>%
  # One row per treatment arm and disease, with the number of records.
  count(treatment_allocation, disease2, name = "count") %>%
  # Denominator: total number of records in the same treatment arm.
  add_count(treatment_allocation, wt = count, name = "total_count") %>%
  ggplot(
    aes(
      x = disease2,
      y = 100 * count / total_count,
      colour = treatment_allocation
    )
  ) +
  geom_point(aes(shape = treatment_allocation), size = 4) +
  coord_flip() +
  labs(x = "", y = "% of patients") +
  scale_colour_manual(
    values = c("grey", "goldenrod3", "deepskyblue3", "seagreen3")
  )

output:enter image description here

Meisam
  • 601
  • 1
  • 3
  • 16