I've been trying for days now to sort the order of strata and flows in ggalluvial. I want to visualize the flow of patients through different screening procedures (X1, X2, X3, X4) and color the flows based on the final diagnosis (the values in X4).
Can you help me sort the values within the groups in the first column of Examples A and B? Within each group, I want all red, all yellow, and all blue values stacked together, one color on top of the other.
So far I have tried various combinations of the wide format, `aes.flow = "backward"` and `aes.flow = "forward"`, `lode.guidance`, and `lode.ordering`...
If this is not possible in ggalluvial but possible in other packages, I'd like to know as well.
Thanks in advance.
DATA in wide format:
# Simulate 879 patients moving through four screening steps (X1 -> X4).
# Every step is drawn conditionally on the value of the previous step, and
# the seed makes the draws reproducible. The sample() calls are kept in
# their original order so the random stream is consumed identically.
set.seed(1)

data <- tibble(
  ID = 1:879,
  X1 = sample(
    c("only_parent", "parent_and_3D", "only_3D"),
    size = 879,
    prob = c(0.1, 0.8, 0.1),
    replace = TRUE
  )
) %>%
  mutate(
    # Step 2: which identification(s) followed from the step-1 source.
    X2 = case_when(
      X1 == "only_parent" ~ sample(
        c("only_I", "not_identified"),
        size = n(), prob = c(0.1, 0.9), replace = TRUE
      ),
      X1 == "parent_and_3D" ~ sample(
        c("only_I", "both_I_and_II", "only_II", "not_identified"),
        size = n(), prob = c(0.05, 0.05, 0.2, 0.7), replace = TRUE
      ),
      X1 == "only_3D" ~ sample(
        c("only_II", "not_identified"),
        size = n(), prob = c(0.1, 0.9), replace = TRUE
      ),
      TRUE ~ NA_character_
    ),
    # Step 3: follow-up category, conditional on step 2.
    X3 = case_when(
      X2 == "only_I" ~ "PO_only",
      X2 == "both_I_and_II" ~ sample(
        c("PO_and_EHL", "PO_and_F/T", "PO_and_F/T_and_EHL"),
        size = n(), prob = c(0.3, 0.5, 0.2), replace = TRUE
      ),
      # NOTE(review): these weights sum to 1.1; sample() rescales `prob`,
      # so the code runs, but confirm the intended proportions.
      X2 == "only_II" ~ sample(
        c("F/T", "F/T_and_EHL", "EHL"),
        size = n(), prob = c(0.1, 0.6, 0.4), replace = TRUE
      ),
      X2 == "not_identified" ~ "not_identified",
      TRUE ~ NA_character_
    ),
    # Step 4: final diagnosis, conditional on step 3.
    X4 = case_when(
      X3 == "PO_only" ~ sample(
        c("Two_primary_ind", "One_primary_ind", "No TW"),
        size = n(), prob = c(0.02, 0.1, 0.88), replace = TRUE
      ),
      X3 == "PO_and_EHL" ~ sample(
        c("Two_primary_ind", "One_primary_ind", "No TW"),
        size = n(), prob = c(0.05, 0.2, 0.75), replace = TRUE
      ),
      X3 == "PO_and_F/T" ~ sample(
        c("Two_primary_ind", "One_primary_ind", "No TW"),
        size = n(), prob = c(0.05, 0.2, 0.75), replace = TRUE
      ),
      X3 == "PO_and_F/T_and_EHL" ~ sample(
        c("Two_primary_ind", "One_primary_ind", "No TW"),
        size = n(), prob = c(0.05, 0.2, 0.75), replace = TRUE
      ),
      X3 == "F/T" ~ sample(
        c("Two_primary_ind", "One_primary_ind", "No TW"),
        size = n(), prob = c(0.02, 0.1, 0.88), replace = TRUE
      ),
      X3 == "F/T_and_EHL" ~ sample(
        c("Two_primary_ind", "One_primary_ind", "No TW"),
        size = n(), prob = c(0.05, 0.2, 0.75), replace = TRUE
      ),
      # NOTE(review): these weights also sum to 1.1 (rescaled by sample()).
      X3 == "EHL" ~ sample(
        c("Two_primary_ind", "One_primary_ind", "No TW"),
        size = n(), prob = c(0.02, 0.2, 0.88), replace = TRUE
      ),
      X3 == "not_identified" ~ "not_identified",
      TRUE ~ NA_character_
    )
  )

head(data)
# A tibble: 6 x 5
ID X1 X2 X3 X4
<int> <chr> <chr> <chr> <chr>
1 1 parent_and_3D not_identified not_identified not_identified
2 2 parent_and_3D only_II F/T_and_EHL No TW
3 3 parent_and_3D not_identified not_identified not_identified
4 4 only_parent only_I PO_only No TW
5 5 parent_and_3D only_II F/T_and_EHL No TW
6 6 only_3D not_identified not_identified not_identified
Example A
The values are not sorted in the bottom box of the first column.
# Collapse the patients to one row per distinct X1-X4 path (n = number of
# patients on that path), tag each path with its final diagnosis as a factor
# with a fixed level order, number the paths in that order, and reshape to
# one row per path and screening step.
data_long_a <- data %>%
  count(X1, X2, X3, X4) %>%
  mutate(
    fill_stat = factor(
      X4,
      levels = c("not_identified", "No TW", "One_primary_ind", "Two_primary_ind")
    )
  ) %>%
  arrange(fill_stat) %>%
  # Path id assigned after sorting, so ids increase with the diagnosis level.
  mutate(subject = seq_len(n())) %>%
  gather(key = "key", value = "value", X1:X4) %>%
  mutate(key = factor(key, levels = c("X1", "X2", "X3", "X4"))) %>%
  arrange(key, fill_stat)
# Alluvial plot of the first two screening steps only (X1 -> X2); flows are
# filled by the final diagnosis carried in fill_stat. The four manual colours
# are unnamed, so they are assigned to the fill_stat levels in order.
ggplot(
  data = filter(data_long_a, key %in% c("X1", "X2")),
  mapping = aes(
    x = key,
    y = n,
    stratum = value,
    alluvium = subject,
    label = value
  )
) +
  geom_flow(aes(fill = fill_stat)) +
  geom_stratum() +
  geom_text(stat = "stratum") +
  scale_fill_manual(values = c("#BAB3B3EB", "red", "yellow", "blue")) +
  theme_void()
Example B
The flow lines in the first column are not sorted.
# Same reshaping as for Example A, but without the X1 step and without the
# patients whose final diagnosis is "not_identified": one row per distinct
# X2-X4 path and screening step, with n = number of patients on the path.
data_long_b <- data %>%
  filter(X4 != "not_identified") %>%
  select(-X1) %>%
  count(X2, X3, X4) %>%
  mutate(
    fill_stat = factor(
      X4,
      levels = c("not_identified", "No TW", "One_primary_ind", "Two_primary_ind")
    )
  ) %>%
  arrange(fill_stat) %>%
  # Path id assigned after sorting, so ids increase with the diagnosis level.
  mutate(subject = seq_len(n())) %>%
  gather(key = "key", value = "value", X2:X4) %>%
  mutate(key = factor(key, levels = c("X2", "X3", "X4"))) %>%
  arrange(key, fill_stat)
# Alluvial plot across X2 -> X3 -> X4, flows filled by final diagnosis.
# aes.flow = "backward" is the non-default setting for how flows inherit
# aesthetics from the lodes they connect (see the ggalluvial documentation).
ggplot(
  data = data_long_b,
  mapping = aes(
    x = key,
    y = n,
    stratum = value,
    alluvium = subject,
    label = value
  )
) +
  geom_flow(
    mapping = aes(fill = fill_stat),
    aes.flow = "backward"
  ) +
  geom_stratum() +
  geom_text(stat = "stratum") +
  scale_fill_manual(values = c("red", "yellow", "blue")) +
  theme_void()