3

I try to create a survival prediction' diagramm

library("survival")
# fit regression
res.cox <- coxph(Surv(time, status) ~ age + sex + wt.loss, data =  lung)
res.cox

Fit a new data

sex_df <- with(lung,
               data.frame(sex = c(1, 2), 
                          age = rep(mean(age, na.rm = TRUE), 2),
                          wt.loss = rep(mean(wt.loss, na.rm = TRUE), 2)  ))

The diagramm

library("ggplot2")
fit <- survfit(res.cox, newdata = sex_df)
library(reshape2)
dat = data.frame(surv = fit$surv,lower= fit$lower, upper = fit$upper,time= fit$time)
head(dat)
head(melt(dat, id="time"))
data = melt(dat, id="time")

obj = strsplit(as.character(data$variable), "[.]") # делим текст на объекты по запятой

data$line = sapply(obj, '[', 1)
data$number = sapply(obj, '[', 2)

ggplot(data, aes(x=time, y=value, group=variable)) +
  geom_line(aes(linetype=line, color=as.factor(number), size=line)) +
  # geom_point(aes(color=number)) +
  theme(legend.position="top", axis.text = element_text(size = 20), 
        axis.title = element_text(size = 20), 
        legend.text=element_text(size=40),
        legend.key.size = unit(3,"line"))+
  scale_linetype_manual(values=c( 2,1,2))+ # "dotted", "twodash","dotted"
  scale_color_manual(values=c("#E7B800", "#2E9FDF", 'red'))+
  scale_size_manual(values=c(2, 3.5, 2)) +
  scale_x_continuous(limits=c(0, 840),
                     breaks=seq(0, 840, 120)) + ylab("Surv prob") + 
  guides(linetype = FALSE, size = FALSE, color = guide_legend(override.aes = list(size=5))) + labs(color='') + 
  geom_ribbon(aes(ymin = rep(data$value[data$line == 'lower' & 
                                          data$number == "1"],6), 
       ymax = rep(data$value[data$line == 'upper' & data$number == "1"],6)), 
       fill = "#E7B800",alpha=0.1) +
  geom_ribbon(aes(ymin = rep(data$value[data$line == 'lower' & data$number == "2"],6), 
                  ymax = rep(data$value[data$line == 'upper' & data$number == "2"],6)), 
              fill = "#2E9FDF",alpha=0.1) 

enter image description here

The QUESTION The diagramm is ok but but I have to add with hands this

geom_ribbon(aes(ymin = rep(data$value[data$line == 'lower' & data$number == "2"],6), 
                      ymax = rep(data$value[data$line == 'upper' & data$number == "2"],6)), 
                  fill = "#2E9FDF",alpha=0.1) 

And if there were three, but not two elements in the new data, you would have to rewrite the code. Is it possible to rewrite the code so that it does not depend on the number of elements of new data? I try to use a loop

temp = list()
uniq <- unique(unlist(data$number))
for (i in 1:length(levels(as.factor(data$number)))) {
  n = geom_ribbon(aes(ymin = rep(data$value[data$line == 'lower' & data$number == uniq[i]],6), 
                        ymax = rep(data$value[data$line == 'upper' & data$number == uniq[i]],6)), 
                  fill = "#2E9FDF", alpha=0.1) # 
  temp = append(n, temp)
  }
temp

but this is an unsuccessful attempt. Thanks for any idea

Edward
  • 4,443
  • 16
  • 46
  • 81

1 Answers1

3

By reshaping the data.frame so that surv, lower, and upper are separate vectors, you can group the geom_ribbon by your elements rather than the "meaning" of the lines.

Below is the code using the tidyr package; the first section is simply your code for generating the data.

library(survival)
library(reshape2)
library(ggplot2)

# fit regression
res.cox <- coxph(Surv(time, status) ~ age + sex + wt.loss, data =  lung)
res.cox

sex_df <- with(lung,
               data.frame(sex = c(1, 2), 
                          age = rep(mean(age, na.rm = TRUE), 2),
                          wt.loss = rep(mean(wt.loss, na.rm = TRUE), 2)  ))


fit <- survfit(res.cox, newdata = sex_df)

dat = data.frame(surv = fit$surv,lower= fit$lower, upper = fit$upper,time= fit$time)

head(dat)
head(melt(dat, id="time"))
data = melt(dat, id="time")

# Reformats the data into format with the survival curve and the confidence intervals in their own columns
library(tidyr)

data_wide <- data %>%
  separate(col = variable, into = c("type", "sex"), sep = "\\.") %>%
  spread(key = type, value = value)


ggplot(data = data_wide) +
  geom_line(aes(x = time, y = surv, group = sex, colour = sex),
            size = 3.5,
            linetype = 1) +
  geom_line(aes(x = time, y = lower, group = sex, colour = sex),
            size = 2,
            linetype = 2) +
  geom_line(aes(x = time, y = upper, group = sex, colour = sex),
            size = 2,
            linetype = 2) +
  # Geom_ribbom now grouped by sex
  geom_ribbon(aes(x = time, ymin = lower, ymax = upper, group = sex, fill = sex),
              alpha = 0.1) +
  scale_colour_manual(values = c("#E7B800", "#2E9FDF")) +
  scale_fill_manual(values = c("#E7B800", "#2E9FDF")) +
  scale_x_continuous(limits = c(0, 840),
                     breaks = seq(0, 840, 120)) +
  theme(legend.position = "top",
        axis.text = element_text(size = 20),
        axis.title = element_text(size = 20),
        legend.text = element_text(size = 40),
        legend.key.size = unit(3, "line")) +
  ylab("Surv prob")

And this is the plot output: enter image description here

We add another element to test if this works, you will have to add more colours to scale_colour_manual and scale_fill_manual.

library(dplyr)
data_wide2 <- filter(data_wide, sex == "1") %>%
  mutate(sex = "3",
         surv = surv - 0.2,
         upper = upper - 0.2,
         lower = lower - 0.2) %>%
  rbind(data_wide)

This gives the following plot:

enter image description here

Fragilaria
  • 159
  • 7