I want to estimate an "average curve" from the curves of multiple trials. I have done this before using approx(), but then I had a fixed set of x-axis values against which y was measured.
In this dataset, values are mixed for both x and y (i.e., there are no fixed values of x for which y has been measured). Instead, there is a different set of x values for every trial.
Is there a way to average curves in these situations (with standard errors)?
Alternatively: how would you extract y-values (for a fixed set of x-values) from the different curves and construct a new data frame?
I have provided a sample dataset (melted) and the code for plotting the curves for the individual trials. P1, P2, P3, P4, and P5 are the names/IDs of the individual trials.
> dput(head(dat,74))
# dput() representation of the first 74 rows of `dat`: a data.frame with three
# columns.
#   ID   - factor identifying the trial. Eleven levels are declared
#          ("LCRA_P1".."LCRA_P6", "P1".."P5"), but only codes 7-11
#          (i.e. "P1".."P5") occur in these rows.
#   Time - integer vector; the values differ from trial to trial.
#   I    - numeric (double) vector of the measured response.
# NOTE(review): as printed this expression is not assigned to anything; to
# reuse it, presumably prefix it with `dat <- ` — confirm.
structure(list(ID = structure(c(7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L), .Label = c("LCRA_P1", "LCRA_P2",
"LCRA_P3", "LCRA_P4", "LCRA_P5", "LCRA_P6", "P1", "P2", "P3",
"P4", "P5"), class = "factor"), Time = c(170L, 452L, 572L, 692L,
812L, 932L, 1052L, 1172L, 1292L, 1412L, 1532L, 1652L, 1772L,
1892L, 2012L, 2132L, 2252L, 54L, 290L, 410L, 530L, 650L, 770L,
890L, 1010L, 1130L, 1250L, 1370L, 1490L, 1610L, 1730L, 1850L,
1970L, 115L, 235L, 355L, 475L, 595L, 715L, 835L, 955L, 1075L,
1195L, 1315L, 1435L, 1555L, 1675L, 1795L, 135L, 201L, 321L, 441L,
561L, 681L, 801L, 921L, 1041L, 1161L, 1281L, 1401L, 100L, 251L,
371L, 431L, 491L, 611L, 731L, 791L, 851L, 911L, 971L, 1031L,
1091L, 1151L), I = c(154.5066034, 138.3819058, 104.8425346, 61.6283449,
40.34374398, 35.18384073, 29.37894957, 40.34374398, 44.85865933,
27.44398585, 31.9589012, 41.6337198, 54.53347792, 64.20829652,
70.65817559, 66.78824815, 66.78824815, 154.5066034, 90.00781278,
73.88311512, 62.2733328, 61.6283449, 57.75841746, 53.24350211,
48.08359886, 55.17846583, 51.30853839, 42.92369561, 53.24350211,
50.66355049, 54.53347792, 38.40878026, 54.53347792, 154.5066034,
73.88311512, 62.2733328, 61.6283449, 57.75841746, 53.24350211,
48.08359886, 55.17846583, 51.30853839, 42.92369561, 38.40878026,
54.53347792, 37.79284177, 35.21289014, 39.08281758, 154.5066034,
129.997063, 84.84790953, 51.30853839, 40.98873189, 33.24887701,
29.37894957, 27.44398585, 33.24887701, 33.89386492, 31.9589012,
31.9589012, 135.1569662, 85.49289744, 48.08359886, 48.08359886,
22.2840826, 27.44398585, 49.37357467, 51.30853839, 31.9589012,
28.73396167, 23.57405841, 21.63909469, 9.384324471, 25.50902213
)), .Names = c("ID", "Time", "I"), row.names = c(NA, 74L), class = "data.frame")
(The code for plotting is included)
# Plot every trial as its own curve: one point per measurement, joined by a
# line per ID and coloured by ID.
# The ggplot() call is closed before the first `+`; without that closing
# parenthesis the expression is unbalanced and R cannot parse it.
ggplot(dat, aes(x = Time, y = I, colour = ID)) +
  geom_point() +
  labs(x = "Time (Seconds)", y = "Infiltration (mm/hour)") +
  # Fixed tick positions: every 100 on x, every 10 on y.
  scale_x_continuous(breaks = seq(0, 2500, 100)) +
  scale_y_continuous(breaks = seq(0, 160, 10)) +
  # group = ID keeps each trial's points connected separately.
  geom_line(aes(group = ID))
To average, I used this:
# Attempted "average curve": summarise I with mean_se and draw the summary
# using the smooth geom.
# NOTE(review): `df2` is not defined anywhere in this excerpt; presumably it
# has the same Time / I columns as `dat` — confirm.
# NOTE(review): `mult` is forwarded through `...`; whether it reaches mean_se
# depends on the ggplot2 version — verify against the stat_summary docs.
ggplot(data = df2, mapping = aes(x = Time, y = I)) +
  stat_summary(
    fun.data = "mean_se",
    mult = 1,
    geom = "smooth"
  )
The result (the figure below) does not make any sense.