I am trying to get a data frame of t.test outputs grouped by species for many variables. I have a subset of data that looks like this;
structure(list(Species = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L), levels = c("Apetahia longistigmata",
"Clermontia fauriei", "Cyanea fissa", "Cyanea hardyi", "Lobelia gregoriana",
"Lobelia hypoleuca", "Lobelia lukwangulensis", "Trematolobelia kauaiensis"
), class = "factor"), Experiment = structure(c(1L, 1L, 1L, 2L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L), levels = c("standard 1",
"standard 2"), class = "factor"), Alpha.peak.1b = c(-78.35, -78.34,
-78.84, -78.85, -78.01, -78.17, -79.17, -79.01, -79.01, -79.18,
-79.18, -79.72, -77.85, -77.86, -77.69, -78.01, -77.86), Alpha.recrystalization.peak = c(-70.34,
-78.34, -70.83, -70.67, -70.67, -71.17, -71, -71.17, -70.67,
-70.84, -71.01, -70.67, -70.67, -70.68, -70.84, -70.67, -70.68
), Beta.peak.1a = c(-39.37, -39.63, -39.85, -39.53, -39.69, -39.85,
-40.52, -40.52, -40.69, -40.86, -40.69, -40.68, -38.87, -38.56,
-38.72, -38.86, -38.39)), row.names = c(NA, -17L), class = c("tbl_df",
"tbl", "data.frame"))
I am trying the following;
df.ttest<-df %>%
group_by(Species) %>%
group_modify(~ broom::tidy(df[,3:ncol(df)], 2, function(x) t.test(x ~ Experiment, data = df)))
On my subset of data i get the following error;
Error: unexpected ',' in "ction(x) t.test(x ~ Experiment, data = df)))
However, on my full data, I get an output that looks like this;
structure(list(Species = structure(c(1L, 1L, 1L), levels = c("Apetahia longistigmata",
"Clermontia fauriei", "Cyanea fissa", "Cyanea hardyi", "Lobelia gregoriana",
"Lobelia hypoleuca", "Lobelia lukwangulensis", "Trematolobelia kauaiensis"
), class = "factor"), column = c("Alpha.peak.1b", "Alpha.recrystalization.peak",
"Beta.peak.1a"), n = c(Alpha.peak.1b = 47, Alpha.recrystalization.peak = 47,
Beta.peak.1a = 47), mean = c(Alpha.peak.1b = -78.5808510638298,
Alpha.recrystalization.peak = -71.0140425531915, Beta.peak.1a = -39.4
), sd = c(Alpha.peak.1b = 0.577779970860107, Alpha.recrystalization.peak = 1.18348905612851,
Beta.peak.1a = 0.869930032169005), median = c(Alpha.peak.1b = -78.68,
Alpha.recrystalization.peak = -70.84, Beta.peak.1a = -39.52),
trimmed = c(Alpha.peak.1b = -78.5807692307692, Alpha.recrystalization.peak = -70.8828205128205,
Beta.peak.1a = -39.4182051282051), mad = c(Alpha.peak.1b = 0.490000000000009,
Alpha.recrystalization.peak = 0.170000000000002, Beta.peak.1a = 0.669999999999995
), min = c(Alpha.peak.1b = -79.72, Alpha.recrystalization.peak = -78.34,
Beta.peak.1a = -40.86), max = c(Alpha.peak.1b = -77.52, Alpha.recrystalization.peak = -69.51,
Beta.peak.1a = -37.69), range = c(Alpha.peak.1b = 2.2, Alpha.recrystalization.peak = 8.83,
Beta.peak.1a = 3.17), skew = c(Alpha.peak.1b = 0.027222651634338,
Alpha.recrystalization.peak = -5.13007598432232, Beta.peak.1a = 0.112391518548281
), kurtosis = c(Alpha.peak.1b = 2.00457929376749, Alpha.recrystalization.peak = 32.7138551153747,
Beta.peak.1a = 1.9708376743257), se = c(Alpha.peak.1b = 0.0842778705371632,
Alpha.recrystalization.peak = 0.172629621110036, Beta.peak.1a = 0.126892336746095
)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
), row.names = c(NA, -3L), groups = structure(list(Species = structure(1L, levels = c("Apetahia longistigmata",
"Clermontia fauriei", "Cyanea fissa", "Cyanea hardyi", "Lobelia gregoriana",
"Lobelia hypoleuca", "Lobelia lukwangulensis", "Trematolobelia kauaiensis"
), class = "factor"), .rows = structure(list(1:3), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -1L), .drop = TRUE))
EDIT for better expected outcome:
I am expecting an output that has the t.test
output but also columns for species and variables, respectively. For example;
structure(list(Species = "Species x", Variable = "Variable x",
estimate = 0.10705965418003, estimate1 = 0.460507391694923,
estimate2 = 0.353447737514893, statistic = c(t = 1.5638634504756),
p.value = 0.198127496637623, parameter = c(df = 3.72179497232624),
conf.low = -0.0887480304981188, conf.high = 0.30286733885818,
method = "Welch Two Sample t-test", alternative = "two.sided"), row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame"))
Any help is greatly appreciated!