Overview
Hello, I am working on a project that involves displaying a "best fit line" over raw data. I have very little statistical experience, so I am unsure which methodologies and functions to pursue. I am also unsure what the general output should look like.
I am working with sigmoidal data, which can be noisy at times. I was informed that I will end up using logistic regression rather than linear regression.
Goal
- Plot the approximated logistic regression fit over the raw data using ggplot2.
Sample dput() Data
structure(list(Temperature = c(0.35937, 0.3623, 0.88796, 1.38134,
1.89773, 2.40185, 2.90063, 3.40432, 3.92358, 4.40969, 4.91506,
5.42822, 5.93337, 6.43823, 6.95019, 7.46044, 7.95995, 8.45434,
8.98095, 9.48974, 10.00073, 10.5122, 11.00073, 11.51513, 12.03613,
12.54614, 13.04028, 13.5476, 14.04397, 14.58032, 15.07253, 15.58715,
16.09963, 16.60449, 17.11501, 17.60693, 18.12231, 18.63134, 19.14575,
19.63745, 20.16479, 20.65478, 21.15478, 21.64843, 22.15872, 22.65649,
23.1575, 23.67309, 24.17651, 24.67065, 25.19387, 25.69558, 26.19238,
26.7019, 27.20193, 27.70242, 28.19778, 28.70629, 29.19799, 29.69409,
30.20312, 30.70898, 31.21337, 31.71975, 32.21874, 32.7351, 33.22045,
33.74001, 34.24926, 34.73901, 35.26269, 35.75146, 36.26806, 36.76562,
37.28637, 37.77514, 38.29202, 38.78686, 39.2954, 39.80761, 40.31689,
40.81985, 41.31371, 41.8225, 42.3291, 42.85546, 43.3562, 43.87304,
44.37011, 44.88256, 45.38891, 45.89919, 46.40942, 46.92089, 47.42651,
47.94579, 48.479, 48.96218, 49.47411, 49.9851, 50.49438, 51.02368,
51.52905, 52.04907, 52.55493, 53.05493, 53.57543, 54.07836, 54.59548,
55.12451, 55.6206, 56.12866, 56.64379, 57.14745, 57.65945, 58.17553,
58.68432, 59.18408, 59.70019, 60.22167, 60.71703, 61.24246, 61.77538,
62.26391, 62.77612, 63.29614, 63.77807, 64.30053, 64.81689, 65.33279,
65.85131, 66.35229, 66.86694, 67.3933, 67.91723, 68.41577, 68.9436,
69.44677, 69.95141, 70.46655, 71.01635, 71.49514, 72.00906, 72.51269,
73.03542, 73.5498, 74.07055, 74.5747, 75.1018, 75.63061, 76.15283,
76.67504, 77.17822, 77.68456, 78.19848, 78.69775, 79.2124, 79.70727,
80.22656, 80.76611, 81.26049, 81.78369, 82.29101, 82.81469, 83.33544,
83.87496, 84.32372, 84.85815, 85.45971, 85.89111, 86.3623, 86.93578
), Absorbance = c(1.81071, 1.81388, 1.81683, 1.81888, 1.82262,
1.82458, 1.82688, 1.82958, 1.83234, 1.83512, 1.83743, 1.84024,
1.84237, 1.8451, 1.84772, 1.85036, 1.85254, 1.85495, 1.85805,
1.86069, 1.86304, 1.86508, 1.86808, 1.87077, 1.87352, 1.87564,
1.87863, 1.88164, 1.88402, 1.88598, 1.88886, 1.89159, 1.89392,
1.8968, 1.8995, 1.90179, 1.90508, 1.90725, 1.9098, 1.91265, 1.91516,
1.9173, 1.92062, 1.92298, 1.92563, 1.92855, 1.9307, 1.93383,
1.93642, 1.93903, 1.94168, 1.94381, 1.9462, 1.94994, 1.95289,
1.95581, 1.95902, 1.96158, 1.96398, 1.96661, 1.96978, 1.97321,
1.97583, 1.97916, 1.98271, 1.98456, 1.98892, 1.99297, 1.99605,
1.99921, 2.0035, 2.00686, 2.01138, 2.01495, 2.0189, 2.02396,
2.0282, 2.03317, 2.03781, 2.04254, 2.0479, 2.05363, 2.05974,
2.06564, 2.07107, 2.07914, 2.08561, 2.09258, 2.1002, 2.10902,
2.11876, 2.12582, 2.13495, 2.14506, 2.15465, 2.16517, 2.17522,
2.18627, 2.19739, 2.20907, 2.22094, 2.23388, 2.24563, 2.25891,
2.27144, 2.28452, 2.29779, 2.31205, 2.32543, 2.33695, 2.3501,
2.36332, 2.37649, 2.39207, 2.40574, 2.42009, 2.43282, 2.44392,
2.45723, 2.46878, 2.47973, 2.49073, 2.49976, 2.51041, 2.51965,
2.52679, 2.53644, 2.54241, 2.54962, 2.55618, 2.56106, 2.56637,
2.57346, 2.57632, 2.58174, 2.58477, 2.58925, 2.5937, 2.59516,
2.59829, 2.60149, 2.60401, 2.6065, 2.61033, 2.6111, 2.61375,
2.61648, 2.61617, 2.62002, 2.62089, 2.62385, 2.62798, 2.62696,
2.63116, 2.63123, 2.63459, 2.63557, 2.64139, 2.64367, 2.64472,
2.64471, 2.65139, 2.64948, 2.6567, 2.65765, 2.65911, 2.65614,
2.66194, 2.66976, 2.66926, 2.67418, 2.6769)), class = "data.frame", row.names = c(NA,
-172L))
Sample Code
library(ggplot2)

# Placeholder: replace the string below with the structure(...) expression
# produced by dput(), so that `df` is a data frame with numeric
# Temperature and Absorbance columns. ggplot() cannot plot a character string.
df <- "insert dput() code"

# Plot the raw sigmoidal curve (Absorbance vs. Temperature).
# The point colour is a fixed value, so it is passed to geom_point() directly
# instead of inside aes(): aes(color = "red") would treat "red" as a one-level
# discrete variable, draw the points in the default palette colour, and add a
# meaningless legend.
ggplot(df, aes(x = Temperature, y = Absorbance)) +
  geom_point(color = "red") +
  theme_classic()
If there are any R methods or statistical functions that I can implement, feel free to drop suggestions!