0

I have data on the monthly number of people varying with a variable (a count) over a period of 30 years, 1987-2018. I have divided this period in three shorter periods to check whether the correlation is approximately the same then and now, fitting a model with the var as a response, the number of people and periods as predictors, and a Poisson distribution for the response.

I have made this script in ggplot, but want to make it work in the basic R plot mode. I also want the equation, R squared and the p-value in each of the three plots. This script works in ggplot:

library(ggplot2)
p <- ggplot(df, aes(people,var)) + geom_point()
p + facet_grid(rows=vars(period))
p + facet_grid(rows=vars(period)) + 
   stat_smooth(method="glm", method.args = list(family = "poisson"))
summary(glm(var~people+period, family=poisson, data=df))

And it makes this plot: enter image description here

Here's the data:

> dput(df)
structure(list(period = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), month = c("JAN", 
"FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", 
"NOV", "DEC", "JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", 
"AUG", "SEP", "OCT", "NOV", "DEC", "JAN", "FEB", "MAR", "APR", 
"MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"), people = c(4068L, 
7251L, 14384L, 20513L, 18748L, 17760L, 23433L, 22878L, 12815L, 
8101L, 7477L, 5018L, 5063L, 11103L, 20946L, 32192L, 22665L, 29605L, 
37590L, 36692L, 16474L, 8989L, 5729L, 3909L, 8391L, 18638L, 31223L, 
40583L, 26889L, 40074L, 53218L, 52087L, 23873L, 13694L, 9354L, 
9822L), var = c(2L, 1L, 3L, 5L, 3L, 0L, 4L, 5L, 1L, 1L, 0L, 1L, 
0L, 2L, 1L, 1L, 4L, 1L, 3L, 4L, 0L, 0L, 0L, 1L, 0L, 0L, 2L, 6L, 
1L, 2L, 2L, 3L, 1L, 0L, 0L, 0L)), class = "data.frame", row.names = c(NA, 
-36L))
Dag
  • 569
  • 2
  • 5
  • 20
  • You can make a set of graphs, but I don't think the base packages alone will permit you to overlay information such as a p-value and r-squared. What is ggplot not sufficient? – Ben Sep 29 '20 at 18:24

1 Answers1

3

I think what you want is below. Not sure what you're intending with the R-squared, right now it's coded as the squared correlation between the fitted and observed values, but this isn't a linear regression, so that measure of fit is probably not similarly appropriate here. In any event, you could sub in whatever calculation you wanted there.

par(mfrow=c(2,2))
  with(subset(df, period == 1), 
       plot(var ~ people, 
            col=cols[1], 
            pch=shapes[1],
       xlim = range(df$people), 
       ylim=range(e$fit), 
       main="Period 1"))
  mod1 <- glm(var~people, family=poisson, data=subset(df, period==1))
  b1 <- coef(mod1)
  txt <- paste("log(mu)=", round(b1[1], 3), " + ", round(b1[2], 6), "People", sep="")
  text(2000, 50, labels=txt, pos=4)
  text(2000, 57.5, label=paste("R-squared = ", round(cor(mod1$fitted, mod1$y)^2, 3)), pos=4)
  text(2000, 42.5, label=paste("p-value = ", round(summary(mod1)$coefficients[2,4], 3)), pos=4)
  with(subset(e, period == 1), 
       lines(people, fit, col=cols[1]))
  with(subset(df, period == 2), 
       plot(var ~ people, 
            col=cols[2], 
            pch=shapes[2],
       xlim = range(df$people), 
       ylim=range(e$fit), 
       main="Period 2"))
  mod2 <- glm(var~people, family=poisson, data=subset(df, period==2))
  b2 <- coef(mod2)
  txt <- paste("log(mu)=", round(b2[1], 3), " + ", round(b2[2], 6), "People", sep="")
  text(2000, 50, labels=txt, pos=4)
  text(2000, 57.5, label=paste("R-squared = ", round(cor(mod2$fitted, mod2$y)^2, 3)), pos=4)
  text(2000, 42.5, label=paste("p-value = ", round(summary(mod2)$coefficients[2,4], 3)), pos=4)
    with(subset(e, period == 2), 
       lines(people, fit, col=cols[2]))
  with(subset(df, period == 3), 
       plot(var ~ people, 
            col=cols[3], 
            pch=shapes[3],
       xlim = range(df$people), 
       ylim=range(e$fit), 
       main="Period 3"))
  mod3 <- glm(var~people, family=poisson, data=subset(df, period==3))
  b3 <- coef(mod3)
  txt <- paste("log(mu)=", round(b3[1], 3), " + ", round(b3[2], 6), "People", sep="")
  text(2000, 50, labels=txt, pos=4)
  text(2000, 57.5, label=paste("R-squared = ", round(cor(mod3$fitted, mod3$y)^2, 3)), pos=4)
  text(2000, 42.5, label=paste("p-value = ", round(summary(mod3)$coefficients[2,4], 3)), pos=4)
  with(subset(e, period == 3), 
       lines(people, fit, col=cols[3]))

enter image description here

DaveArmstrong
  • 18,377
  • 2
  • 13
  • 25