Raw Data: Here is the spreadsheet
I have 50 states, weekly data from 1997 to current that look like this:
structure(list(date = c("1/1/2011", "1/8/2011", "1/15/2011",
"1/22/2011", "1/29/2011", "2/5/2011", "2/12/2011", "2/19/2011",
"2/26/2011", "3/5/2011", "3/12/2011", "3/19/2011", "3/26/2011",
"4/2/2011", "4/9/2011"), CT = c(8593L, 14629L, 8084L, 6986L,
6050L, 6368L, 5408L, 5416L, 6098L, 5260L, 4428L, 3823L, 3808L,
4366L, 4697L), MA = c(13656L, 14779L, 9462L, 8565L, 9575L, 8548L,
9248L, 7569L, 11373L, 8891L, 7113L, 6775L, 4524L, 7099L, 7390L
), ME = c(2521L, 3811L, 3239L, 2306L, 2381L, 2000L, 1878L, 1745L,
1582L, 2008L, 1887L, 1676L, 1707L, 1898L, 1843L), NH = c(3155L,
2892L, 1983L, 1961L, 1948L, 1596L, 1533L, 1534L, 1905L, 1819L,
1434L, 1366L, 1288L, 1376L, 1477L), RI = c(3446L, 3159L, 2428L,
2221L, 2010L, 1891L, 2351L, 2107L, 2883L, 1763L, 1595L, 1434L,
1094L, 1154L, 1496L), VT = c(2173L, 1547L, 946L, 838L, 883L,
838L, 704L, 890L, 1230L, 761L, 1019L, 854L, 830L, 848L, 1205L
), NJ = c(18871L, 21451L, 16872L, 15175L, 12138L, 13015L, 12777L,
10372L, 10528L, 10036L, 9582L, 8551L, 9477L, 9141L, 9750L), NY = c(37620L,
61983L, 33269L, 27656L, 28151L, 26433L, 26553L, 22283L, 20990L,
38916L, 21999L, 20928L, 20657L, 21008L, 24132L), PR = c(2961L,
2437L, 2123L, 3423L, 4364L, 4525L, 4088L, 3765L, 3181L, 2857L,
3366L, 2985L, 2730L, 2382L, 2720L)), .Names = c("date", "CT",
"MA", "ME", "NH", "RI", "VT", "NJ", "NY", "PR"), row.names = c(NA,
-15L), class = c("tbl_df", "tbl", "data.frame"))
I would like make annual graphs for each states for the past 5 years. And this is what I have managed so far:
library(tidyverse)
library(lubridate)
require(plotly)
df <- read_csv('icclaims.csv') %>%
mutate(date = as.Date(date, format = '%m/%e/%Y'),
doy = as.numeric(format(date, '%j')),
Year = as.factor(year(date))) %>%
gather(key = 'state', value = 'claims', -date, -doy, -Year) %>%
arrange(date)
df %>%
filter(state %in% c('CA', 'MA')) %>%
# Make sample plot
ggplot(aes(x = doy, y = claims, group = Year, colour = Year)) +
geom_path() + geom_point() +
facet_wrap(~ state, ncol = 1) +
geom_smooth(span = 1, level=.99) + #THANKS TO @JIMBOU
theme_bw()
Here is what the output look like:
The final aim is to see if a new weekly data from a particular state is outside of the 99% confidence interval of that state, base on how ever long I decide to look into the past.
The remaining problem is how to get geom_smooth() to use all the years of the data to calculate the confidence interval rather than just a single year. In short, I want to print year by year of the last 5 years, but showing the confidence interval for whole range of the data from 1997 to 2017.
Thank you very much for your help.