I am pulling a variety of census data for multiple years. I want to interpolate this data using interpolate_pw(), but this process drops the character variable that identifies my years (census_data). The data looks like this:
census_data GEOID NAME PERSONS
<chr> <chr> <chr> <dbl>
1 2010to2014 24001 Allegany County, Maryland 73976
2 2011to2015 24001 Allegany County, Maryland 73549
3 2012to2016 24001 Allegany County, Maryland 73060
4 2013to2017 24001 Allegany County, Maryland 72591
5 2014to2018 24001 Allegany County, Maryland 71977
Here is a process for creating a simplified dummy dataset:
library(tidycensus)
library(dplyr)
library(purrr)
library(sf)
# Pull source data: request ACS variable B01003_001 for every Maryland county
# once per year (2014-2018) and row-bind the yearly results. The names given
# to the year vector become the `census_data` column, so each row records the
# year it was requested for.
source <- map_dfr(
  setNames(nm = 2014:2018),
  function(acs_year) {
    get_acs(
      geography = "county",
      state = "MD",
      variables = "B01003_001",
      year = acs_year,
      # "wide" output yields one estimate (E) and one margin (M) column
      output = "wide",
      geometry = FALSE
    )
  },
  .id = "census_data"
)
# Clean up the labels: turn each year id (e.g. "2014") into a range label
# "<year - 4>to<year>" (e.g. "2010to2014"), drop the margin-of-error column,
# and give the estimate column a readable name.
source <- source %>%
  mutate(
    # Use the column through the data mask instead of `source$census_data`,
    # so the expression always operates on the rows flowing through the pipe.
    census_data = paste0(
      as.numeric(census_data) - 4, "to", as.numeric(census_data)
    )
  ) %>%
  select(-B01003_001M) %>%
  rename(PERSONS = B01003_001E)
# Get geography for the source data: 2010 county geometries for Maryland,
# keeping only the join key and the geometry, transformed to CRS 26950.
# The requested variable's value is not needed here, so it is dropped
# without being renamed first.
geography <- get_decennial(
  geography = "county",
  variables = "P001001",
  year = 2010,
  state = "MD",
  geometry = TRUE
) %>%
  select(GEOID, geometry) %>%
  st_transform(26950)

# Attach the yearly attribute rows to the county geometries. `geography` is
# the left table, so the result keeps its geometry column; each county matches
# one row per `census_data` period in `source`.
source <- left_join(geography, source, by = "GEOID")

# The standalone geometry table is no longer needed once joined.
rm(geography)
# Get weights at a smaller geography: 2010 Maryland tracts with their
# P001001 value stored as `population`, transformed to CRS 26950. This is the
# layer later passed to interpolate_pw() as `weights`.
pwweight <- get_decennial(
  geography = "tract",
  variables = "P001001",
  year = 2010,
  state = "MD",
  geometry = TRUE
) %>%
  # keep the id and the value column (renamed in the same step)
  select(GEOID, population = value) %>%
  st_transform(26950)
# Get target geography: 2000 Maryland counties with their P001001 value
# stored as `population`, transformed to CRS 26950. This is the layer the
# source data is interpolated onto.
target <- get_decennial(
  geography = "county",
  variables = "P001001",
  year = 2000,
  state = "MD",
  geometry = TRUE
) %>%
  # keep the id and the value column (renamed in the same step)
  select(GEOID, population = value) %>%
  st_transform(26950)
And here is the interpolate_pw() call that I have set up, and its output, which is missing the identifying column:
# Population-weighted interpolation of the source counties onto the target
# counties, using tract populations as weights.
# NOTE: `source` holds one row per county per `census_data` period, and the
# call treats them all as a single layer. In the output shown below, PERSONS
# for GEOID 24001 (365153) equals the sum of its five yearly values, and the
# character column `census_data` is absent from the result.
int_10_to_00 <- interpolate_pw(
  from = source,
  to = target,
  to_id = "GEOID",
  extensive = TRUE,
  weights = pwweight,
  weight_column = "population",
  crs = 26950
)
GEOID geometry PERSONS
<chr> <MULTIPOLYGON [m]> <dbl>
1 24001 (((3233038 1599211, 3232502 1598971, 3230405 1598030, 3229896 1597802, 3229697 15977... 365153
2 24003 (((3451074 1633459, 3451060 1633466, 3451021 1633321, 3450752 1632050, 3450684 16315... 2797582
3 24005 (((3416531 1684217, 3416209 1684062, 3412541 1682268, 3411932 1681971, 3409993 16810... 4110086.
4 24009 (((3475047 1579365, 3474673 1579326, 3474485 1579377, 3474209 1579315, 3473777 15790... 452340
5 24011 (((3532835 1659887, 3532475 1659293, 3532344 1659122, 3531958 1658779, 3531380 16582... 163733
6 24013 (((3408831 1680445, 3408322 1680192, 3408072 1680085, 3404375 1678286, 3401818 16770... 837219