0

I am pulling a variety of census data for multiple years. I want to interpolate this data using pw_interpolate, but this process drops character variables which are identifying my years (census_data), sort of like this looks like this:

  census_data GEOID NAME                      PERSONS
  <chr>       <chr> <chr>                       <dbl>
1 2010to2014  24001 Allegany County, Maryland   73976
2 2011to2015  24001 Allegany County, Maryland   73549
3 2012to2016  24001 Allegany County, Maryland   73060
4 2013to2017  24001 Allegany County, Maryland   72591
5 2014to2018  24001 Allegany County, Maryland   71977

Here is a process for creating a simplified dummy dataset:

library(tidycensus)
library(dplyr)
library(purrr)
library(sf)

#pull data for source data
source <- map_dfr(
  setNames(2014:2018, 2014:2018),
  ~get_acs(
    geography = "county",
    state = ("MD"),
    variables = "B01003_001",
    year = .x,
    output = "wide",
    geometry = FALSE
  ),
  .id = "census_data"
)

#cleanup the language a bit
source <- source %>% 
  mutate(census_data = paste0((as.numeric(source$census_data)  - 4),"to", as.numeric(source$census_data))) %>% 
  subset(., select = -c(B01003_001M)) %>% 
  rename(PERSONS = B01003_001E)

#get geography for source
geography <- get_decennial(
  geography = "county",
  variables = "P001001",
  year = 2010,
  state = "MD",
  geometry = TRUE
) %>%   
  rename(population = value) %>% 
  subset(., select = c(GEOID, geometry)) %>%
  st_transform(26950)

source <- left_join(geography, source, by=c("GEOID"))
rm(geography)

#get weights at smaller geography
pwweight <- get_decennial(
  geography = "tract",
  variables = "P001001",
  year = 2010,
  state = "MD",
  geometry = TRUE
) %>%   
  rename(population = value) %>% 
  subset(., select = c(GEOID, population)) %>%
  st_transform(26950)

#get target geography
target <- get_decennial(
  geography = "county",
  variables = "P001001",
  year = 2000,
  state = "MD",
  geometry = TRUE
) %>%   
  rename(population = value) %>% 
  subset(., select = c(GEOID, population)) %>%
  st_transform(26950)

And here is the pw_interpolation process that I have setup, and the outcome missing the identifying column:

int_10_to_00 <- interpolate_pw(
  from = source,
  to = target,
  to_id = "GEOID",
  weights = pwweight,
  weight_column = "population",
  crs = 26950,
  extensive = TRUE
)

  GEOID                                                                                geometry  PERSONS
  <chr>                                                                      <MULTIPOLYGON [m]>    <dbl>
1 24001 (((3233038 1599211, 3232502 1598971, 3230405 1598030, 3229896 1597802, 3229697 15977...  365153 
2 24003 (((3451074 1633459, 3451060 1633466, 3451021 1633321, 3450752 1632050, 3450684 16315... 2797582 
3 24005 (((3416531 1684217, 3416209 1684062, 3412541 1682268, 3411932 1681971, 3409993 16810... 4110086.
4 24009 (((3475047 1579365, 3474673 1579326, 3474485 1579377, 3474209 1579315, 3473777 15790...  452340 
5 24011 (((3532835 1659887, 3532475 1659293, 3532344 1659122, 3531958 1658779, 3531380 16582...  163733 
6 24013 (((3408831 1680445, 3408322 1680192, 3408072 1680085, 3404375 1678286, 3401818 16770...  837219 
tchoup
  • 971
  • 4
  • 11

0 Answers0