fdb is this file and I need to join it with number of HHs by block group but it's not working as Geeks for Geeks says it will because the number of census blocks (12 first in BlockCode) at the end isn't as many after merging as it says it is at the begginig, I was expecting that this join would give me 226773. This isn't as if it was merging more but it's giving me less and I can't find the right lead. I really don't understand the pieces where it gives me more than 239780 Census blocks they are according to tidyCensus. Could someone please?
library(tidycensus)
fdb <- read.csv("fbd_us_with_satellite_dec2020_v1.csv")
abbr <- c("AL","AK","AZ","AR","CA","CO","CT","DE","DC","FL","GA","HI","ID","IL","IN","IA","KS","KY","LA","ME","MD","MA","MI","MN","MS","MO","MT","NE","NV",
"NH","NJ","NM","NY","NC","ND","OH","OK","OR","PA","RI","SC","SD","TN","TX","UT","VT","VA","WA","WV","WI","WY") # 51 states
HH_units <- get_acs(geography = "block group", variables = c(households = "B25001_001"), state = abbr) # same as B01003_001 for income level # HHs
HH_units$households <- HH_units$estimate
library(dplyr)
HH_units <- HH_units %>%
select(
GEOID,
NAME,
households
)
unique(fdb$BlockCode) # 11164855 rows
fdb$GEOID <- substring(fdb$BlockCode, 1, 12)
unique(fdb$GEOID) # it's got 226773 block lines
# Apparently you got to increase R's memory limit to join huge data sets
memory.limit()
memory.limit(400000)
all <- merge(x = fdb,
y = HH_units,
by = "GEOID")
unique(all$GEOID) # 138625 rows and not 226773
all2 <- full_join(x = fdb,
y = HH_units,
by = "GEOID")
unique(all2$GEOID) # 326928
all3 <- right_join(x = fdb,
y = HH_units,
by = "GEOID")
unique(all3$GEOID) # 239780