This is a pretty standard use case where data was collected in "wide" form and is best worked with in "long" form. The difference is that in long form you only have one column for gender and another for which individual that gender belongs to. We'll use tidyr::gather
to wrangle it into long form and the use dplyr to summarize the number of households with 1, 2, 3, ... men.
library(dplyr)
library(tidyr)
wide.df <- tribble(
~gndr, ~gndr2, ~gndr3, ~gndr4, ~gndr5, ~gndr6, ~gndr7, ~gndr8, ~gndr9, ~gndr10,
"Male", "Female", NA, NA, NA, NA, NA, NA, NA, NA,
"Female", "Male", "Female", "Male", "Male", "Male", "Male", NA, NA, NA,
"Female", "Male", "Female", NA, NA, NA, NA, NA, NA, NA,
"Male", NA, NA, NA, NA, NA, NA, NA, NA, NA,
"Male", "Female", "Male", "Female", "Female", "Male", "Male", NA, NA, NA
)
wide.df %>%
mutate(household = 1:nrow(.)) %>%
gather(key = "individual", value = "gender", -household) %>%
mutate(individual = factor(individual),
gender = factor(gender)) %>%
filter(gender == "Male") %>%
group_by(household) %>%
summarize(males = n()) %>%
arrange(desc(males)) %>%
group_by(males) %>%
summarize(households = n())
# # A tibble: 3 x 2
# males households
# <int> <int>
# 1 3
# 4 1
# 5 1
Or if you want to count the numbers of households with males and females, then it's just an additional grouping.
wide.df %>%
mutate(household = 1:nrow(.)) %>%
gather(key = "individual", value = "gender", -household) %>%
mutate(individual = factor(individual),
gender = factor(gender)) %>%
filter(!is.na(gender)) %>%
group_by(household, gender) %>%
summarize(count = n()) %>%
group_by(gender, count) %>%
summarize(households = n()) %>%
arrange(count)
# # A tibble: 6 x 3
# # Groups: gender [2]
# gender count households
# <fct> <int> <int>
# Female 1 1
# Male 1 3
# Female 2 2
# Female 3 1
# Male 4 1
# Male 5 1