For my analysis I need to split my sample into 4 subsamples,
because I want to compare voters' turnout percentage among:
1) Voters with 0 other voters in the household
2) Voters with 1 other voter in the household
3) Voters with 2 other voters in the household
4) Voters with 3+ other voters in the household
I have 7 variables for that:
1) Size of Household (vn437)
2) Age of 2nd person in HH (vn438a)
3) Age of 3rd person in HH (vn438b)
and so on until 6th person's age in HH
In my logic I would need to create 4 subsamples, for each group:
First group (0 other voters in the HH) would be observations that fulfill:
d$vn437 == 1;
d$vn437 == 2 & d$vn438a < 18;
d$vn437 == 3 & d$vn438a < 18 & d$vn438b < 18;
d$vn437 == 4 & d$vn438a < 18 & d$vn438b < 18 & d$vn438c < 18;
and so on until I finish with 'd$vn438e < 18'
I am an ultra noob with R and I have no idea how to go about this.
How would I create these groups? I am really desperate and I've been looking for hours to no avail!
As Richard Telford suggested, here is the output of the `dput(head(d))`
command:
structure(list(dat = c(20091026, 20091025, 20091025, 20091026,
20091025, 20091025), vn1 = c(1, 2, 1, 1, 1, 1), vn542 = c(27,
22, 25, 23, 24, 22), vn217 = c(4, 3, 2, 4, 3, 3), n111 = c(1,
1, 1, 2, 1, 1), vn437 = c(2, 2, 2, 2, 2, 2), vn438a = c(28, 24,
24, 24, 23, 25), vn438b = c(1000, 1000, 1000, 1000, 1000, 1000
), vn438c = c(1000, 1000, 1000, 1000, 1000, 1000), vn438d = c(1000,
1000, 1000, 1000, 1000, 1000), vn438e = c(1000, 1000, 1000, 1000,
1000, 1000), vn5 = c(4, 4, 4, 4, 4, 4), vn9a = c(5, 5, 5, 5,
5, 5), vn75 = c(1, 1, 3, 2, 1, 3), vn79 = c(2, 2, 2, 2, 2, 2)), .Names = c("dat",
"vn1", "vn542", "vn217", "n111", "vn437", "vn438a", "vn438b",
"vn438c", "vn438d", "vn438e", "vn5", "vn9a", "vn75", "vn79"), row.names = c(2174L,
2175L, 2177L, 2178L, 2180L, 2181L), class = "data.frame")
The value 1000 (e.g. in vn438b) is the code for missing (NA) values.
I did not remove those rows, because doing so would also drop otherwise valid observations, so the "age of the Nth person in HH" variables are left uncleaned.
Also here is what I want my outcome to look like in the end
EDIT
Managed to solve it on my own. For anyone interested, here's my code:
# Replace the survey codes in columns 2 to 15 with readable names in a single
# vectorized assignment; column 1 is left unchanged.
colnames(d)[2:15] <- c(
  "sex", "age", "polint", "turnout",
  "HHsize",
  "HHage2", "HHage3", "HHage4", "HHage5", "HHage6",
  "marital", "education", "income", "religion"
)
####################################################################
## creating subsets: no other voters in HH --> combine them later ##
####################################################################
# A respondent has no other voter in the household when they live alone, or
# when every other listed household member is under 18. The `< 18` tests
# also reject the value 1000, so they never match on it.
noHHM  <- d[d$HHsize == 1, ]
noHHM1 <- d[d$HHsize == 2 & d$HHage2 < 18, ]
noHHM2 <- d[d$HHsize == 3 & d$HHage2 < 18 & d$HHage3 < 18, ]
noHHM3 <- d[d$HHsize == 4 & d$HHage2 < 18 & d$HHage3 < 18 & d$HHage4 < 18, ]
noHHM4 <- d[d$HHsize == 5 & d$HHage2 < 18 & d$HHage3 < 18 & d$HHage4 < 18 &
              d$HHage5 < 18, ]
noHHM5 <- d[d$HHsize == 6 & d$HHage2 < 18 & d$HHage3 < 18 & d$HHage4 < 18 &
              d$HHage5 < 18 & d$HHage6 < 18, ]
# merging the noHHM variables
# The size-5 and size-6 subsets stay in the merge instead of being discarded
# by hand: rbind() copes with zero-row data frames, so nothing changes when
# they are empty and no household is silently lost when they are not.
zeroHHM <- rbind(noHHM, noHHM1, noHHM2, noHHM3, noHHM4, noHHM5)
# removing intermediate variables now
rm(noHHM, noHHM1, noHHM2, noHHM3, noHHM4, noHHM5)
# creating two subsets (youth voters [25 and under] and non youth voters
# [26 and over])
Youth0 <- zeroHHM[zeroHHM$age < 26, ]
Old0 <- zeroHHM[zeroHHM$age > 25, ]
##################################################
## repeat whole process for 1 other voter in HH ##
##################################################
# Exactly one other household member is of voting age (18+). The `< 900`
# guard rejects the value 1000, so it is not counted as an adult; it is
# applied to the size-2 case as well, to match the other cases.
# NOTE(review): these conditions only look for the adult in HHage2, i.e. they
# assume adults are listed before minors -- confirm against the survey.
one1HHM <- d[d$HHsize == 2 & d$HHage2 > 17 & d$HHage2 < 900, ]
oneHHM1 <- d[d$HHsize == 3 & d$HHage2 > 17 & d$HHage2 < 900 & d$HHage3 < 18, ]
oneHHM2 <- d[d$HHsize == 4 & d$HHage2 > 17 & d$HHage2 < 900 & d$HHage3 < 18 &
               d$HHage4 < 18, ]
oneHHM3 <- d[d$HHsize == 5 & d$HHage2 > 17 & d$HHage2 < 900 & d$HHage3 < 18 &
               d$HHage4 < 18 & d$HHage5 < 18, ]
oneHHM4 <- d[d$HHsize == 6 & d$HHage2 > 17 & d$HHage2 < 900 & d$HHage3 < 18 &
               d$HHage4 < 18 & d$HHage5 < 18 & d$HHage6 < 18, ]
# merging the oneHHM variables
oneHHM <- rbind(one1HHM, oneHHM1, oneHHM2, oneHHM3, oneHHM4)
# removing intermediate variables now
rm(one1HHM, oneHHM1, oneHHM2, oneHHM3, oneHHM4)
# creating two subsets (youth voters [25 and under] and non youth voters
# [26 and over]); the age mask must come from oneHHM itself, not from the
# zero-voter group, otherwise the wrong rows (and NA rows) are selected
Youth1 <- oneHHM[oneHHM$age < 26, ]
Old1 <- oneHHM[oneHHM$age > 25, ]
###################################################
## repeat whole process for 2 other voters in HH ##
###################################################
# Exactly two other household members are of voting age (18+). The `< 900`
# guard rejects the value 1000, so it is not counted as an adult.
# NOTE(review): these conditions only look for the adults in HHage2 and
# HHage3, i.e. they assume adults are listed before minors -- confirm against
# the survey.
twoHHM1 <- d[d$HHsize == 3 & d$HHage2 > 17 & d$HHage2 < 900 &
               d$HHage3 > 17 & d$HHage3 < 900, ]
twoHHM2 <- d[d$HHsize == 4 & d$HHage2 > 17 & d$HHage2 < 900 &
               d$HHage3 > 17 & d$HHage3 < 900 & d$HHage4 < 18, ]
twoHHM3 <- d[d$HHsize == 5 & d$HHage2 > 17 & d$HHage2 < 900 &
               d$HHage3 > 17 & d$HHage3 < 900 & d$HHage4 < 18 &
               d$HHage5 < 18, ]
twoHHM4 <- d[d$HHsize == 6 & d$HHage2 > 17 & d$HHage2 < 900 &
               d$HHage3 > 17 & d$HHage3 < 900 & d$HHage4 < 18 &
               d$HHage5 < 18 & d$HHage6 < 18, ]
# merging the twoHHM variables
twoHHM <- rbind(twoHHM1, twoHHM2, twoHHM3, twoHHM4)
# removing intermediate variables
rm(twoHHM1, twoHHM2, twoHHM3, twoHHM4)
# creating two subsets (youth voters [25 and under] and non youth voters
# [26 and over]); the age mask must come from twoHHM itself, not from the
# zero-voter group, otherwise the wrong rows (and NA rows) are selected
Youth2 <- twoHHM[twoHHM$age < 26, ]
Old2 <- twoHHM[twoHHM$age > 25, ]
####################################################
## repeat whole process for 3+ other voters in HH ##
####################################################
# At least three other household members are of voting age (18+). Persons 5
# and 6 are deliberately not tested: once persons 2-4 are adults the
# household is already in the "3+" group. The `< 900` guard rejects the
# value 1000, so it is not counted as an adult.
# NOTE(review): these conditions only look for the adults in HHage2..HHage4,
# i.e. they assume adults are listed before minors -- confirm against the
# survey.
threeHHM1 <- d[d$HHsize == 4 & d$HHage2 > 17 & d$HHage2 < 900 &
                 d$HHage3 > 17 & d$HHage3 < 900 &
                 d$HHage4 > 17 & d$HHage4 < 900, ]
threeHHM2 <- d[d$HHsize == 5 & d$HHage2 > 17 & d$HHage2 < 900 &
                 d$HHage3 > 17 & d$HHage3 < 900 &
                 d$HHage4 > 17 & d$HHage4 < 900, ]
threeHHM3 <- d[d$HHsize == 6 & d$HHage2 > 17 & d$HHage2 < 900 &
                 d$HHage3 > 17 & d$HHage3 < 900 &
                 d$HHage4 > 17 & d$HHage4 < 900, ]
# merging the threeHHM variables
threeHHM <- rbind(threeHHM1, threeHHM2, threeHHM3)
# removing intermediate variables
rm(threeHHM1, threeHHM2, threeHHM3)
# creating two subsets (youth voters [25 and under] and non youth voters
# [26 and over]); the age mask must come from threeHHM itself, not from the
# zero-voter group, otherwise the wrong rows (and NA rows) are selected
Youth3 <- threeHHM[threeHHM$age < 26, ]
Old3 <- threeHHM[threeHHM$age > 25, ]
# Give each merged group its final name (HHM0 to HHM3) and drop the working
# name straight away, one group at a time.
HHM0 <- zeroHHM
rm(zeroHHM)

HHM1 <- oneHHM
rm(oneHHM)

HHM2 <- twoHHM
rm(twoHHM)

HHM3 <- threeHHM
rm(threeHHM)