reshape
Doing it that way seems a little inefficient; this appears to be just a pivoting/reshaping operation, so it can be done in one shot:
# Pivot wide: keep `id` and the original `region` column, and spread `region`
# into one column per region. Each new column holds the region string on the
# rows that match and NA everywhere else.
df2 <- reshape2::dcast(df, id + region ~ region, value.var = "region")
# Compute the new column names once, as character. A factor used as an index
# is treated as its integer codes, which would select the wrong columns.
region_cols <- as.character(unique(df2$region))
# List-style indexing (df2[cols]) always returns a data frame, so lapply()
# iterates over columns even when there is only a single region; the matrix
# form df2[, cols] would drop to a plain vector in that case.
# `+!is.na(z)` recodes "string or NA" into 1/0.
df2[region_cols] <- lapply(df2[region_cols], function(z) +!is.na(z))
df2
# id region Africa America Asia Europe
# 1 1 Asia 0 0 1 0
# 2 2 Africa 1 0 0 0
# 3 3 Europe 0 0 0 1
# 4 4 America 0 1 0 0
# 5 5 Asia 0 0 1 0
# 6 6 Africa 1 0 0 0
# 7 7 Europe 0 0 0 1
# 8 8 America 0 1 0 0
# 9 9 Asia 0 0 1 0
# 10 10 Africa 1 0 0 0
The `dcast` call pivots the data (while preserving the original "region" column); the intermediate value (immediately after `dcast`) is
# Intermediate result of the pivot, before the region columns are recoded
# into 1/0.
reshape2::dcast(
  data = df,
  formula = id + region ~ region,
  value.var = "region"
)
# id region Africa America Asia Europe
# 1 1 Asia <NA> <NA> Asia <NA>
# 2 2 Africa Africa <NA> <NA> <NA>
# 3 3 Europe <NA> <NA> <NA> Europe
# 4 4 America <NA> America <NA> <NA>
# 5 5 Asia <NA> <NA> Asia <NA>
# 6 6 Africa Africa <NA> <NA> <NA>
# 7 7 Europe <NA> <NA> <NA> Europe
# 8 8 America <NA> America <NA> <NA>
# 9 9 Asia <NA> <NA> Asia <NA>
# 10 10 Africa Africa <NA> <NA> <NA>
so all we need to do is convert those from strings/`NA`s to "is or is not `NA`", which is done using `+!is.na(z)`: `!is.na(z)` gives `TRUE`/`FALSE`, and the unary `+` turns that into 1/0.
base R, not reshaping
# Distinct regions in order of first appearance. The same vector drives both
# the comparison and the column names, so the two cannot get out of step.
uniqregion <- unique(df$region)
# outer() compares every row's region with every distinct region, giving a
# logical matrix (rows = observations, columns = regions); the unary `+`
# converts TRUE/FALSE to 1/0.
tmp <- +outer(df$region, uniqregion, `==`)
colnames(tmp) <- uniqregion
tmp
# Asia Africa Europe America
# [1,] 1 0 0 0
# [2,] 0 1 0 0
# [3,] 0 0 1 0
# [4,] 0 0 0 1
# [5,] 1 0 0 0
# [6,] 0 1 0 0
# [7,] 0 0 1 0
# [8,] 0 0 0 1
# [9,] 1 0 0 0
# [10,] 0 1 0 0
# Attach the indicator matrix to the original data. check.names = FALSE keeps
# the region labels as column names exactly as they are.
data.frame(df, tmp, check.names = FALSE)
# id region Asia Africa Europe America
# 1 1 Asia 1 0 0 0
# 2 2 Africa 0 1 0 0
# 3 3 Europe 0 0 1 0
# 4 4 America 0 0 0 1
# 5 5 Asia 1 0 0 0
# 6 6 Africa 0 1 0 0
# 7 7 Europe 0 0 1 0
# 8 8 America 0 0 0 1
# 9 9 Asia 1 0 0 0
# 10 10 Africa 0 1 0 0
Literal function
If you really want a function to loop over it, though, I still recommend `lapply` over a `for` loop:
# Append one 0/1 indicator column per distinct value of a column.
#
# Arguments:
#   data2    - a data frame.
#   variable - name (single string) of the column in `data2` to expand.
#
# Returns `data2` with one extra integer column for each distinct non-missing
# value of `data2[[variable]]`, in order of first appearance. Each new column
# is named after the value it flags and holds 1 where the row matches, 0 where
# it does not, and NA where the source value is missing.
binary <- function(data2, variable) {
  uniq <- unique(data2[[variable]])
  # A missing value is not a category of its own, and an NA column name is
  # unusable, so no indicator is built for it.
  uniq <- uniq[!is.na(uniq)]
  if (length(uniq) == 0L) {
    # Nothing to flag (empty or all-missing column): return the data unchanged.
    return(data2)
  }
  indicators <- lapply(
    setNames(nm = uniq),
    function(z) +(z == data2[[variable]])
  )
  # optional = TRUE stops as.data.frame() from rewriting the names with
  # make.names(), so a value such as "North America" keeps its label instead
  # of turning into "North.America".
  cbind(data2, as.data.frame(indicators, optional = TRUE))
}
# Example: expand the "region" column of `df` into indicator columns.
binary(data2 = df, variable = "region")
# id region Asia Africa Europe America
# 1 1 Asia 1 0 0 0
# 2 2 Africa 0 1 0 0
# 3 3 Europe 0 0 1 0
# 4 4 America 0 0 0 1
# 5 5 Asia 1 0 0 0
# 6 6 Africa 0 1 0 0
# 7 7 Europe 0 0 1 0
# 8 8 America 0 0 0 1
# 9 9 Asia 1 0 0 0
# 10 10 Africa 0 1 0 0
(You might consider not calling `cbind(data2, ...)` here, and instead just returning the `Asia:America` columns, allowing the calling function (user) to determine what to do with them; perhaps that's overly fussy/generalizing. Just a thought.)
Literal function using a `for` loop
But if you really must have it ...
# Append one 0/1 indicator column per distinct value of a column, using an
# explicit for loop.
#
# Arguments:
#   data2    - a data frame.
#   variable - name (single string) of the column in `data2` to expand.
#
# Returns `data2` with one integer column per distinct non-missing value of
# `data2[[variable]]`, named after that value: 1 where the row matches, 0
# where it does not, NA where the source value is missing. A column that
# already carries one of those names is overwritten.
binary2 <- function(data2, variable) {
  uniq <- unique(data2[[variable]])
  # A missing value gets no indicator column of its own.
  uniq <- uniq[!is.na(uniq)]
  for (nm in uniq) {
    # Always address the target column by name. With a numeric or logical
    # `variable`, a bare data2[[nm]] would be a positional index and would
    # overwrite existing columns instead of adding new ones.
    data2[[as.character(nm)]] <- +(data2[[variable]] == nm)
  }
  data2
}
# Example: the for-loop variant applied to the "region" column of `df`.
binary2(data2 = df, variable = "region")
# id region Asia Africa Europe America
# 1 1 Asia 1 0 0 0
# 2 2 Africa 0 1 0 0
# 3 3 Europe 0 0 1 0
# 4 4 America 0 0 0 1
# 5 5 Asia 1 0 0 0
# 6 6 Africa 0 1 0 0
# 7 7 Europe 0 0 1 0
# 8 8 America 0 0 0 1
# 9 9 Asia 1 0 0 0
# 10 10 Africa 0 1 0 0