In case of additional categorical variable IndVar4 (factor a, b, c) the coefficient table can be more than just a row longer. Adding variable IndVar4:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.7548180 1.4005800 -1.2529223 0.2102340
IndVar1 -0.2830926 1.2076534 -0.2344154 0.8146625
IndVar2 0.1894432 0.1401217 1.3519903 0.1763784
IndVar3 0.1568672 0.2528131 0.6204867 0.5349374
IndVar4b 0.4604571 1.0774018 0.4273773 0.6691045
IndVar4c 0.9084545 1.0943227 0.8301523 0.4064527
Max number of rows is less then all variables + all categories:
max_values <- length(ind_vars) +
sum(sapply( dfPRAC, function(x) pmax(length(levels(x))-1,0)))
So the new corrected function is:
p_values <-
cbind(formula_vec, as.data.frame ( do.call(rbind,
lapply(glm_res, function(x) {
coefs <- coef(x)
rbind(c(coefs[,4] , rep(NA, max_values - length(coefs[,4]) + 1)))
})
)))
But the result is not so clean as with continuous variables. I think Metrics' idea to convert every categorical variable to (levels-1) dummy variables gives same results and maybe cleaner presentation.
Data:
dfPRAC <- structure(list(DepVar1 = c(0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1,
1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1), DepVar2 = c(0, 1, 0, 0,
1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1),
IndVar1 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1,
0, 0, 0, 1, 0, 0, 0, 1, 0),
IndVar2 = c(1, 3, 9, 1, 5, 1,
1, 8, 4, 6, 3, 15, 4, 1, 1, 3, 2, 1, 10, 1, 9, 9, 11, 5),
IndVar3 = c(0.500100322564443, 1.64241601558441, 0.622735778490702,
2.42429812749226, 5.10055213237027, 1.38479786027561, 7.24663629203007,
0.5102348706939, 2.91566510995229, 3.73356170379198, 5.42003495939846,
1.29312896116503, 3.33753833987496, 0.91783513806083, 4.7735736131668,
1.17609362602233, 5.58010703426296, 5.6668754863739, 1.4377813063642,
5.07724130837643, 2.4791994535923, 2.55100067348583, 2.41043629522981,
2.14411703944206)), .Names = c("DepVar1", "DepVar2", "IndVar1",
"IndVar2", "IndVar3"), row.names = c(NA, 24L), class = "data.frame")
dfPRAC$IndVar4 <- factor(rep(c("a", "b", "c"),8))
dfPRAC$IndVar5 <- factor(rep(c("d", "e", "f", "g"),6))
Set up the models:
dep_vars <- c("DepVar1", "DepVar2")
ind_vars <- c("IndVar1", "IndVar2", "IndVar3", "IndVar4", "IndVar5")
# create all combinations of ind_vars
ind_vars_comb <-
unlist( sapply( seq_len(length(ind_vars)),
function(i) {
apply( combn(ind_vars,i), 2, function(x) paste(x, collapse = "+"))
}))
# pair with dep_vars:
var_comb <- expand.grid(dep_vars, ind_vars_comb )
# formulas for all combinations
formula_vec <- sprintf("%s ~ %s", var_comb$Var1, var_comb$Var2)
# create models
glm_res <- lapply( formula_vec, function(f) {
fit1 <- glm( f, data = dfPRAC, family = binomial("logit"))
fit1$coefficients <- coef( summary(fit1))
return(fit1)
})
names(glm_res) <- formula_vec