
I am using a bootstrap function (code below), which produces the following matrix of results:

df <- structure(c(-30.52, 0.3759, 0.2377, -0.05084, -0.07212, 0.01609, 
14.73, 0.2081, 0.3742, 0.07192, 0.06808, 0.007358, -2.072, 1.806, 
0.6354, -0.707, -1.059, 2.187, 0.03825, 0.07087, 0.5252, 0.4796, 
0.2894, 0.02875, -62.29, -0.101, -0.4556, -0.1735, -0.1758, 0.0008726, 
0.09988, 0.6859, 0.9313, 0.1033, 0.07789, 0.03205), .Dim = c(6L, 
6L), .Dimnames = list(c("(Intercept)", "GroupB", "GroupC", "resids(reduced.form)", 
"random_variable", "year"), c("b", "SE", "z", "p", "2.5%", "97.5%"
)))

                             b        SE       z       p       2.5%   97.5%
(Intercept)          -30.52000 14.730000 -2.0720 0.03825 -6.229e+01 0.09988
GroupB                 0.37590  0.208100  1.8060 0.07087 -1.010e-01 0.68590
GroupC                 0.23770  0.374200  0.6354 0.52520 -4.556e-01 0.93130
resids(reduced.form)  -0.05084  0.071920 -0.7070 0.47960 -1.735e-01 0.10330
random_variable       -0.07212  0.068080 -1.0590 0.28940 -1.758e-01 0.07789
year                   0.01609  0.007358  2.1870 0.02875  8.726e-04 0.03205

My issue is that I want to calculate the Average Partial Effects (APE) / Average Marginal Effects (AME) with the margins package, but as far as I can see this package does not accept such output.

Is there a way to replace the values in the glm model with the bootstrapped values?
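To make this concrete, the following is roughly what I have in mind (only a sketch, using the bs matrix created by the bootstrap code below; I do not know whether a glm object modified like this would behave correctly in predict() or margins()):

# sketch: overwrite the fitted glm's coefficients with the bootstrap means and
# keep the bootstrap covariance matrix, in the hope that margins() (or a custom
# vcov()/predict() method) could then use them
boot.glm <- consistent.glm
boot.glm$coefficients[] <- colMeans(bs)  # bootstrap means of the coefficients
boot.vcov <- cov(bs)                     # bootstrap variance-covariance matrix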

Bootstrap

library(MASS)  # polr()

set.seed(2)
sandbox$Group <- as.factor(sandbox$Group)

# first stage: proportional-odds model for Group
reduced.form <- polr(Group ~ z + random_variable + year, data = sandbox)

# second stage: quasibinomial GLM that includes the first-stage residuals
# (resids() is not a base R function; it is assumed to return the residuals of the polr fit)
consistent.glm <- glm(y ~ Group + resids(reduced.form) + random_variable + year,
                      family = "quasibinomial", data = sandbox)

# refit both stages on a resampled data set and return the second-stage coefficients
FUN <- function(x) {
  reduced.form <- polr(Group ~ z + random_variable + year, data = x)
  fit <- glm(y ~ Group + resids(reduced.form) + random_variable + year,
             family = "quasibinomial", data = x)
  fit$coefficients
}

set.seed(42)
R <- 10  # number of bootstrap replications
bs <- t(replicate(R, FUN(sandbox[sample(nrow(sandbox), nrow(sandbox), replace = TRUE), ])))

# To pull out a summary of the bootstrap replicates, the matrixStats package is most convenient.

library(matrixStats)
b  <- consistent.glm$coefficients              # point estimates from the original fit
SE <- colSds(bs)                               # bootstrap standard errors
z  <- b / SE
p  <- 2 * pt(-abs(z), df = Inf)                # normal-approximation two-sided p-values
ci <- colQuantiles(bs, probs = c(.025, .975))  # percentile bootstrap confidence intervals
res <- signif(cbind(b, SE, z, p, ci), 4)
res
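
For reference, this is the call I would like to end up running, but with the bootstrapped variances instead of the model-based ones (as far as I can tell, margins() only accepts the fitted model object):

library(margins)
AME <- margins(consistent.glm)  # average marginal effects, but with model-based variances
summary(AME)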

DATA

a    <- 2    # structural parameter of interest
b    <- 1    # strength of instrument
rho  <- 0.5  # degree of endogeneity

N    <- 1000
z    <- rnorm(N)
res1 <- rnorm(N)
res2 <- res1*rho + sqrt(1-rho*rho)*rnorm(N)
x    <- z*b + res1
ys   <- x*a + res2
d    <- (ys>0) #dummy variable
y    <- (10-(d*ys))/10
random_variable <- rnorm(100, mean = 0, sd = 1)  # length 100; recycled to N = 1000 rows when the data frame is built below

library(data.table)
DT_1 <- data.frame(y,x,z, random_variable)
DT_2 <- structure(list(
  ID = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
         18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
         33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
         48, 49, 50),
  year = c(1995, 1995, 1995, 1995, 1995, 1995, 1995, 1995, 1995, 1995,
           2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000,
           2005, 2005, 2005, 2005, 2005, 2005, 2005, 2005, 2005, 2005,
           2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010,
           2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015),
  Group = c("A", "A", "A", "A", "B", "B", "B", "B", "C", "C",
            "A", "A", "A", "A", "B", "B", "B", "B", "C", "C",
            "A", "A", "A", "A", "B", "B", "B", "B", "C", "C",
            "A", "A", "A", "A", "B", "B", "B", "B", "C", "C",
            "A", "A", "A", "A", "B", "B", "B", "B", "C", "C"),
  event = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1),
  win_or_lose = c(-1, -1, -1, -1, 1, 1, 1, 1, 0, 0,
                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                  -1, -1, -1, -1, 1, 1, 1, 1, 0, 0,
                  -1, -1, -1, -1, 1, 1, 1, 1, 0, 0)),
  row.names = c(NA, -50L), class = c("tbl_df", "tbl", "data.frame"))
DT_1 <- setDT(DT_1)
DT_2 <- setDT(DT_2)
DT_2 <- rbind(DT_2, DT_2[rep(1:50, 19), ])  # repeat the 50 rows so DT_2 also has 1000 rows
sandbox <- cbind(DT_1, DT_2)
sandbox <- setDT(sandbox)[y < 0, y := 0]    # truncate negative outcomes at zero
    Looks to me that `margins` calls `predictions:::prediction.glm` which calls `base::predict.glm`. One approach would be to give your `glm` object a new subclass, and define your own `predict` method for it. Or you may even be able to go in at a deeper level and define your own `vcov` method. – dash2 Apr 24 '21 at 17:03
  • Thank you for your comment @dash2. Any chance you could help get me started, haha? I've hardly written any of the posted code myself. What you are suggesting appears to be a bit over my head... – Tom Apr 24 '21 at 17:14
  • Yeah, this isn't gonna be an easy task, you'll need to delve into the internals of glm and margins. It might be easier - and more insightful - to do the calculation yourself. Or, does margins have an option to pass in a vcov matrix for the predictors? If not, you might ask Thomas Leeper to put one in... – dash2 Apr 26 '21 at 09:19
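
A very rough sketch of the direction dash2's comments point in (untested; the subclass name bootglm is made up here, and whether margins()/predict() would actually pick these methods up, or accept a vcov argument, is an open question):

# give the fitted model a new subclass and supply a vcov() method that returns
# the bootstrap covariance matrix, so downstream code calling vcov() sees the
# bootstrapped variances
boot.glm <- consistent.glm
class(boot.glm) <- c("bootglm", class(boot.glm))
vcov.bootglm <- function(object, ...) cov(bs)

# if margins() accepts a vcov argument, passing cov(bs) directly might be simpler:
# margins(consistent.glm, vcov = cov(bs))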
