The code below follows in great part this (not SO) post.
It fits an ordinal logistic regression model with MASSS::polr
. It then computes the coefficients' confidence intervals in the odds ratio scale, predicts from the model with the test data set and runs caret::confusionMatrix
on the predictions.
The stars.pval
comes from this post by SO user Maël.
Note that the data is different, the ordinal response is an ordered factor, not a factor like in the question.
set.seed(123)
var1 <- sample(c(0,1), replace=TRUE, size=500)
df <- data.frame(var1 = sample(c(0,1), replace=TRUE, size=500),
var2 = sample(c(0,1), replace=TRUE, size=500),
var3 = sample(c(0,1), replace=TRUE, size=500),
var4 = sample(c(0,1), replace=TRUE, size=500),
var5 = sample(c(0, 1, 2, 3), replace=TRUE, size=500))
df[, 1:4] <- lapply(df[, 1:4] , factor)
df$var5 <- ordered(df$var5)
library(caTools)
library(MASS)
stars.pval <- function(x){
stars <- c("***", "**", "*", "")
var <- c(0, 0.01, 0.05, 0.10, 1)
i <- findInterval(x, var, left.open = T, rightmost.closed = T)
stars[i]
}
set.seed(100)
spl <- sample.split(df$var5, SplitRatio = 0.7)
train <- subset(df, spl)
test <- subset(df, !spl)
fit <- polr(var5 ~ ., train, Hess = TRUE)
AIC(fit)
#> [1] 980.9098
BIC(fit)
#> [1] 1007.915
ctable <- coef(summary(fit))
## calculate and store p values
p <- pnorm(abs(ctable[, "t value"]), lower.tail = FALSE) * 2
## combined table
(ctable <- cbind(ctable, "p value" = p))
#> Value Std. Error t value p value
#> var11 -0.16090820 0.1921110 -0.83757930 4.022670e-01
#> var21 0.19389705 0.1923504 1.00804088 3.134348e-01
#> var31 0.01078333 0.1932800 0.05579122 9.555081e-01
#> var41 -0.01309085 0.1931528 -0.06777458 9.459651e-01
#> 0|1 -0.94816966 0.2171773 -4.36587776 1.266133e-05
#> 1|2 0.02603120 0.2102911 0.12378653 9.014843e-01
#> 2|3 1.13948524 0.2186379 5.21174533 1.870722e-07
cbind.data.frame(ctable, ` ` = stars.pval(p)) |> print(digits = 5)
#> Value Std. Error t value p value
#> var11 -0.160908 0.19211 -0.837579 4.0227e-01
#> var21 0.193897 0.19235 1.008041 3.1343e-01
#> var31 0.010783 0.19328 0.055791 9.5551e-01
#> var41 -0.013091 0.19315 -0.067775 9.4597e-01
#> 0|1 -0.948170 0.21718 -4.365878 1.2661e-05 ***
#> 1|2 0.026031 0.21029 0.123787 9.0148e-01
#> 2|3 1.139485 0.21864 5.211745 1.8707e-07 ***
ci <- confint(fit)
#> Waiting for profiling to be done...
exp(cbind(OR = coef(fit), ci))
#> OR 2.5 % 97.5 %
#> var11 0.8513702 0.5838532 1.240404
#> var21 1.2139713 0.8328939 1.771160
#> var31 1.0108417 0.6919196 1.476772
#> var41 0.9869945 0.6757277 1.441490
var5_pred <- predict(fit, newdata = test)
caret::confusionMatrix(test$var5, var5_pred)
#> Confusion Matrix and Statistics
#>
#> Reference
#> Prediction 0 1 2 3
#> 0 33 0 0 9
#> 1 31 0 0 3
#> 2 29 0 0 8
#> 3 27 0 0 10
#>
#> Overall Statistics
#>
#> Accuracy : 0.2867
#> 95% CI : (0.2159, 0.3661)
#> No Information Rate : 0.8
#> P-Value [Acc > NIR] : 1
#>
#> Kappa : 0.0183
#>
#> Mcnemar's Test P-Value : NA
#>
#> Statistics by Class:
#>
#> Class: 0 Class: 1 Class: 2 Class: 3
#> Sensitivity 0.2750 NA NA 0.33333
#> Specificity 0.7000 0.7733 0.7533 0.77500
#> Pos Pred Value 0.7857 NA NA 0.27027
#> Neg Pred Value 0.1944 NA NA 0.82301
#> Prevalence 0.8000 0.0000 0.0000 0.20000
#> Detection Rate 0.2200 0.0000 0.0000 0.06667
#> Detection Prevalence 0.2800 0.2267 0.2467 0.24667
#> Balanced Accuracy 0.4875 NA NA 0.55417
Created on 2023-08-15 with reprex v2.0.2