I'm looking to run a hierarchical logistic regression in a Bayesian framework, but am having trouble adapting codes for my data. I have the great book "Doing Bayesian Data Analysis", but I'm not sure how to modify the script the author provided (will paste below) to rerun the analysis on my thesis data. Specifically, I have the following questions:
- How do I add more terms to this model? I had 5 predictors in my thesis, and this model only has 2
- How do I make it hierarchical/include multiple steps or blocks?
- How do I add interaction terms among my predictors?
- I believe this script is set up for metric predictors (the book's example used height and weight data); If I am running a mixture of metric and nominal independent variables, how do I need to adapt it?
any help on any of these questions would be wonderful.
# Jags-Ydich-XmetMulti-Mlogistic.R
# Accompanies the book:
# Kruschke, J. K. (2015). Doing Bayesian Data Analysis, Second Edition:
# A Tutorial with R, JAGS, and Stan. Academic Press / Elsevier.
source("DBDA2E-utilities.R")
#===============================================================================
genMCMC = function( data , xName="x" , yName="y" ,
numSavedSteps=10000 , thinSteps=1 , saveName=NULL ,
runjagsMethod=runjagsMethodDefault ,
nChains=nChainsDefault ) {
require(runjags)
#-----------------------------------------------------------------------------
# THE DATA.
y = data[,yName]
x = as.matrix(data[,xName],ncol=length(xName))
# Do some checking that data make sense:
if ( any( !is.finite(y) ) ) { stop("All y values must be finite.") }
if ( any( !is.finite(x) ) ) { stop("All x values must be finite.") }
cat("\nCORRELATION MATRIX OF PREDICTORS:\n ")
show( round(cor(x),3) )
cat("\n")
flush.console()
# Specify the data in a list, for later shipment to JAGS:
dataList = list(
x = x ,
y = y ,
Nx = dim(x)[2] ,
Ntotal = dim(x)[1]
)
#-----------------------------------------------------------------------------
# THE MODEL.
modelString = "
# Standardize the data:
data {
for ( j in 1:Nx ) {
xm[j] <- mean(x[,j])
xsd[j] <- sd(x[,j])
for ( i in 1:Ntotal ) {
zx[i,j] <- ( x[i,j] - xm[j] ) / xsd[j]
}
}
}
# Specify the model for standardized data:
model {
for ( i in 1:Ntotal ) {
# In JAGS, ilogit is logistic:
y[i] ~ dbern( ilogit( zbeta0 + sum( zbeta[1:Nx] * zx[i,1:Nx] ) ) )
}
# Priors vague on standardized scale:
zbeta0 ~ dnorm( 0 , 1/2^2 )
for ( j in 1:Nx ) {
zbeta[j] ~ dnorm( 0 , 1/2^2 )
}
# Transform to original scale:
beta[1:Nx] <- zbeta[1:Nx] / xsd[1:Nx]
beta0 <- zbeta0 - sum( zbeta[1:Nx] * xm[1:Nx] / xsd[1:Nx] )
}
" # close quote for modelString
# Write out modelString to a text file
writeLines( modelString , con="TEMPmodel.txt" )
#-----------------------------------------------------------------------------
# INTIALIZE THE CHAINS.
# Let JAGS do it...
#-----------------------------------------------------------------------------
# RUN THE CHAINS
parameters = c( "beta0" , "beta" ,
"zbeta0" , "zbeta" )
adaptSteps = 500 # Number of steps to "tune" the samplers
burnInSteps = 1000
runJagsOut <- run.jags( method=runjagsMethod ,
model="TEMPmodel.txt" ,
monitor=parameters ,
data=dataList ,
#inits=initsList ,
n.chains=nChains ,
adapt=adaptSteps ,
burnin=burnInSteps ,
sample=ceiling(numSavedSteps/nChains) ,
thin=thinSteps ,
summarise=FALSE ,
plots=FALSE )
codaSamples = as.mcmc.list( runJagsOut )
# resulting codaSamples object has these indices:
# codaSamples[[ chainIdx ]][ stepIdx , paramIdx ]
if ( !is.null(saveName) ) {
save( codaSamples , file=paste(saveName,"Mcmc.Rdata",sep="") )
}
return( codaSamples )
} # end function
#===============================================================================
smryMCMC = function( codaSamples ,
saveName=NULL ) {
summaryInfo = NULL
mcmcMat = as.matrix(codaSamples)
paramName = colnames(mcmcMat)
for ( pName in paramName ) {
summaryInfo = rbind( summaryInfo , summarizePost( mcmcMat[,pName] ) )
}
rownames(summaryInfo) = paramName
if ( !is.null(saveName) ) {
write.csv( summaryInfo , file=paste(saveName,"SummaryInfo.csv",sep="") )
}
return( summaryInfo )
}
#===============================================================================
plotMCMC = function( codaSamples , data , xName="x" , yName="y" ,
showCurve=FALSE , pairsPlot=FALSE ,
saveName=NULL , saveType="jpg" ) {
# showCurve is TRUE or FALSE and indicates whether the posterior should
# be displayed as a histogram (by default) or by an approximate curve.
# pairsPlot is TRUE or FALSE and indicates whether scatterplots of pairs
# of parameters should be displayed.
#-----------------------------------------------------------------------------
y = data[,yName]
x = as.matrix(data[,xName])
mcmcMat = as.matrix(codaSamples,chains=TRUE)
chainLength = NROW( mcmcMat )
zbeta0 = mcmcMat[,"zbeta0"]
zbeta = mcmcMat[,grep("^zbeta$|^zbeta\\[",colnames(mcmcMat))]
if ( ncol(x)==1 ) { zbeta = matrix( zbeta , ncol=1 ) }
beta0 = mcmcMat[,"beta0"]
beta = mcmcMat[,grep("^beta$|^beta\\[",colnames(mcmcMat))]
if ( ncol(x)==1 ) { beta = matrix( beta , ncol=1 ) }
#-----------------------------------------------------------------------------
if ( pairsPlot ) {
# Plot the parameters pairwise, to see correlations:
openGraph()
nPtToPlot = 1000
plotIdx = floor(seq(1,chainLength,by=chainLength/nPtToPlot))
panel.cor = function(x, y, digits=2, prefix="", cex.cor, ...) {
usr = par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r = (cor(x, y))
txt = format(c(r, 0.123456789), digits=digits)[1]
txt = paste(prefix, txt, sep="")
if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
text(0.5, 0.5, txt, cex=1.5 ) # was cex=cex.cor*r
}
pairs( cbind( beta0 , beta )[plotIdx,] ,
labels=c( "beta[0]" ,
paste0("beta[",1:ncol(beta),"]\n",xName) ) ,
lower.panel=panel.cor , col="skyblue" )
if ( !is.null(saveName) ) {
saveGraph( file=paste(saveName,"PostPairs",sep=""), type=saveType)
}
}
#-----------------------------------------------------------------------------
# Data with posterior predictive:
# If only 1 predictor:
if ( ncol(x)==1 ) {
openGraph(width=7,height=6)
par( mar=c(3.5,3.5,2,1) , mgp=c(2.0,0.7,0) )
plot( x[,1] , y , xlab=xName[1] , ylab=yName ,
cex=2.0 , cex.lab=1.5 , col="black" , main="Data with Post. Pred." )
abline(h=0.5,lty="dotted")
cVec = floor(seq(1,chainLength,length=30))
xWid=max(x)-min(x)
xComb = seq(min(x)-0.1*xWid,max(x)+0.1*xWid,length=201)
for ( cIdx in cVec ) {
lines( xComb , 1/(1+exp(-(beta0[cIdx]+beta[cIdx,1]*xComb ))) , lwd=1.5 ,
col="skyblue" )
xInt = -beta0[cIdx]/beta[cIdx,1]
arrows( xInt,0.5, xInt,-0.04, length=0.1 , col="skyblue" , lty="dashed" )
}
if ( !is.null(saveName) ) {
saveGraph( file=paste(saveName,"DataThresh",sep=""), type=saveType)
}
}
# If only 2 predictors:
if ( ncol(x)==2 ) {
openGraph(width=7,height=7)
par( mar=c(3.5,3.5,2,1) , mgp=c(2.0,0.7,0) )
plot( x[,1] , x[,2] , pch=as.character(y) , xlab=xName[1] , ylab=xName[2] ,
col="black" , main="Data with Post. Pred.")
cVec = floor(seq(1,chainLength,length=30))
for ( cIdx in cVec ) {
abline( -beta0[cIdx]/beta[cIdx,2] , -beta[cIdx,1]/beta[cIdx,2] , col="skyblue" )
}
if ( !is.null(saveName) ) {
saveGraph( file=paste(saveName,"DataThresh",sep=""), type=saveType)
}
}
#-----------------------------------------------------------------------------
# Marginal histograms:
decideOpenGraph = function( panelCount , saveName , finished=FALSE ,
nRow=1 , nCol=3 ) {
# If finishing a set:
if ( finished==TRUE ) {
if ( !is.null(saveName) ) {
saveGraph( file=paste0(saveName,ceiling((panelCount-1)/(nRow*nCol))),
type=saveType)
}
panelCount = 1 # re-set panelCount
return(panelCount)
} else {
# If this is first panel of a graph:
if ( ( panelCount %% (nRow*nCol) ) == 1 ) {
# If previous graph was open, save previous one:
if ( panelCount>1 & !is.null(saveName) ) {
saveGraph( file=paste0(saveName,(panelCount%/%(nRow*nCol))),
type=saveType)
}
# Open new graph
openGraph(width=nCol*7.0/3,height=nRow*2.0)
layout( matrix( 1:(nRow*nCol) , nrow=nRow, byrow=TRUE ) )
par( mar=c(4,4,2.5,0.5) , mgp=c(2.5,0.7,0) )
}
# Increment and return panel count:
panelCount = panelCount+1
return(panelCount)
}
}
# Original scale:
panelCount = 1
panelCount = decideOpenGraph( panelCount , saveName=paste0(saveName,"PostMarg") )
histInfo = plotPost( beta0 , cex.lab = 1.75 , showCurve=showCurve ,
xlab=bquote(beta[0]) , main="Intercept" )
for ( bIdx in 1:ncol(beta) ) {
panelCount = decideOpenGraph( panelCount , saveName=paste0(saveName,"PostMarg") )
histInfo = plotPost( beta[,bIdx] , cex.lab = 1.75 , showCurve=showCurve ,
xlab=bquote(beta[.(bIdx)]) , main=xName[bIdx] )
}
panelCount = decideOpenGraph( panelCount , finished=TRUE , saveName=paste0(saveName,"PostMarg") )
# Standardized scale:
panelCount = 1
panelCount = decideOpenGraph( panelCount , saveName=paste0(saveName,"PostMargZ") )
histInfo = plotPost( zbeta0 , cex.lab = 1.75 , showCurve=showCurve ,
xlab=bquote(z*beta[0]) , main="Intercept" )
for ( bIdx in 1:ncol(beta) ) {
panelCount = decideOpenGraph( panelCount , saveName=paste0(saveName,"PostMargZ") )
histInfo = plotPost( zbeta[,bIdx] , cex.lab = 1.75 , showCurve=showCurve ,
xlab=bquote(z*beta[.(bIdx)]) , main=xName[bIdx] )
}
panelCount = decideOpenGraph( panelCount , finished=TRUE , saveName=paste0(saveName,"PostMargZ") )
#-----------------------------------------------------------------------------
}
#===============================================================================