R Log on MCMC, Final 2 Classes of Fall 2017 STAT 705
====================================================
Dec.6 & 11, 2017

Examples of Statistical Uses of Bayes Posteriors Obtained by MCMC
=================================================================

(1) Recall the "Nuclear Pump" Example from the Dec.4 Log (Class Log 15).

> t(NucPmp)
     [,1] [,2] [,3]  [,4] [,5] [,6] [,7] [,8] [,9] [,10]
Fail  5.0  1.0  5.0  14.0  3.0 19.0  1.0    1  4.0  22.0
Tim  94.3 15.7 62.9 125.8  5.2 31.4  1.1    1  2.1  10.5

## lam1,...,lam10 were the parameters for failure rates on the 10 pumps,
## and beta was the prior parameter for the lam[i] ~ Gamma(1.8, beta).

We generated a block of 5000 successive MCMC 10-vector values

> LamBlock = tmp[,1:10]   ## from the posterior density for lam1,...,lam10

## Compare posterior means and medians with the MLEs:

> round(rbind(MLE = NucPmp[,1]/NucPmp[,2], Post.Exp = apply(LamBlock,2,mean),
              Post.med = apply(LamBlock,2,median)), 3)
          [,1]  [,2]  [,3]  [,4]  [,5]  [,6]  [,7]  [,8]  [,9] [,10]
MLE      0.053 0.064 0.079 0.111 0.577 0.605 0.909 1.000 1.905 2.095
Post.Exp 0.071 0.154 0.104 0.123 0.632 0.616 0.802 0.812 1.284 1.836
Post.med 0.067 0.134 0.099 0.121 0.584 0.607 0.689 0.703 1.206 1.803

## Discuss the effect of a shared prior parameter !!
## THE MAIN POINT IS THAT THE BAYES FORMULATION CAUSES THE HIGHEST PUMP
## ESTIMATES TO BE SHADED DOWN AND THE SMALLEST TO BE SHADED UP.

## Looking at the posterior for beta, we find expectation

> mean(tmp[,11])
[1] 2.511802

## which suggests that the estimated prior mean for the lam[i] values was
## 1.8/2.5118 = 0.7166, a point toward which the posterior lambda's are pushed.

> plot(density(LamBlock[,1]), xlim=c(0,2.3), xlab="lambda",
       main="Overlaid Posterior densities for Pump lambda's")
  for(j in 2:10) lines(density(LamBlock[,j]), lty=j)
  legend(locator(1), legend=paste0("Pump ",1:10), lty=1:10)
       ### saved as "PumpLambdas.pdf"

## THE VERY NARROW POSTERIOR LAMBDA DENSITIES ARE THE ONES WITH LARGE t_i's.

## We can further obtain 95% Bayesian credible intervals for the lambda_i's:

> round(rbind(med = apply(LamBlock,2,median),
        apply(LamBlock, 2, function(col) quantile(col, prob=c(.025,.975)))), 3)
       [,1]  [,2]  [,3]  [,4]  [,5]  [,6]  [,7]  [,8]  [,9] [,10]
med   0.067 0.134 0.099 0.121 0.584 0.607 0.689 0.703 1.206 1.803
2.5%  0.028 0.029 0.042 0.070 0.191 0.377 0.141 0.139 0.439 1.153
97.5% 0.135 0.385 0.193 0.191 1.338 0.918 2.073 2.144 2.648 2.683

## Instead of the density estimate we get by applying "density" to
## LamBlock[,i], another method is to use the full conditional density
## f(lam[i] | beta) ~ Gamma(n_i + 1.8, t_i + beta) averaged over beta:

> plot(density(LamBlock[,6]), xlab="lambda",
       main="Density estimates for lambda[6]")
  curve( apply(outer(x, tmp[,11], function(x,y) dgamma(x, NucPmp[6,"Fail"]+1.8,
         NucPmp[6,"Tim"]+y)), 1, mean), add=T, col="blue")
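
## As an aside (not in the original log): here is a minimal sketch of the
## kind of Gibbs sampler that could have produced the matrix tmp above,
## assuming a Gamma(gam, del) hyperprior on beta, with full conditionals
##     lam[i] | beta, data  ~  Gamma(n_i + 1.8, t_i + beta)
##     beta   | lam,  data  ~  Gamma(10*1.8 + gam, del + sum(lam)).
## The hyperparameter values gam and del below are illustrative placeholders,
## not necessarily the ones used in the Dec.4 log.

gam = 0.01 ; del = 1                      ## ASSUMED hyperprior values
nFail = NucPmp[,"Fail"] ; tTim = NucPmp[,"Tim"]
tmp = array(0, c(5000, 11))
lam = nFail/tTim ; beta = 1               ## crude starting values
for(b in 1:5000) {
   lam  = rgamma(10, nFail + 1.8, tTim + beta)   ## update the 10 lam[i]'s
   beta = rgamma(1, 10*1.8 + gam, del + sum(lam))  ## then update beta
   tmp[b,] = c(lam, beta) }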
Another Gibbs Sampler Example
=============================

OBJECTIVE: to simulate from the "circular" Markov chain X1,...,X20 in which

  X1  | X2,...,X20                      = .3*X2      + .3*X20     + 0.4*rlogis(1)
  Xj  | X1,...,X_{j-1},X_{j+1},...,X20  = .3*X_{j-1} + .3*X_{j+1} + 0.4*rlogis(1)
  X20 | X1,...,X19                      = .3*X19     + .3*X1      + 0.4*rlogis(1)

where all of the logistic errors are independent of the X's conditioned on.

#--------------------------------------------------

tmp = rlogis(20)
Xmat = array(0, c(1.5e4,20))
for(i in 1:1.5e4) {
   tmp[1] = .3*tmp[2] + .3*tmp[20] + .4*rlogis(1)
   for(j in 2:19) tmp[j] = .3*tmp[j-1] + .3*tmp[j+1] + .4*rlogis(1)
   tmp[20] = .3*tmp[1] + .3*tmp[19] + .4*rlogis(1)
   Xmat[i,] = tmp }

plot(density(c(Xmat[5001:13000,])), xlab="Xj density",
     main="Estimated Density of Xj")
lines(density(c(Xmat[13001:15000,17])), col="red")
     ### Close agreement suggests convergence.

Metropolis-Hastings Algorithm Examples
======================================

(O) [See the last segment of Rlogs/MCMC_Rlog09.txt for univariate examples,
    extended here. Also see p.5 of the notes at
    https://www.math.umd.edu/~slud/Mini.MCMC/Lec1Figs/MCMCtalk.pdf
    for how Metropolis-Hastings with a proposal q(x,y) depending on y alone
    can be viewed as a direct generalization of accept-reject methods.]

(1) Simulate from:  f(x) = C * exp(x[1] + x[2] - (1+x[1]^4)*(1+x[2]^4))

### Here we could try a method with full conditionals, but these are messy
### and not easy to simulate from. Instead try the proposal kernel

    q(x,y) = (1/pi) * exp(-0.5 + y[1] + y[2] - y[1]^2 - y[2]^2)

### i.e. a bivariate normal with independent coordinates, each with mean 0.5
### and variance 0.5.

logqker = function(xv,yv)  yv[1] + yv[2] - yv[1]^2 - yv[2]^2
lfdens  = function(xv)  xv[1] + xv[2] - (1+xv[1]^4)*(1+xv[2]^4)

Xmat = array(0, c(1.5e4,2))
tmp = c(0,0)
Uvec = runif(1.5e4)
for(i in 1:1.5e4) {
   aux = rnorm(2, 0.5, sqrt(0.5))
   rho = exp(lfdens(aux) - lfdens(tmp) + logqker(aux,tmp) - logqker(tmp,aux))
   if (Uvec[i] < rho) tmp = aux
   Xmat[i,] = tmp }

> sum(Xmat[-1,1] == Xmat[-1.5e4,1])
[1] 5511        ### so roughly 1 out of 3 proposed steps was "rejected"

> hist(Xmat[1001:1e4,1], prob=T, nclass=50)
  lines(density(Xmat[1001:1e4,1]), col="red")

### How do we check this ???  Compute the first marginal of f numerically:

fmarg = function(x) {
   m = length(x)
   out = numeric(m)
   for(i in 1:m) out[i] = integrate(function(y)
          exp(x[i] + y - (1+x[i]^4)*(1+y^4)), -Inf, Inf)$val
   out }

> integrate(fmarg, -Inf, Inf)
1.574694 with absolute error < 5.5e-05

> hist(Xmat[1001:1e4,1], prob=T, nclass=50, xlab="coord 1",
       main="Overplotted Estimated Densities from MCMC")
  lines(density(Xmat[1001:1e4,1]), col="red")
  curve(fmarg(x)/1.574694, -2, 2, add=T, col="blue")
  lines(density(Xmat[10001:1.5e4,1]), col="green")

### IN THIS CASE, WE DO NOT HAVE TO WORRY ABOUT END EFFECTS OF THE DENSITY
### ESTIMATE, SO THE PLOT SHOWING AGREEMENT BETWEEN THE HISTOGRAM AND ITS
### (red) DENSITY ESTIMATE IS COMPLETELY PREDICTABLE. BUT THE AGREEMENT
### BETWEEN THAT DENSITY ESTIMATE AND THE (blue) EXACT MARGINAL OBTAINED BY
### INTEGRATING OUT THE SECOND COORDINATE GIVES EXTRA CONFIRMATION, AND THE
### LATTER IS THE MORE RELIABLE AND LESS ARBITRARY DENSITY-ESTIMATE CHOICE.

### Now iterate many more times to check final convergence:

XmatA = array(0, c(1.5e4,2))
tmp = c(0,0)
for(j in 1:50) {
   Uvec = runif(1.5e4)
   for(i in 1:1.5e4) {
      aux = rnorm(2, 0.5, sqrt(0.5))
      rho = exp(lfdens(aux) - lfdens(tmp) + logqker(aux,tmp) - logqker(tmp,aux))
      if (Uvec[i] < rho) tmp = aux
      XmatA[i,] = tmp } }

lines(density(XmatA[1:1.5e4,1]), lwd=2, col="purple")

### So we confirm that convergence happened quickly despite the "wasted" steps.
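
## As an aside (not in the original log): the independence-proposal sampler
## above is easy to package as a reusable function that reports the
## acceptance rate directly instead of counting repeated values afterward.
## The function name mhIndep and its argument names are illustrative choices,
## not from the class log.

mhIndep = function(nstep, start, lf, draw, lq) {
   ## nstep MH steps; draw() samples a proposal, lq(y) is its log density
   ## (up to an additive constant), lf is the log target (up to a constant)
   X = array(0, c(nstep, length(start)))
   tmp = start ; nacc = 0
   for(i in 1:nstep) {
      aux = draw()
      if (runif(1) < exp(lf(aux) - lf(tmp) + lq(tmp) - lq(aux))) {
         tmp = aux ; nacc = nacc + 1 }
      X[i,] = tmp }
   list(X = X, acc.rate = nacc/nstep) }

out = mhIndep(1.5e4, c(0,0), lfdens, function() rnorm(2, 0.5, sqrt(0.5)),
              function(yv) yv[1] + yv[2] - yv[1]^2 - yv[2]^2)
out$acc.rate    ## should again come out near 2/3, matching the count above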
(2) METROPOLIS-HASTINGS EXAMPLE WITH q(x,y) DEPENDING ON BOTH x AND y.

## First let's re-do the last example taking q(x,y) to be the kernel of a
## random walk with small independent normal steps in R^2. NB: since
## q(x,y) = prod(dnorm(y-x, 0, .1)) = q(y,x), the proposal-density terms
## cancel from the Hastings ratio.

tmp = c(0,0)
lfdens = function(xv)  xv[1] + xv[2] - (1+xv[1]^4)*(1+xv[2]^4)
Xmat2 = array(0, c(1.5e4,2))
Uvec = runif(1.5e4)
steps = array(rnorm(3e4)*.1, c(1.5e4,2))
for(i in 1:1.5e4) {
   aux = tmp + steps[i,]
   rho = exp(lfdens(aux) - lfdens(tmp))
   if (Uvec[i] < rho) tmp = aux
   Xmat2[i,] = tmp }

> sum(Xmat2[-1,1] == Xmat2[-1.5e4,1])
[1] 1481     #### far fewer "wasted" steps (now about 10%), but the chain
             #### may still take a long time to converge

> hist(Xmat2[5001:1e4,1], prob=T, nclass=50, ylim=c(0,1), xlab="coord 1",
       main="Overplotted Estimated Densities from MCMC")
  lines(density(Xmat2[5001:1e4,1]), col="red")
  lines(density(Xmat2[10001:1.5e4,1]), col="blue")
  lines(density(XmatA[1:1.5e4,1]), lwd=2, col="purple")

#### NOW THE PURPLE CURVE IS THE ONE FROM THE PREVIOUS METHOD, AND THE OTHER
#### CURVES COME FROM THE TWO POST-BURN-IN BLOCKS USING THE CURRENT
#### (random-walk) METHOD.

> Xmat3 = Xmat2
  tmp = Xmat2[1.5e4,]
  steps = array(rnorm(3e4)*.1, c(1.5e4,2))
  Uvec = runif(1.5e4)
  for(i in 1:1.5e4) {
     aux = tmp + steps[i,]
     rho = exp(lfdens(aux) - lfdens(tmp))
     if (Uvec[i] < rho) tmp = aux
     Xmat3[i,] = tmp }
  lines(density(Xmat3[10001:1.5e4,1]), lty=5)

### We can see that these further iterations shift the density to the right,
### toward the blue density.

tmp = Xmat3[1.5e4,]
for(j in 1:300) {
   steps = array(rnorm(3e4)*.1, c(1.5e4,2))
   Uvec = runif(1.5e4)
   for(i in 1:1.5e4) {
      aux = tmp + steps[i,]
      rho = exp(lfdens(aux) - lfdens(tmp))
      if (Uvec[i] < rho) tmp = aux
      Xmat3[i,] = tmp } }
lines(density(Xmat3[1:1.5e4,1]), lty=6, lwd=2, col="orange")
legend(locator(1), legend=c("Meth2, Blk2", "Meth2, Blk3", "Meth1, Final",
       "Meth2, Blk4-6", "Meth2, Final"), lty=c(1,1,1,5,6), lwd=c(1,1,2,1,2),
       col=c("red","blue","purple","black","orange"))
       ### picture saved as "MCMCdens.pdf"

### THE CONVERGENCE IS SLOWER IN METHOD 2, DESPITE FEWER WASTED STEPS,
### BUT THE ULTIMATE COORD.1 DENSITY IS PLAUSIBLY THE SAME.

### Just to round this out, let's try the random-walk method with larger steps.

tmp = c(0,0)
XmatB = array(0, c(1.5e4,2))
for(j in 1:20) {
   Uvec = runif(1.5e4)                       ## fresh random numbers for each
   steps = array(rnorm(3e4)*.5, c(1.5e4,2))  ## pass (drawing them once outside
                                             ## the loop would reuse them)
   for(i in 1:1.5e4) {
      aux = tmp + steps[i,]
      rho = exp(lfdens(aux) - lfdens(tmp))
      if (Uvec[i] < rho) tmp = aux
      XmatB[i,] = tmp } }

plot(density(Xmat[,1])$x, density(Xmat[,1])$y, type="n", xlim=c(-1.5,1.5),
     ylim=c(0,1), main="Overplotted Estimated Densities from MCMC",
     xlab="coord 1")
lines(density(XmatB[,1]), lwd=2)
lines(density(Xmat3[1:1.5e4,1]), lty=6, lwd=2, col="orange")
lines(density(XmatA[1:1.5e4,1]), lwd=2, col="purple")

#### This one converged quite a bit more quickly:

> sum(XmatB[-1,1] == XmatB[-1.5e4,1])
[1] 6575     ### again at the cost of many "wasted" steps
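
## One further diagnostic, not in the original log: the autocorrelation
## function quantifies the mixing trade-off among these samplers. A minimal
## sketch, assuming Xmat (Method 1), Xmat3 (Method 2, small steps), and
## XmatB (larger steps) are still in the workspace:

par(mfrow=c(1,3))
acf(Xmat[5001:1.5e4,1], main="Indep. proposal")    ## Method 1
acf(Xmat3[1:1.5e4,1],   main="RW, step sd = 0.1")  ## Method 2, small steps
acf(XmatB[1:1.5e4,1],   main="RW, step sd = 0.5")  ## larger steps
par(mfrow=c(1,1))

## Lag-1 autocorrelations summarize the comparison numerically: we expect the
## independence sampler's to be smallest and the small-step random walk's to
## be near 1, so fewer "wasted" steps does not mean faster mixing.
sapply(list(Xmat[5001:1.5e4,1], Xmat3[1:1.5e4,1], XmatB[1:1.5e4,1]),
       function(x) acf(x, plot=FALSE)$acf[2])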