R Log on MCMC, Final 2 Classes of Fall 2017 STAT 705
====================================================
Dec.6 & 11, 2017

Examples of Statistical Uses of Bayes Posteriors Obtained by MCMC
=================================================================

(1) Recall the "Nuclear Pump" Example from the Dec.4 Log (Class Log 15).

> t(NucPmp)
     [,1] [,2] [,3]  [,4] [,5] [,6] [,7] [,8] [,9] [,10]
Fail  5.0  1.0  5.0  14.0  3.0 19.0  1.0    1  4.0  22.0
Tim  94.3 15.7 62.9 125.8  5.2 31.4  1.1    1  2.1  10.5

## lam1,...,lam10 were the parameters for failure rates on the 10 pumps,
## and beta was the prior parameter for the lam[i] ~ Gamma(1.8, beta).

We generated a block of 5000 successive MCMC 10-vector values

> LamBlock = tmp[,1:10]   ## from the posterior density for lam1,...,lam10

## Compare posterior means and medians with the MLEs:

> round(rbind(MLE = NucPmp[,1]/NucPmp[,2], Post.Exp = apply(LamBlock,2,mean),
              Post.med = apply(LamBlock,2,median)), 3)
          [,1]  [,2]  [,3]  [,4]  [,5]  [,6]  [,7]  [,8]  [,9] [,10]
MLE      0.053 0.064 0.079 0.111 0.577 0.605 0.909 1.000 1.905 2.095
Post.Exp 0.071 0.154 0.104 0.123 0.632 0.616 0.802 0.812 1.284 1.836
Post.med 0.067 0.134 0.099 0.121 0.584 0.607 0.689 0.703 1.206 1.803

## Discuss the effect of a shared prior parameter !!
## THE MAIN POINT IS THAT THE BAYES FORMULATION CAUSES THE HIGHEST PUMP
## ESTIMATES TO BE SHADED DOWN AND THE SMALLEST TO BE SHADED UP.

## Looking at the posterior for beta, we find expectation

> mean(tmp[,11])
[1] 2.511802

## which suggests that the estimated prior mean for the lam[i] values was
## 1.8/2.5118 = 0.7166, a point toward which the posterior lambda's are pushed.

> plot(density(LamBlock[,1]), xlim=c(0,2.3), xlab="lambda",
       main="Overlaid Posterior densities for Pump lambda's")
  for(j in 2:10) lines(density(LamBlock[,j]), lty=j)
  legend(locator(1), legend=paste0("Pump ",1:10), lty=1:10)
       ### saved as "PumpLambdas.pdf"

## THE VERY NARROW POSTERIOR LAMBDA DENSITIES ARE THE ONES WITH LARGE t_i's.

## We can further obtain 95% Bayesian credible intervals for the lambda_i's:

> round(rbind(med = apply(LamBlock,2,median),
        apply(LamBlock, 2, function(col) quantile(col, prob=c(.025,.975)))), 3)
       [,1]  [,2]  [,3]  [,4]  [,5]  [,6]  [,7]  [,8]  [,9] [,10]
med   0.067 0.134 0.099 0.121 0.584 0.607 0.689 0.703 1.206 1.803
2.5%  0.028 0.029 0.042 0.070 0.191 0.377 0.141 0.139 0.439 1.153
97.5% 0.135 0.385 0.193 0.191 1.338 0.918 2.073 2.144 2.648 2.683

## Instead of the density estimate we get by applying "density" to
## LamBlock[,i], another method is to use the full conditional density
## f(lam[i] | beta) ~ Gamma(n_i + 1.8, t_i + beta) averaged over beta:

> plot(density(LamBlock[,6]), xlab="lambda",
       main="Density estimates for lambda[6]")
  curve( apply(outer(x, tmp[,11], function(x,y) dgamma(x, NucPmp[6,"Fail"]+1.8,
         NucPmp[6,"Tim"]+y)), 1, mean), add=T, col="blue")
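
## As an aside (not in the original log): here is a minimal sketch of the
## kind of Gibbs sampler that could have produced the matrix tmp above,
## assuming a Gamma(gam, del) hyperprior on beta, with full conditionals
##     lam[i] | beta, data  ~  Gamma(n_i + 1.8, t_i + beta)
##     beta   | lam,  data  ~  Gamma(10*1.8 + gam, del + sum(lam)).
## The hyperparameter values gam and del below are illustrative placeholders,
## not necessarily the ones used in the Dec.4 log.

gam = 0.01 ; del = 1                      ## ASSUMED hyperprior values
nFail = NucPmp[,"Fail"] ; tTim = NucPmp[,"Tim"]
tmp = array(0, c(5000, 11))
lam = nFail/tTim ; beta = 1               ## crude starting values
for(b in 1:5000) {
   lam  = rgamma(10, nFail + 1.8, tTim + beta)   ## update the 10 lam[i]'s
   beta = rgamma(1, 10*1.8 + gam, del + sum(lam))  ## then update beta
   tmp[b,] = c(lam, beta) }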
Another Gibbs Sampler Example
=============================

OBJECTIVE: to simulate from the "circular" Markov chain X1,...,X20 in which

  X1  | X2,...,X20                      = .3*X2      + .3*X20     + 0.4*rlogis(1)
  Xj  | X1,...,X_{j-1},X_{j+1},...,X20  = .3*X_{j-1} + .3*X_{j+1} + 0.4*rlogis(1)
  X20 | X1,...,X19                      = .3*X19     + .3*X1      + 0.4*rlogis(1)

where all of the logistic errors are independent of the X's conditioned on.

#--------------------------------------------------

tmp = rlogis(20)
Xmat = array(0, c(1.5e4,20))
for(i in 1:1.5e4) {
   tmp[1] = .3*tmp[2] + .3*tmp[20] + .4*rlogis(1)
   for(j in 2:19) tmp[j] = .3*tmp[j-1] + .3*tmp[j+1] + .4*rlogis(1)
   tmp[20] = .3*tmp[1] + .3*tmp[19] + .4*rlogis(1)
   Xmat[i,] = tmp }

plot(density(c(Xmat[5001:13000,])), xlab="Xj density",
     main="Estimated Density of Xj")
lines(density(c(Xmat[13001:15000,17])), col="red")
     ### Close agreement suggests convergence.

Metropolis-Hastings Algorithm Examples
======================================

(O) [See the last segment of Rlogs/MCMC_Rlog09.txt for univariate examples,
    extended here. Also see p.5 of the notes at
    https://www.math.umd.edu/~slud/Mini.MCMC/Lec1Figs/MCMCtalk.pdf
    for how Metropolis-Hastings with a proposal q(x,y) depending on y alone
    can be viewed as a direct generalization of accept-reject methods.]

(1) Simulate from:  f(x) = C * exp(x[1] + x[2] - (1+x[1]^4)*(1+x[2]^4))

### Here we could try a method with full conditionals, but these are messy
### and not easy to simulate from. Instead try the proposal kernel

    q(x,y) = (1/pi) * exp(-0.5 + y[1] + y[2] - y[1]^2 - y[2]^2)

### i.e. a bivariate normal with independent coordinates, each with mean 0.5
### and variance 0.5.

logqker = function(xv,yv)  yv[1] + yv[2] - yv[1]^2 - yv[2]^2
lfdens  = function(xv)  xv[1] + xv[2] - (1+xv[1]^4)*(1+xv[2]^4)

Xmat = array(0, c(1.5e4,2))
tmp = c(0,0)
Uvec = runif(1.5e4)
for(i in 1:1.5e4) {
   aux = rnorm(2, 0.5, sqrt(0.5))
   rho = exp(lfdens(aux) - lfdens(tmp) + logqker(aux,tmp) - logqker(tmp,aux))
   if (Uvec[i] < rho) tmp = aux
   Xmat[i,] = tmp }

> sum(Xmat[-1,1] == Xmat[-1.5e4,1])
[1] 5511        ### so roughly 1 out of 3 proposed steps was "rejected"

> hist(Xmat[1001:1e4,1], prob=T, nclass=50)
  lines(density(Xmat[1001:1e4,1]), col="red")

### How do we check this ???  Compute the first marginal of f numerically:

fmarg = function(x) {
   m = length(x)
   out = numeric(m)
   for(i in 1:m) out[i] = integrate(function(y)
          exp(x[i] + y - (1+x[i]^4)*(1+y^4)), -Inf, Inf)$val
   out }

> integrate(fmarg, -Inf, Inf)
1.574694 with absolute error < 5.5e-05

> hist(Xmat[1001:1e4,1], prob=T, nclass=50, xlab="coord 1",
       main="Overplotted Estimated Densities from MCMC")
  lines(density(Xmat[1001:1e4,1]), col="red")
  curve(fmarg(x)/1.574694, -2, 2, add=T, col="blue")
  lines(density(Xmat[10001:1.5e4,1]), col="green")

### IN THIS CASE, WE DO NOT HAVE TO WORRY ABOUT END EFFECTS OF THE DENSITY
### ESTIMATE, SO THE PLOT SHOWING AGREEMENT BETWEEN THE HISTOGRAM AND ITS
### (red) DENSITY ESTIMATE IS COMPLETELY PREDICTABLE. BUT THE AGREEMENT
### BETWEEN THAT DENSITY ESTIMATE AND THE (blue) EXACT MARGINAL OBTAINED BY
### INTEGRATING OUT THE SECOND COORDINATE GIVES EXTRA CONFIRMATION, AND THE
### LATTER IS THE MORE RELIABLE AND LESS ARBITRARY DENSITY-ESTIMATE CHOICE.

### Now iterate many more times to check final convergence:

XmatA = array(0, c(1.5e4,2))
tmp = c(0,0)
for(j in 1:50) {
   Uvec = runif(1.5e4)
   for(i in 1:1.5e4) {
      aux = rnorm(2, 0.5, sqrt(0.5))
      rho = exp(lfdens(aux) - lfdens(tmp) + logqker(aux,tmp) - logqker(tmp,aux))
      if (Uvec[i] < rho) tmp = aux
      XmatA[i,] = tmp } }

lines(density(XmatA[1:1.5e4,1]), lwd=2, col="purple")

### So we confirm that convergence happened quickly despite the "wasted" steps.
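
## As an aside (not in the original log): the independence-proposal sampler
## above is easy to package as a reusable function that reports the
## acceptance rate directly instead of counting repeated values afterward.
## The function name mhIndep and its argument names are illustrative choices,
## not from the class log.

mhIndep = function(nstep, start, lf, draw, lq) {
   ## nstep MH steps; draw() samples a proposal, lq(y) is its log density
   ## (up to an additive constant), lf is the log target (up to a constant)
   X = array(0, c(nstep, length(start)))
   tmp = start ; nacc = 0
   for(i in 1:nstep) {
      aux = draw()
      if (runif(1) < exp(lf(aux) - lf(tmp) + lq(tmp) - lq(aux))) {
         tmp = aux ; nacc = nacc + 1 }
      X[i,] = tmp }
   list(X = X, acc.rate = nacc/nstep) }

out = mhIndep(1.5e4, c(0,0), lfdens, function() rnorm(2, 0.5, sqrt(0.5)),
              function(yv) yv[1] + yv[2] - yv[1]^2 - yv[2]^2)
out$acc.rate    ## should again come out near 2/3, matching the count above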
(2) METROPOLIS-HASTINGS EXAMPLE WITH q(x,y) DEPENDING ON BOTH x AND y.

## First let's re-do the last example taking q(x,y) to be the kernel of a
## random walk with small independent normal steps in R^2. NB: since
## q(x,y) = prod(dnorm(y-x, 0, .1)) = q(y,x), the proposal-density terms
## cancel from the Hastings ratio.

tmp = c(0,0)
lfdens = function(xv)  xv[1] + xv[2] - (1+xv[1]^4)*(1+xv[2]^4)
Xmat2 = array(0, c(1.5e4,2))
Uvec = runif(1.5e4)
steps = array(rnorm(3e4)*.1, c(1.5e4,2))
for(i in 1:1.5e4) {
   aux = tmp + steps[i,]
   rho = exp(lfdens(aux) - lfdens(tmp))
   if (Uvec[i] < rho) tmp = aux
   Xmat2[i,] = tmp }

> sum(Xmat2[-1,1] == Xmat2[-1.5e4,1])
[1] 1481     #### far fewer "wasted" steps (now about 10%), but the chain
             #### may still take a long time to converge

> hist(Xmat2[5001:1e4,1], prob=T, nclass=50, ylim=c(0,1), xlab="coord 1",
       main="Overplotted Estimated Densities from MCMC")
  lines(density(Xmat2[5001:1e4,1]), col="red")
  lines(density(Xmat2[10001:1.5e4,1]), col="blue")
  lines(density(XmatA[1:1.5e4,1]), lwd=2, col="purple")

#### NOW THE PURPLE CURVE IS THE ONE FROM THE PREVIOUS METHOD, AND THE OTHER
#### CURVES COME FROM THE TWO POST-BURN-IN BLOCKS USING THE CURRENT
#### (random-walk) METHOD.

> Xmat3 = Xmat2
  tmp = Xmat2[1.5e4,]
  steps = array(rnorm(3e4)*.1, c(1.5e4,2))
  Uvec = runif(1.5e4)
  for(i in 1:1.5e4) {
     aux = tmp + steps[i,]
     rho = exp(lfdens(aux) - lfdens(tmp))
     if (Uvec[i] < rho) tmp = aux
     Xmat3[i,] = tmp }
  lines(density(Xmat3[10001:1.5e4,1]), lty=5)

### We can see that these further iterations shift the density to the right,
### toward the blue density.

tmp = Xmat3[1.5e4,]
for(j in 1:300) {
   steps = array(rnorm(3e4)*.1, c(1.5e4,2))
   Uvec = runif(1.5e4)
   for(i in 1:1.5e4) {
      aux = tmp + steps[i,]
      rho = exp(lfdens(aux) - lfdens(tmp))
      if (Uvec[i] < rho) tmp = aux
      Xmat3[i,] = tmp } }
lines(density(Xmat3[1:1.5e4,1]), lty=6, lwd=2, col="orange")
legend(locator(1), legend=c("Meth2, Blk2", "Meth2, Blk3", "Meth1, Final",
       "Meth2, Blk4-6", "Meth2, Final"), lty=c(1,1,1,5,6), lwd=c(1,1,2,1,2),
       col=c("red","blue","purple","black","orange"))
       ### picture saved as "MCMCdens.pdf"

### THE CONVERGENCE IS SLOWER IN METHOD 2, DESPITE FEWER WASTED STEPS,
### BUT THE ULTIMATE COORD.1 DENSITY IS PLAUSIBLY THE SAME.

### Just to round this out, let's try the random-walk method with larger steps.

tmp = c(0,0)
XmatB = array(0, c(1.5e4,2))
for(j in 1:20) {
   Uvec = runif(1.5e4)                       ## fresh random numbers for each
   steps = array(rnorm(3e4)*.5, c(1.5e4,2))  ## pass (drawing them once outside
                                             ## the loop would reuse them)
   for(i in 1:1.5e4) {
      aux = tmp + steps[i,]
      rho = exp(lfdens(aux) - lfdens(tmp))
      if (Uvec[i] < rho) tmp = aux
      XmatB[i,] = tmp } }

plot(density(Xmat[,1])$x, density(Xmat[,1])$y, type="n", xlim=c(-1.5,1.5),
     ylim=c(0,1), main="Overplotted Estimated Densities from MCMC",
     xlab="coord 1")
lines(density(XmatB[,1]), lwd=2)
lines(density(Xmat3[1:1.5e4,1]), lty=6, lwd=2, col="orange")
lines(density(XmatA[1:1.5e4,1]), lwd=2, col="purple")

#### This one converged quite a bit more quickly:

> sum(XmatB[-1,1] == XmatB[-1.5e4,1])
[1] 6575     ### again at the cost of many "wasted" steps
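
## One further diagnostic, not in the original log: the autocorrelation
## function quantifies the mixing trade-off among these samplers. A minimal
## sketch, assuming Xmat (Method 1), Xmat3 (Method 2, small steps), and
## XmatB (larger steps) are still in the workspace:

par(mfrow=c(1,3))
acf(Xmat[5001:1.5e4,1], main="Indep. proposal")    ## Method 1
acf(Xmat3[1:1.5e4,1],   main="RW, step sd = 0.1")  ## Method 2, small steps
acf(XmatB[1:1.5e4,1],   main="RW, step sd = 0.5")  ## larger steps
par(mfrow=c(1,1))

## Lag-1 autocorrelations summarize the comparison numerically: we expect the
## independence sampler's to be smallest and the small-step random walk's to
## be near 1, so fewer "wasted" steps does not mean faster mixing.
sapply(list(Xmat[5001:1.5e4,1], Xmat3[1:1.5e4,1], XmatB[1:1.5e4,1]),
       function(x) acf(x, plot=FALSE)$acf[2])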