Homework Set 15 Solutions
-------------------------
11/18/2016

> nvec = c(105, 150, 140, 81, 52, 95, 74, 112, 140, 75)
> Xvec = c(48, 111, 74, 15, 46, 44, 24, 81, 85, 33)
> Xvec/nvec
 [1] 0.4571429 0.7400000 0.5285714 0.1851852 0.8846154 0.4631579 0.3243243
 [8] 0.7232143 0.6071429 0.4400000

> prior = function(p) (9/49)*dbeta(p,.4,.4) + (15/49)*dbeta(p,1,1) + (25/49)*dbeta(p,2,2)
> curve(prior(x),0,1)
### trimodal in appearance, but with only a single interior (calculus) maximum, at p = 0.5

### (a) Exact posterior based on a specific (X, n) value

poster = function(p,X,n) dbeta(p,X+1,n-X+1)*prior(p)/
    ((9/49)*exp(lbeta(X+.4,n-X+.4)-lbeta(X+1,n-X+1)-lbeta(.4,.4)) +
     (15/49) + (25/49)*exp(lbeta(X+2,n-X+2)-lbeta(2,2)-lbeta(X+1,n-X+1)))

postcdf = function(p,X,n) {
    p1 = (9/49)*exp(lbeta(X+.4,n-X+.4)-lbeta(X+1,n-X+1)-lbeta(.4,.4))
    p3 = (25/49)*exp(lbeta(X+2,n-X+2)-lbeta(2,2)-lbeta(X+1,n-X+1))
    (pbeta(p,X+.4,n-X+.4)*p1 + (15/49)*pbeta(p,X+1,n-X+1) +
     pbeta(p,X+2,n-X+2)*p3)/(p1 + (15/49) + p3) }

## NOTE to grader: if a student says the posterior cdf is the mixture, with weights 9/49,
## 15/49, and 25/49, of the individual-component pbeta(p,X+lam,n-X+lam) posteriors,
## then take off 1.5 points out of 4 for this part (a).

### The interval endpoints can be found by inverting the posterior cdf at .025 and .975:

postCI = function(X,n) c(lower = uniroot(function(p) postcdf(p,X,n)-0.025, c(0,1))$root,
                         upper = uniroot(function(p) postcdf(p,X,n)-0.975, c(0,1))$root)

CImat = array(0, c(10,2), dimnames=list(paste0("Clus",1:10), c("lower","upper")))
for(i in 1:10) CImat[i,] = postCI(Xvec[i],nvec[i])

> round(t(CImat),4)
       Clus1  Clus2  Clus3  Clus4  Clus5  Clus6  Clus7  Clus8  Clus9 Clus10
lower 0.3658 0.6627 0.4462 0.1182 0.7642 0.3671 0.2313 0.6319 0.5237 0.3347
upper 0.5524 0.8020 0.6089 0.2871 0.9439 0.5629 0.4396 0.7956 0.6831 0.5529

### (b) Obtain the posteriors not from the conjugate-beta mixture formula but from
###     first principles, by direct numerical integration.

pXdens = function(p,X,n) prior(p)*p^X*(1-p)^(n-X)

> integrate(function(x) pXdens(x,48,105),0,1)
5.090465e-33 with absolute error < 3.1e-37     ## rel. error 3.1e-37/5.09e-33 = 6.1e-05

### So for X=48, n=105, the posterior density will be pXdens(p,48,105)/5.090465e-33.

## For the Laplace method we need the max of the integrand:
> pm = optimize(function(p) -pXdens(p,48,105), c(0,1))$min
[1] 0.4576265
### OK, this was obtained to high accuracy.  Now the Laplace-method change of variable:

logpX = function(p,X,n) log(prior(p)) + X*log(p) + (n-X)*log(1-p)
logdens = function(p,X,n,p.max) log(prior(p)) + X*log(p) + (n-X)*log(1-p) - logpX(p.max,X,n)

tmp = integrate(function(z) exp(logdens(z+pm,48,105,pm)), 1.e-8-pm, 1-1.e-8-pm)
$value
[1] 0.120346
$abs.error
[1] 7.411108e-06
### relative error 7.4111e-6/0.120346 = 6.2e-05 (essentially the same as the 6.1e-05
### found above without the translation by pm)

### Compare the true posterior density with the one obtained by
### change-of-variable direct integration:

> dens1 = poster(seq(.005,.995, by=.01),48,105)
  dens2 = numeric(100)
  for (i in 1:100) dens2[i] = exp(-log(tmp$val) + logdens(i*.01-.005,48,105,pm))

> summary(dens1-dens2)
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max.
0.000e+00 0.000e+00 1.000e-16 4.381e-11 1.137e-11 3.635e-10

### So the directly integrated posterior agrees closely with the exact formula.
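### (An extra numerical check, not required by the assignment; it uses only objects
### already defined above, and the cutoff 0.55 is an arbitrary illustrative value.)
### Parts (a) and (b) can be tied together one more way: the closed-form mixture cdf
### postcdf() should match direct numerical integration of the density poster(), and the
### Clus1 interval from part (a) should carry posterior probability 0.95.

> integrate(function(p) poster(p,48,105), 0, 0.55)$value   ## numerical P(p <= 0.55 | X=48, n=105)
> postcdf(0.55,48,105)                                     ## closed-form cdf: should agree closely
> postcdf(CImat[1,2],48,105) - postcdf(CImat[1,1],48,105)  ## should be 0.95 up to uniroot tolerance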
### (c) Now use the Laplace formula based on a 2nd-order Taylor approximation, in p
###     around p.max, of the log density.

LaplAppr = function(X,n) {
    tmp = nlm(function(p) -log(pXdens(p,X,n)), 0.5, hessian=T, steptol=1.e-10, gradtol=1.e-10)
    D = tmp$hess
    c(pm = tmp$est, D = D, Appx.Intg = sqrt(2*pi/D)) }

> LaplAppr(48,105)
        pm           D   Appx.Intg
 0.4576266 427.8059905   0.1211899

### The approximation to the integral of exp(logdens(p,48,105,pm)) is 0.12119,
### whereas we saw above that the correct answer was 0.12035.

For the other clusters, the ratios of the Laplace approximations to the correct values
of the posterior denominators are:

> approx.ratio = numeric(10)
  for(i in 1:10) {
      tmp1 = LaplAppr(Xvec[i],nvec[i])
      tmp2 = integrate(function(p) exp(logdens(p,Xvec[i],nvec[i],tmp1[1])),0,1)
      approx.ratio[i] = tmp1[3]/tmp2$val }

> round(rbind(ratio = approx.ratio, nvec = nvec),3)
         [,1]    [,2]    [,3]   [,4]   [,5]   [,6]   [,7]    [,8]    [,9]  [,10]
ratio   1.007   1.004   1.005  1.007  1.007  1.008  1.009   1.006   1.005   1.01
nvec  105.000 150.000 140.000 81.000 52.000 95.000 74.000 112.000 140.000 75.00

## So these approximations are quite good, and the ratios move closer to 1 as n increases.
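### (A further optional check, not required by the assignment; it uses only pm and logpX
### from part (b), and the step size h = 1e-5 is an arbitrary illustrative choice.)
### The Laplace value sqrt(2*pi/D) can be reproduced without nlm, since D is just
### -d^2/dp^2 logpX(p,48,105) at the mode pm and logdens(pm,48,105,pm) = 0 by construction;
### a centered finite-difference estimate of the second derivative gives essentially the
### same answer:

> h = 1e-5
> D2 = -(logpX(pm+h,48,105) - 2*logpX(pm,48,105) + logpX(pm-h,48,105))/h^2
> D2                ## should be close to the nlm Hessian value 427.806
> sqrt(2*pi/D2)     ## should be close to Appx.Intg = 0.1211899 from LaplAppr(48,105)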