HW 20 Stat 705 Solution ## (A) > library(faraway) Fat = fat[-c(39,42),c("height","weight")] dim(Fat) [1] 250 2 > fit1 = lm(weight ~ height, data=Fat) > round(summary(fit1)$coef,5) Estimate Std. Error t value Pr(>|t|) (Intercept) -194.48613 39.62298 -4.90842 0 height 5.29948 0.56321 9.40934 0 > apply(Fat,2,mean) height weight 70.3030 178.0832 > summary(fit1)$sig [1] 23.2551 > abline(6,2.5, col="blue") > abline(Fat$coef[1],Fat$coef[2], col="orange") > sqrt(quantile(1/rgamma(1e5,.4,100), prob=c(.05,.25,.5,.75,.95))) 5% 25% 50% 75% 95% 7.73515 14.07062 26.07546 64.51002 484.26703 ### So there is a LOT of uncertainty in the standard deviation, which ### allows our prior for (a,b) to be so different from the fitted values ### and yet somewhat compatible with the data. ## Let x = Fat$height and y = Fat$weight , both 250-dimensional vectors. ## From discussion in class and direct calculation we find that the posterior ## density of the parameters is proportional to : exp(-(a-6)^2/8 - 2*(b-2.5)^2) * tau^(.4-1) * exp(-100*tau) * tau^(250/2) * exp( -(tau/2)*sum_{j=1}^{250} (y[i]-a-b*x[i])^2 ) ## Therefore the full conditional posterior densities are: tau given (a,b,x,y) ~ Gamma(125.4, 100 + 0.5*sum((y-a-b*x)^2) ) a given (tau,b,x,y) ~ N( (3+500*tau*(mean(y)-b*mean(x))/(0.5+500*tau) , 1/(0.25 + 250*tau) ) ## this is variance not sd b given (tau,a,x,y) ~ N( (10+tau*sum(x*(y-a)))/(4+tau*sum(x^2)), 1/(4+tau*sum(x^2)) ) ## Therefore we code the Gibbs sampler, starting from the least-squares fit, ## as follows (taking the parameters in the order a,b,tau): parm = c(fit1$coef, 1/summary(fit1)$sig^2) x = Fat$height; y = Fat$weight xbar = mean(x) ybar = mean(y) sx2 = sum(x^2) sxy = sum(x*y) parMat = array(0, c(1.5e4, 3)) for(k in 1:1.5e4) { parm[1] = (3+500*parm[3]*(mean(y)-parm[2]*mean(x)))/(0.5+500*parm[3]) + sqrt(1/(0.25+250*parm[3]))*rnorm(1) parm[2] = (10+parm[3]*(sxy-250*parm[1]*xbar))/(4+parm[3]*sx2) + sqrt(1/(4+parm[3]*sx2))*rnorm(1) parm[3] = rgamma(1, 125.4, 100 + 0.5*sum((y-parm[1]-parm[2]*x)^2)) parMat[k,] = parm } > hist(parMat[5001:15000,1], nclass=50, prob=T, xlab="a", main="") > hist(parMat[5001:15000,2], nclass=50, prob=T, xlab="b", main="") > hist(1/sqrt(parMat[5001:15000,3]), nclass=50, prob=T, xlab="sig", main="") ### These values are respectively centered near: > round(c(median(parMat[5001:15000,1]), median(parMat[5001:15000,2]), 1/sqrt(median(parMat[5001:15000,3]))), 4) [1] 5.4918 2.4595 24.3802 ### We can expect that this would not change with another several blocks ### of 10000 Gibbs-Sampler iterations. ### (i)-(ii) smoothed posterior density estimates of a, b, and sig , overplotted ### with normal densities of same mean and variance > kerfcn = function(xnew, bw, pts, kern=function(x) dnorm(x)) { n = length(pts) c(outer(xnew, pts, function(x,y) kern((x-y)/bw)/bw) %*% rep(1/n,n)) } > tmp = parMat[5001:15000,1] hist(tmp, nclass=60, prob=T, xlab="Intercept Param = a", main= "Overplotted Hist, Density, Normal Approx.") curve(kerfcn(x,.25, tmp), -2, 15, add=T, col="orange") curve(dnorm(x,mean(tmp),sd(tmp)),-2,15, add=T, col="blue") ### Very good approximation here > tmp = parMat[5001:15000,2] hist(tmp, nclass=60, prob=T, xlab="Slope Param = b", main= "Overplotted Hist, Density, Normal Approx.") curve(kerfcn(x,.02, tmp), 2.3, 2.6, add=T, col="orange") curve(dnorm(x,mean(tmp),sd(tmp)),2.3,2.6, add=T, col="blue") ### Here the approximation is not so good: posterior is slightly less peaked ### and more spread than the approximating normal with same mean and variance ### > tmp = 1/sqrt(parMat[5001:15000,3]) hist(tmp, nclass=60, prob=T, xlab="SD Param = sig", main= "Overplotted Hist, Density, Normal Approx.") curve(kerfcn(x,.2, tmp), 20, 30, add=T, col="orange") curve(dnorm(x,mean(tmp),sd(tmp)),20,30, add=T, col="blue") ### Here the approximation is again very good. ### (iii) the posterior medians > round(c(median(parMat[5001:15000,1]), median(parMat[5001:15000,2]), 1/sqrt(median(parMat[5001:15000,3]))), 4) [1] 5.4918 2.4595 24.3802 ### (iv) 90% Bayesian credible intervals > round(apply(cbind(parMat[5001:15000,1:2], 1/sqrt(parMat[5001:15000,3])),2, function(col) quantile(col, prob=c(.05,.95))), 3) [,1] [,2] [,3] 5% 2.198 2.401 22.723 95% 8.822 2.517 26.328 ### (v) 90% (frequentist, least-squares) confidence intervals calculated from ### the original "fat" dataset. > inta = fit1$coef[1] + c(-1,1)*qnorm(.95)*summary(fit1)$coef[1,2] [1] -259.6601 -129.3121 intb = fit1$coef[2] + c(-1,1)*qnorm(.95)*summary(fit1)$coef[2,2] [1] 4.373074 6.225885 ### posterior probability that a and b BOTH lie within these intervals > mean(parMat[5001:15000,1]>inta[1] & parMat[5001:15000,1] < inta[2] & parMat[5001:15000,2]>intb[1] & parMat[5001:15000,1] < intb[2]) ### = 0 ### (B) The purpose of the generation of new "pseudo"-datasets in the last part is ## to see whether the "posterior predictive" data-generating mechanism gives data ## with the same general qualitative properties as the original dataset. You can ## think of this as a kind of goodness-of-fit check. ## "Posterior-predictive" means that we want to use the same model, but with ## parameters (a,b,tau) that are consistent with what we know about those parameters ## after looking at the dataset and computing the posterior. Thus,the parameters ## (a,b,tau) should be drawn from the POSTERIOR not the prior ! Then, from the ## parameters, we generate brand-new data using the conditional density f(y|a,b,tau,x). param = parMat[c(14200, 14400, 14600, 14800),] > param [,1] [,2] [,3] [1,] 3.285990 2.489228 0.001947559 [2,] 7.597364 2.397629 0.001975801 [3,] 1.498228 2.535951 0.001572550 [4,] 1.802602 2.526802 0.001551472 ### Recall that the 3rd column is tau=1/sig^2 ## Now simulate 4 datasets y = weight of size 250, holding x = height fixed. > NewDat = array(0, c(250,4)) for(i in 1:4) NewDat[,i] = param[i,1]+param[i,2]*Fat$height+ rnorm(250)/sqrt(param[i,3]) > plot(Fat$height, NewDat[,1]) points(Fat$height, Fat$weight, pch=18) ### The empty circlesare the posterior predictive pseudo-data; ### they look very much like the original data (solid diamonds) except for ### extra points along the lower boundary of the point cloud ! > plot(Fat$height, NewDat[,2]) points(Fat$height, Fat$weight, pch=18) ## similar phenomenon, the old points (black diamonds) are slightly higher ## and to the left of the 2nd sample of pesudo-data. > plot(Fat$height, NewDat[,3]) points(Fat$height, Fat$weight, pch=18) abline(fit1$coef, col="red") ## now the old points are generally more concentrated toward the fit1 line > plot(Fat$height, NewDat[,4]) points(Fat$height, Fat$weight, pch=18) ### similar pattern as for plots of pseudo datasets 1 and 2.