## Script creating example data for Class Log on Data-Splitting ## Parametric Bootstrap , and Posterior Predictive Simulation ##===================================================== 11/25/15 set.seed(5) x = runif(200) y = 3 - 7*x + 10*(0.1+abs(x-.5))^1.5*rt(200,10) rtwt = 1/(0.1+abs(x-.5))^1.5 Dset = cbind.data.frame(y, x, wt=rtwt^2) tmp1 = lm(y~x, data=Dset[1:100,], weight=wt) TstDat = TrnDat = array(0, c(1000,100)) TrnMSE = SplMSE = numeric(1000) for(i in 1:1000) { ind = sample(1:200, 100) TrnDat[i,] = setdiff(1:200,ind) tmp = lm(y ~ x, weights=wt, data=Dset[TrnDat[i,],]) TrnMSE[i] = mean(tmp$resid^2) SplMSE[i] = mean((Dset$y[ind] - predict(tmp,Dset[ind,], interval="none", type="response", weights=Dset$wt[ind]))^2) } xvec = x[1:100] wts = Dset$wt[1:100] AuxY = tmp1$fitted + array(rnorm(100*1000,0, rep((0.1+abs(xvec-0.5))^1.5*summary(tmp1)$sig,100)), c(100,1000)) MSEboot = numeric(1000) for(i in 1:1000) MSEboot[i] = mean(lm(AuxY[,i]~xvec, weights=wts)$resid^2) Xarr = array(runif(1e4), c(100,100)) ### index (b,i) Yarr = array(0, c(1000,100,100)) ## indices (r,b,i) : r indexes y-vector rep. within fixed X^(b), ### and i=1:100 indexes data points within each batch of n=100. abhat = tmp1$coef sighat = summary(tmp1)$sig epsarr = array(0, c(100,1000)) Wtarr = (0.1+abs(Xarr-0.5))^(-1.5) for(b in 1:100) { epsarr[,] = rnorm(1e5) Yarr[,b,] = t( (abhat[1] - abhat[2]*Xarr[b,]) + (sighat*(0.1+abs(Xarr[b,]-0.5))^1.5)*epsarr ) } Ypred = replace(Yarr,T,0) ### array like Yarr filled with 0's Xmn = apply(Xarr,1,mean) XctrArr = -Xmn + Xarr SXXsq = apply(XctrArr^2,1,sum) auxmat = array(outer(rep(1,1000), XctrArr)*Yarr, c(1000*100,100)) b2hatarr = array( c( auxmat %*% rep(1,100))/ rep(SXXsq, rep(1000,100)), c(1000,100)) ### 1000 x 100 (r,b) b1hatarr = apply(Yarr,1:2,mean) - t( Xmn*t(b2hatarr) ) for(b in 1:100) Ypred[,b,] = b1hatarr[,b] + outer(b2hatarr[,b],Xarr[b,]) MSEarr = apply( (Yarr-Ypred)^2, 1:2, mean) ### 1000x100 (r,b) Biasarr = apply(Ypred - Yarr, 1:2, mean) xvec2 = runif(100) MSEboot2 = numeric(1000) for(i in 1:1000) MSEboot2[i] = mean(lm(AuxY[,i]~xvec2, weights=wts)$res^2) Infmat = array(c(sum(Dset$wt), sum(Dset$wt*Dset$x), 0, sum(Dset$wt*Dset$x), sum(Dset$wt*Dset$x^2), 0, 0, 0, nrow(Dset)/sighat^2)/sighat^2, c(3,3)) library(MASS) sghatsqArr = apply( (Yarr-Ypred)^2, 1:2, mean) beta = mean(c(sghatsqArr))/var(c(sghatsqArr)) alph = beta*mean(c(sghatsqArr))