Solution Log for HW7 problem in Stat 798c ========================================= 5/14/03 > HW7dat <- scan("/home1/evs/public_html/s798c/Data/HW7data") > summary(HW7dat) ### vector of lgth 500 Min. 1st Qu. Median Mean 3rd Qu. Max. 0.003383 0.1992 0.5597 1.161 1.388 15 > motif() > hist(HW7dat, nclass=60, prob=T) (a) Kernel-density method, dlogis with bandwidth .1 . > pts7 <- sort(HW7dat)[(1:49)*10 + 5] kerMat7 <- outer(pts7, HW7dat, function(x,y) dlogis(x,y,.1)) lines(pts7, c(kerMat7 %*% rep(.002,500)) , lty=3) ### The plot shows this is VERY hard to fit way out in the tail where ### the density is small. (b) Smoothing-spline method. > spl7 <- smooth.spline(sort(HW7dat), (1: 500)/500, spar=5.e-7, all.knots=F) spldns7 <- predict.smooth.spline(spl7, c(sort(HW7dat)[ c((1:49)*10,491:500)],15) -> aux7, deriv=1) lines(aux7, spldns7$y) (c) Recall the previously defined function > lLk function(x, pwt, mn, scal, dens = dlogis) { invsc <- 1/scal dnsmat <- invsc*matrix(dens(invsc* outer(mn,x,"-")), ncol=length(x)) sum(log(pwt %*% dnsmat)) } ### But note that there is no hope to fit normal to the data-values ### themselves: instead fit to their logs, or alternatively get ### mixture of log-normal densities > lgdt7 <- log(HW7dat) > tmpft7 <- nlmin(function(w) -lLk(lgdt7,c(plogis(w[1]), (1-plogis(w[1]))*plogis(w[2]),(1-plogis(w[1]))* (1-plogis(w[2]))), w[3:5], exp(w[6:8]), dens=dnorm), c(1,0,log(c(.5,3,6)),log(c(.1,.3, .2))), print.level=1, max.fcal=300, max.iter=100) 0 1 it nf f reldf preldf reldx 0 1 1.208e4 1 2 2.610e3 7.840e-1 1.743e0 2.097e-1 2 4 1.184e3 5.462e-1 6.445e-1 3.158e-1 3 5 1.013e3 1.450e-1 1.023e0 4.263e-1 4 7 9.908e2 2.149e-2 1.075e-1 8.528e-2 5 8 9.156e2 7.589e-2 1.340e-1 1.061e-1 6 9 8.950e2 2.254e-2 3.941e-2 8.010e-2 ... 49 63 8.674e2 1.385e-10 1.016e-10 1.470e-2 50 64 8.674e2 7.263e-11 5.124e-11 1.428e-2 ***** relative function convergence ***** function 8.673897e2 reldx 1.4276e-2 function evaluations: 64 gradient evaluations: 571 preldf 5.1237e-11 npreldf 5.1295e-11 ### Now we have parameters for the mixture of normals ## on the logs of the observations: > tmpft7$x [1] -22.6712479 0.5620639 4.7054183 -1.2237658 0.3266433 [6] 15.0291864 0.2808054 -0.1560827 ## So the weights are plogis(-22.67) = 0, plogis(.56206) = .6369, ## and .3631. The two surviving components have ## means -1.22377 and 0.322664, and variances > exp(tmpft7$x[7:8]) [1] 1.3241959 0.8554884 > hist(lgdt7, nclass=60, prob=T) > lines(-6 + 9*(1:200)/200 -> xt , .6369*dnorm(xt,-1.22377, 1.32420) + .3631*dnorm(xt,.322664,.855488)) ### Not a very good fit: but not a bad one to the derivative of the ### smoothed df. (d) Upper quartiles: ### First superpose the df's in place of normal densities and solve ### quantile equation > Fest7 <- function(x) { tmpm <- outer(x,HW7dat, function(x,y) .002*pnorm(10*(x-y))) c(tmpm %*% rep(1, 500)) - 0.75 } > Fest7(1:2) [1] -0.09710544 0.07532441 > uniroot( Fest7, interval=c(1,2))$root [1] 1.418043 #### Estimated root for kernel-density method > uniroot( function(x) predict.smooth.spline(spl7,x)$y-.75, interval=c(1,2))$root [1] 1.399578 ### Estimated upper quartile using splines > exp(uniroot(function(x) .6369*pnorm(x,-1.22377, 1.32420) + .3631*pnorm(x,.322664,.855488) -0.75, log(1:2))$root) [1] 1.427958 ### Estimated upper quartile from (log-)normal mixture (e) Bootstrap estimate (1000 replications): ### First generate a confidence interval using the spline > tmp7 <- smooth.spline((1:500)/500, sort(HW7dat), spar=5.e-7, all.knots=F) ### Now generate data in blocks of 100. > pboot7 <- numeric(1000) for(i in 1:10) { tmpm <- matrix(predict.smooth.spline(tmp7,runif(50000))$y, ncol=100) pboot7[(i-1)*100+1:100] <- apply(tmpm,2, function(vec) quantile(vec,.75))} ### Now pboot7 contains 1000 upper-quartile values: the mean is ### the bootstrap estimate, > mean(pboot7) [1] 1.417956 ### and the empirical .025 to .975 quantile interval is the ### confidence interval > quantile(pboot7,c(.025,.975)) 2.5% 97.5% 1.235356 1.741055 ### VERY WIDE !!! ### Next the nonparametric bootstrap: > npboot7 <- apply(matrix(sample(HW7dat,5e5, replace=T), ncol=1000),2, function(vec) quantile(vec,.75)) > c(quantile(HW7dat,.75), mean(npboot7)) 75% 1.388275 1.416397 ### respectively, the upper quartile for this ### dataset and the bootstrap estimated upper quartile ### Now comes the nonparametric bootstrap 95% CI: > 1.388275 - quantile(npboot7-1.388275,c(.975,.025)) 97.5% 2.5% 1.062359 1.532689 ### Note the asymmetry !!!