Class Log on Importance Sampling with another Specific Example ============================================================== 10/11/17 Here is another imoportance sampling example: Y_i ~ iid logistic with density dlogis, which is symmetric with mean 0 and variance pi^2/3 Find: P( max_{k=1,..,10} k^(-1/2) \sum_{i=1}^k Y_i > 7 ) > tmp = array(rlogis(1e6), c(1e5,10)) abovD = outer(1:10,1:10, function(x,y) x <= y) #### to implement cumsum via linear algebra tmp2 = (tmp %*% abovD) * (rep(1,1e5) %o% sqrt(1/(1:10))) condvec = (apply(tmp2,1,max) > 7) > mean(condvec) [1] 0.00179 #### SE = sqrt(.00179*(1-.00179)/1e5) = 0.0001336711 = 1.337e-4 > summary(c(tmp2)) Min. 1st Qu. Median Mean 3rd Qu. Max. -14.361475 -1.189847 -0.007286 -0.007099 1.174707 12.328057 > summary(apply(tmp2,1,max)) -7.0179 0.3749 1.3890 1.4498 2.4678 12.3281 > quantile(apply(tmp2,1,max), prob=c(.8,.9,.95,.975,.99,.999)) 80% 90% 95% 97.5% 99% 99.9% 2.742518 3.504594 4.187642 4.804273 5.547388 7.538401 #### Try the computation using importance sampling, using logistic with scale 1.5 as proposal distribution tmp3 = array(1.5*rlogis(1e6), c(1e5,10)) ### Now create ratio of densities: drat = apply(tmp3,1, function(row) prod(dlogis(row)/dlogis(row,0,1.5))) tmp3 = (tmp3 %*% abovD) * (rep(1,1e5) %o% sqrt(1/(1:10))) auxvec = (apply(tmp3,1,max) > 7)*drat > mean(auxvec) [1] 0.001829323 > sd(auxvec)/sqrt(1e5) [1] 7.901115e-05 #### so var is diminished by factor (7.901e-5/1.337e-4)^2 = 0.349 ### NB in Monte-Carlo version, 179 cases nonzero out of 1e5; in this Imp-Samp, 2929 nonzero ### Try important sampling again, now with scale-factor 7/1.5 = 4.67 to make median close to 7 tmp4 = array(4.67*rlogis(1e6), c(1e5,10)) ### Now create ratio of densities: drat = apply(tmp3,1, function(row) prod(dlogis(row)/dlogis(row,0,4.67))) tmp4 = (tmp4 %*% abovD) * (rep(1,1e5) %o% sqrt(1/(1:10))) auxvec = (apply(tmp4,1,max) > 7)*drat c( est=mean(auxvec), SE=sd(auxvec)/sqrt(1e5)) est SE 54826.1570 713.2158 #### These estimates are awful: the "Confidence Interval" is way off !!! > sum(auxvec>0) [1] 47156 > summary(auxvec) Min. 1st Qu. Median Mean 3rd Qu. Max. 0 0 0 54826 675 4000755 ### So importance sampling by scale cannot allow such large scale factor tmp3 = array(2.25*rlogis(1e6), c(1e5,10)) ### Now create ratio of densities: drat = apply(tmp3,1, function(row) prod(dlogis(row)/dlogis(row,0,2.25))) tmp3 = (tmp3 %*% abovD) * (rep(1,1e5) %o% sqrt(1/(1:10))) auxvec = (apply(tmp2,1,max) > 7)*drat > c( est=mean(auxvec), SE=sd(auxvec)/sqrt(1e5)) est SE 0.0013558884 0.0007146948 ### Now the CI covers the truth (close to .0180) but the SE is much ### worse than the straight Monte-Carlo ### Now try importance sampling by shifting mean, not scale tmp4 = array(3+rlogis(1e6), c(1e5,10)) ### Now create ratio of densities: drat = apply(tmp4,1, function(row) prod(dlogis(row)/dlogis(row,3,1))) tmp4 = (tmp4 %*% abovD) * (rep(1,1e5) %o% sqrt(1/(1:10))) auxvec = (apply(tmp2,1,max) > 7)*drat > c( est=mean(auxvec), SE=sd(auxvec)/sqrt(1e5)) est SE 1.059208e-04 9.324804e-05 ### Again the CI covers, but the estimate is too imprecise. ##>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> EXPLORATORY DISPLAY BEFORE MODELING =================================== 10/11/17 (using "boxplot", "aggregate", and "factor") > datfram = InsectSprays > summary(datfram) count spray Min. : 0.00 A:12 1st Qu.: 3.00 B:12 Median : 7.00 C:12 Mean : 9.50 D:12 3rd Qu.:14.25 E:12 Max. :26.00 F:12 > tmp = aggregate.data.frame(datfram[[1]], by=datfram[2], function(col) c(mean(col), var(col))) spray x.1 x.2 1 A 14.500000 22.272727 2 B 15.333333 18.242424 3 C 2.083333 3.901515 4 D 4.916667 6.265152 5 E 3.500000 3.000000 6 F 16.666667 38.606061 > boxplot(count ~ spray, data=datfram) > x11(width=6, height=5) par(mfrow=c(2,3)) > for(i in 1:6) { ind = which(datfram$spray==LETTERS[i]) plot((datfram$count[ind]-tmp[i,2][1])/sqrt(tmp[i,2][2]), xlab="Obs#", ylab="Std.Obs", ylim=c(-2.5,2.5), main=paste("Std Obs, spray",LETTERS[i])) } > mean.spr = tmp[as.numeric(datfram$spray),2][1] sd.spr = sqrt(tmp[as.numeric(datfram$spray),2][2]) > hist((datfram$count - mean.spr)/sd.spr, prob=T) > dev.off() > plot(datfram$spray, (datfram$count - mean.spr)/sd.spr) > plot(as.numeric(datfram$spray), (datfram$count - mean.spr)/sd.spr, pch=as.character(datfram$spray)) > plot(datfram$count, pch=as.character(datfram$spray))