HW3 Solutions Log     3/12/03

(A) The idea is to simulate (x,y) pairs uniformly in the unit square and reject
whenever ( x < 1/3 & y > 3*x ) or ( x >= 1/3 & y > 1.5*(1-x) ). The probability
of rejection is exactly 1/2 each time, so to generate 1000 accepted pairs you
will generate about E(Geom(0.5))*1000 = 2000 pairs of Unif[0,1] variates
(U_1,U_2). The probability that it will take more than 2200 tries to get 1000
successes is pbinom(999, 2200, .5) = 9e-6, so we generate 2200 pairs outright.

> Umat <- matrix(runif(4400), ncol=2)
> Tvec <- (1:2200)[(Umat[,1] < 1/3 & Umat[,2] <= 3*Umat[,1]) |
                   (Umat[,1] >= 1/3 & Umat[,2] <= 1.5*(1-Umat[,1]))]
> XYmat <- Umat[Tvec[1:1000],]
> motif()
> plot(XYmat[,1], XYmat[,2])
> unix.time( { Umat <- matrix(runif(4400), ncol=2)
     Tvec <- (1:2200)[(Umat[,1] < 1/3 & Umat[,2] <= 3*Umat[,1]) |
                      (Umat[,1] >= 1/3 & Umat[,2] <= 1.5*(1-Umat[,1]))]
     XYmat <- Umat[Tvec[1:1000],] })
   ### .02 secs

(B)
> Umat <- matrix(runif(2000), ncol=2)
  ### Use the probability integral transform and the conditional transform.
> inds <- Umat[,1] <= 1/3
  XYmat <- array(0, dim=c(1000,2))
  XYmat[inds,1] <- sqrt(Umat[inds,1]/3)
  XYmat[inds,2] <- 3*XYmat[inds,1]*Umat[inds,2]
  XYmat[!inds,1] <- 1-sqrt((2/3)*(1-Umat[!inds,1]))
  XYmat[!inds,2] <- 1.5*(1-XYmat[!inds,1])*Umat[!inds,2]
> plot(XYmat[,1], XYmat[,2])
> unix.time( { Umat <- matrix(runif(2000), ncol=2)
     inds <- Umat[,1] <= 1/3
     XYmat <- array(0, dim=c(1000,2))
     XYmat[inds,1] <- sqrt(Umat[inds,1]/3)
     XYmat[inds,2] <- 3*XYmat[inds,1]*Umat[inds,2]
     XYmat[!inds,1] <- 1-sqrt((2/3)*(1-Umat[!inds,1]))
     XYmat[!inds,2] <- 1.5*(1-XYmat[!inds,1])*Umat[!inds,2] })
   ### .02 secs, essentially the same !!!

Third method, using the special geometry and the fact that P(X < 1/3) = 1/3:

> unix.time({ XYmat <- matrix(runif(2000), ncol=2)
     indsA <- (XYmat[,1] < 1/3) & (XYmat[,2] > 3*XYmat[,1])
     indsB <- (XYmat[,1] >= 1/3) & (XYmat[,2] > 1.5*(1-XYmat[,1]))
     XYmat[indsA,] <- cbind(rep(1/3,sum(indsA)), rep(1,sum(indsA))) - XYmat[indsA,]
     XYmat[indsB,] <- cbind(rep(4/3,sum(indsB)), rep(1,sum(indsB))) - XYmat[indsB,] })
   ### .02 secs, essentially the same !!!

### Too hard to choose between these !! Re-do them with 10000 iterations.

> unix.time( { Umat <- matrix(runif(41500), ncol=2)
     Tvec <- (1:20750)[(Umat[,1] < 1/3 & Umat[,2] <= 3*Umat[,1]) |
                       (Umat[,1] >= 1/3 & Umat[,2] <= 1.5*(1-Umat[,1]))]
     XYmat <- Umat[Tvec[1:10000],] })
[1] 0.150 0.030 1.00 0 0         ### Method 1.
> unix.time( { Umat <- matrix(runif(20000), ncol=2)
     inds <- Umat[,1] <= 1/3
     XYmat <- array(0, dim=c(10000,2))
     XYmat[inds,1] <- sqrt(Umat[inds,1]/3)
     XYmat[inds,2] <- 3*XYmat[inds,1]*Umat[inds,2]
     XYmat[!inds,1] <- 1-sqrt((2/3)*(1-Umat[!inds,1]))
     XYmat[!inds,2] <- 1.5*(1-XYmat[!inds,1])*Umat[!inds,2] })
[1] 0.140 0.01 1.00 0 0          ### Method 2.
> unix.time({ XYmat <- matrix(runif(20000), ncol=2)
     indsA <- (XYmat[,1] < 1/3) & (XYmat[,2] > 3*XYmat[,1])
     indsB <- (XYmat[,1] >= 1/3) & (XYmat[,2] > 1.5*(1-XYmat[,1]))
     XYmat[indsA,] <- cbind(rep(1/3,sum(indsA)), rep(1,sum(indsA))) - XYmat[indsA,]
     XYmat[indsB,] <- cbind(rep(4/3,sum(indsB)), rep(1,sum(indsB))) - XYmat[indsB,] })
[1] 0.13 0.00 1.00 0.00 0.00     ### Method 3.
## So there is a slight preference for method 2 or 3.
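As a quick check that all three methods target the same uniform distribution on
the triangle, one can compare the empirical CDF of the simulated X-coordinates
with the marginal CDF F(x) = 3*x^2 for x < 1/3 and F(x) = 1 - 1.5*(1-x)^2 for
x >= 1/3, which is exactly the CDF inverted in Method 2. A minimal sketch of
such a check (the helper name checkX is my own addition, not part of the
assignment):

checkX <- function(xvals) {
     ## Kolmogorov-Smirnov-type discrepancy between the empirical CDF of the
     ## simulated X-coordinates and the theoretical marginal CDF of X
     xs <- sort(xvals)
     Ftheo <- ifelse(xs < 1/3, 3*xs^2, 1 - 1.5*(1-xs)^2)
     Femp <- (1:length(xs))/length(xs)
     max(abs(Femp - Ftheo))
}
checkX(XYmat[,1])    ## should be small, on the order of 1/sqrt(nrow(XYmat))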
(C) Now consider 2x2 tables and the Xsq statistic:

> unix.time({ n1plus <- rbinom(1000,100,.6)
     n11 <- rbinom(1000,n1plus,.7)
     n21 <- rbinom(1000,100-n1plus,.7)
     ### This is much simpler than working directly with "sample" !
     nplus1 <- n11+n21
     nexp <- cbind((n1plus/100)*cbind(nplus1,100-nplus1),
                   (1-n1plus/100)*cbind(nplus1,100-nplus1))
     nobs <- cbind(n11,n1plus-n11,n21,100-n1plus-n21)
     Xsq <- c(((nobs-nexp)^2/nexp) %*% rep(1,4)) })
> c(summary(Xsq), rejfrac=mean(Xsq>3.84))
 Min. 1st Qu. Median  Mean 3rd Qu.  Max. rejfrac
    0   0.111 0.5432 1.052   1.412 13.24   0.049
## On another timing run, which took 0.06 secs:
 Min. 1st Qu.  Median  Mean 3rd Qu.  Max. rejfrac
    0 0.08823  0.4884 1.042   1.338 14.65   0.057
## In these cases, the CI half-width is 1.96*sqrt(.05*.95/1000) = 0.0135.

(D) Now try importance sampling.

This problem, which we discussed twice in class [at which point I had still not
solved it correctly!], is a real object lesson in the difficulties of importance
sampling. The point is that too restrictive a set of working importance-sampling
densities from which to choose may leave no good options !!

In this problem, we began by trying to do importance sampling from a working
density (or rather, probability mass function) which was itself multinomial.
Let's first go through those steps and understand how and why the
importance-sampling estimators fail to work.

> Import
function(pvec, Nrep = 1000)
{
     p1plus <- pvec[1] + pvec[3]
     p1f <- pvec[1]/p1plus
     p2f <- pvec[2]/(1 - p1plus)
     n1plus <- rbinom(Nrep, 100, p1plus)
     n11 <- rbinom(Nrep, n1plus, p1f)
     n21 <- rbinom(Nrep, 100 - n1plus, p2f)
     ### This is much simpler than working directly with "sample" !
     nplus1 <- n11 + n21
     nexp <- cbind((nplus1/100) * cbind(n1plus, 100 - n1plus),
                   (1 - nplus1/100) * cbind(n1plus, 100 - n1plus))
     nobs <- cbind(n11, n21, n1plus - n11, 100 - n1plus - n21)
     Xsq <- ((nobs - nexp)^2/nexp) %*% rep(1, 4)
     Ratio <- exp(nobs %*% log(c(0.42, 0.28, 0.18, 0.12)/pvec))
     list(WtRej = (Xsq > 3.84) * Ratio, Xsq = Xsq, Ratio = Ratio,
          p1plus = p1plus, p1f = p1f, p2f = p2f, nobs = nobs,
          pvec = pvec, n1plus = n1plus)
}

> tmpo <- Import(c(.42,.28,.18,.12))
> table(tmpo$WtRej)
   0   1
 957  43      ### typical null-hypothesis run: .043 = estimated size

### To get the right answer:
> tmpo <- Import(c(.42,.28,.18,.12), Nrep=1e5)
> c(mean(tmpo$WtRej) -> rp, 1.96*sqrt(rp*(1-rp)/1e5))
[1] 0.050740000 0.001360266      ## Interval is (.04938, .0521)

### Now proceed with first row-sum p1plus = .6, first column-sum = .7, and
### pvec[1] based on the hypergeometric variance and a normal approximation,
### equal to (42 + 1.96*sqrt(60*70*30*40/(99*100^2)))/100 = 0.4642.
### It is easy to verify that the Xsq chi-square statistic for
### independence of row and column classifications is 3.84
### when the expected table values (with table total 100)
### are substituted as data.

> tmpo <- Import(c(.464, .236, .136, .164))
> mean(tmpo$Ratio)
[1] 0.9171026      ### Not too terrible
> apply(c(tmpo$Ratio)*tmpo$nobs, 2, mean)
      n11      n21
 38.32863 25.44641 16.23419 11.70104    ## But also not very accurate
> mean(tmpo$WtRej)
[1] 0.0250326      ### Only upper-tail rejection cases accurately estimated!
> tmpo <- Import(c(.464, .236, .136, .164))     ## 2nd run
> apply(c(tmpo$Ratio)*tmpo$nobs, 2, mean)
      n11      n21
 31.43296 20.03611 12.40591 9.479093    ## Not accurate at all!
> mean(tmpo$Ratio)
[1] 0.7335407      ### This is a large part of the reason!!
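One informal way to quantify the problem with these likelihood ratios is the
(Kish) effective sample size of the importance weights,
(sum of weights)^2 / (sum of squared weights). A minimal sketch applied to the
Ratio component of a run of Import (the helper name effSS is my own addition,
not part of the assignment):

effSS <- function(w) sum(w)^2/sum(w^2)    ## Kish effective sample size
tmpo <- Import(c(.464, .236, .136, .164))
effSS(c(tmpo$Ratio))    ## a value much smaller than Nrep = 1000 signals that
                        ## a few large ratios f/g dominate the weighted averages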
I mentioned in class that one could try to search for a good probability vector
with which to do importance sampling by constraining the Xsq statistic to 3.84
for the expected table (with specified table total, default 100) and trying to
optimize Kullback-Leibler values. Ordinarily this is a good strategy, and I
tried to implement it using the following little function:

## We code the calculation of probability vectors qvec to try
## into a function, allowing also the possibility
## that the row-sum a is not .6 and the col-sum b is not .7.
## The function also calculates the Kullback-Leibler distances KL and KL2:

> KLfcnX
function(a, b, q11 = NULL, n = 100, cut = 3.84,
         p0 = c(0.42, 0.28, 0.18, 0.12), up = T)
{
     ## Here a = 1st row-prob, b = 1st col-prob, and
     ## if q11 is not specified, we put it equal to the
     ## upper-cell prob. s.t. Xsq for the expected table = cut.
     if(is.null(q11))
          q11 <- max(0, min(a * b + (2 * as.numeric(up) - 1) *
                     sqrt((cut * a * (1 - a) * b * (1 - b))/n), 1))
     qvec <- c(q11, b - q11, a - q11, 1 - a - b + q11)
     list(KL = sum(qvec * log(qvec/p0)), KL2 = sum(p0 * log(p0/qvec)),
          qvec = qvec)
}

> unlist(nlminb(c(.55,.65), function(av) KLfcnX(av[1], av[2])$KL)[c(1,2,4)])
 parameters1 parameters2   objective    grad.norm
   0.6010131   0.7003305  0.01898192 3.769823e-08
### This shows that one cannot do better than the values:
> round(unlist(KLfcnX(.6,.7)),5)
      KL   qvec1   qvec2   qvec3   qvec4
 0.01898 0.46399 0.23601 0.13601 0.16399

## Here is a `good' batch, but still not accurate enough !!
> tmpo <- Import(KLfcnX(.6,.7,cut=.384)$qvec)     ## about 90 rejections
> mean(tmpo$WtRej)
[1] 0.05281012
> mean(tmpo$Ratio)
[1] 0.9978234
> tmpo$pvec
[1] 0.4339117 0.2660883 0.1660883 0.1339117
> apply(tmpo$nobs, 2, mean)
    n11    n21
 43.378 26.742 16.496 13.384
> apply(c(tmpo$Ratio)*tmpo$nobs, 2, mean)
      n11      n21
 41.92289 28.17413 17.73505 11.95027
> var(tmpo$WtRej)      ## 0.1287217
### This means the half-width of the CI is
> 1.96*sqrt(var(tmpo$WtRej)/1000)      ## = .0222

### In this example, although we can arrange for more rejections
### under the modified multinomial distribution, importance
### sampling appears to INCREASE variability !
### The reason, in large part, is the terrific variability of
### the simulated likelihood ratios !!!

==========================================================

If you tried this problem for a number of different (multinomial) parameter
combinations, you probably found that often the `simulated' rejection
probability was from .02 to .03, which might lead you to suspect that only one
of the two tails was really being sampled in the limited number of 1000
replications. That is reasonable: in the multinomial distributions used so far
for importance sampling, those chi-squared rejection cases where the upper-left
cell count is too SMALL are too rare to be represented well (or maybe at all!?)
in simulations with 1000 replications.

This suggests the possibility that it might be clever to do importance sampling
by generating 50% of tables with n11 too high and 50% with it too low. That is,
perhaps things would behave a little more stably if we simulated from a MIXTURE
of two multinomial distributions and made THAT the working g.
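Before looking at the implementation, note the form the importance weight takes
under an equal-weight mixture g = (g1 + g2)/2 of two multinomials with
cell-probability vectors pvec and pvec2: the multinomial coefficients cancel in
the ratios g1/f and g2/f, so for an observed table nobs the weight is
f/g = 2/(exp(sum(nobs*log(pvec/p0))) + exp(sum(nobs*log(pvec2/p0)))), which is
what the Ratio line in the function below computes. A minimal stand-alone
sketch for a single table (the helper mixWt is my own illustration, not part of
the assignment):

mixWt <- function(nobs, p0, pvec, pvec2) {
     ## importance weight f/g for f = Multinomial(p0) and
     ## g = 0.5*Multinomial(pvec) + 0.5*Multinomial(pvec2);
     ## the multinomial coefficients cancel in each ratio
     2/(exp(sum(nobs*log(pvec/p0))) + exp(sum(nobs*log(pvec2/p0))))
}
p0 <- c(.42,.28,.18,.12)
mixWt(c(42,28,18,12), p0, p0 + c(1,-1,-1,1)*.036, p0 - c(1,-1,-1,1)*.036)
     ## weight assigned to the null expected table (42,28,18,12)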
Here is a function which does it.

> ImportMix
function(a, b, Delta, Nrep = 1000, p0 = c(0.42, 0.28, 0.18, 0.12))
{
     ### Here a is the 1st row-sum, b is the 1st col-sum, and
     ### Delta is abs(q11 - a*b). We simulate and do importance
     ### sampling from the mixture of two multinomials with
     ### equal weight, with q11 = a*b plus or minus Delta.
     uppr <- 2 * rbinom(Nrep, 1, 0.5) - 1
     p1f <- b + (Delta * uppr)/a
     p2f <- b - (uppr * Delta)/(1 - a)
     pvec <- p0 + c(1, -1, -1, 1) * Delta
     pvec2 <- p0 - c(1, -1, -1, 1) * Delta
     n1plus <- rbinom(Nrep, 100, a)
     n11 <- rbinom(Nrep, n1plus, p1f)
     n21 <- rbinom(Nrep, 100 - n1plus, p2f)
     ### Much simpler than working directly with "sample" !
     nplus1 <- n11 + n21
     nexp <- cbind((nplus1/100) * cbind(n1plus, 100 - n1plus),
                   (1 - nplus1/100) * cbind(n1plus, 100 - n1plus))
     nobs <- cbind(n11, n21, n1plus - n11, 100 - n1plus - n21)
     Xsq <- ((nobs - nexp)^2/nexp) %*% rep(1, 4)
     Ratio <- 2/(exp(nobs %*% log(pvec/p0)) + exp(nobs %*% log(pvec2/p0)))
     list(WtRej = (Xsq > 3.84) * Ratio, Xsq = Xsq, Ratio = Ratio,
          nobs = nobs, pvec = pvec, n1plus = n1plus)
}

> tmpo <- ImportMix(.6,.7,.036)
> tmpo$pvec
[1] 0.456 0.244 0.144 0.156
> summary(tmpo$Ratio)
    Min. 1st Qu. Median   Mean 3rd Qu.  Max.
 0.00453  0.2013 0.5443 0.9848   1.425 4.627
> round(c(mean(tmpo$WtRej), var(tmpo$WtRej), 1.96*sqrt(var(tmpo$WtRej)/1000)), 5)
[1] 0.05108 0.00763 0.00541
> sum(tmpo$WtRej>0)      ### 348 = # rejections

> tmpo <- ImportMix(.6,.7,.044)
> tmpo$pvec
[1] 0.464 0.236 0.136 0.164
> summary(tmpo$Ratio)
     Min. 1st Qu. Median  Mean 3rd Qu.  Max.
 0.000545 0.07522 0.2778 1.049   1.074 11.63
> round(c(mean(tmpo$WtRej), var(tmpo$WtRej), 1.96*sqrt(var(tmpo$WtRej)/1000)), 5)
[1] 0.04983 0.00565 0.00466
> sum(tmpo$WtRej>0)      ### 505 = # rejections

> tmpo <- ImportMix(.6,.7,0)
> round(c(mean(tmpo$WtRej), var(tmpo$WtRej), 1.96*sqrt(var(tmpo$WtRej)/1000)), 5)
[1] 0.05100 0.04845 0.01364

### So this idea WORKS --- the confidence interval based on an importance
### sample of 1000 replications is about 1/3 as wide as the one from a
### straightforward vanilla Monte Carlo simulation of the same size
### (Delta = 0 above). So the gains are very worthwhile !!!

### A DIFFICULT PROBLEM !

The importance-sampling gains from a mixture of multinomials in this problem do
not seem easy to summarize in terms of large-deviation probabilities. The point
seems to be that the mixture g makes many more of the cases of interest in the
Xsq rejection region have likelihood ratios f(X)/g(X) which are not too large
or small.
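As a compact way to rerun the comparison above, one could wrap the half-width
calculation in a small helper and apply it over a few Delta values. A minimal
sketch (the wrapper halfwidth is my own illustration, built on the ImportMix
function above; results vary from run to run):

halfwidth <- function(Delta, Nrep = 1000) {
     ## estimated 95% CI half-width for the rejection probability,
     ## using importance sampling from the mixture with the given Delta
     tmpo <- ImportMix(.6, .7, Delta, Nrep = Nrep)
     1.96 * sqrt(var(c(tmpo$WtRej))/Nrep)
}
sapply(c(0, .036, .044), halfwidth)
     ## Delta = 0 is vanilla Monte Carlo (half-width near .0135 above);
     ## Delta near .04 gave half-widths around .005 in the runs above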