Solution to Problem Set 9
-------------------------

## 1 Estimating    P(X_(3) + X_(7) + X_(10) > 4)  (*) based on N(0,1) sample of n=25
## 1(A)

> set.seed(537)
  system.time({ 
       narr = array(rnorm(1e6), c(4e4,25))
       out1 = apply(narr, 1, function(row)
             sum(sort(row)[c(3,7,10)]) )
       cat(mean(out1 > -1.5),"\n")
  })
0.16625

   user  system elapsed 
   3.95    0.01    4.04 

## CI =   .16625 + c(-1,1)*1.96*sqrt(.16625*(1-.16625)/4e4)
[1] 0.1626014 0.1698986

### Now antithetic variables

> set.seed(537)
  system.time({ 
       narr2 = array(rnorm(5e5), c(2e4,25))
       out2 = apply(narr2, 1, function(row)
             sum(sort(row)[c(3,7,10)]) )
       out2 = c(out2, apply(-narr2, 1, function(row)
             sum(sort(row)[c(3,7,10)]) ) )
       cat(mean(out2 > -1.5),"\n")
  })
0.165125 
   user  system elapsed 
   4.01    0.00    4.01 

### ONE THING TO NOTICE HERE: when we use antithetic variable method and replace each X_i by its negative
### the new k'th order statistic (for k=3,7,10) is actually the negative of the old 25-k+1 order statistic
### because the sign-reversal causes us to count from the opposite end. In this example, that means 
### there is actually an extra computational SAVINGS in the antithetic variable method because we only 
### have to do our sort once to get two output valuyes.
   
## In the antithetic-variable CI, replace sqrt(.165125*(1-.165125)/4e4) = 0.001856467     by

> sqrt(mean( (((out2[1:2e4]> -1.5)+(out2[2e4+(1:2e4)]> -1.5))/2-.165125)^2/2e4)
[1] 0.001691467

### So the variance in the antithetic-variable method will be smaller
###   by a factor   0.001691467/0.001856467 = 0.9111215
###   which is the factor by which computation is effectively reduced
### But CI width is multiplied by the sqrt of that, or   0.9545268.

### Now control variates, done two ways.

tmp = apply(narr,1, function(row) sort(row)[c(3,7,10)])

fram1 = data.frame(y=as.numeric(out1> -1.5), X1=out1, X2 = tmp[1,],
    X3 = tmp[2,], X4 = tmp[3,])

## Expectations of order statistics

>  mu1 = integrate(function(x) qnorm(x)*dbeta(x,3,23), 0, 1, rel.tol=1.e-9)
>  mu2 = integrate(function(x) qnorm(x)*dbeta(x,7,19), 0, 1, rel.tol=1.e-9)
>  mu3 = integrate(function(x) qnorm(x)*dbeta(x,10,16), 0, 1, rel.tol=1.e-9)
> mu1
-1.262753 with absolute error < 5.4e-10
> mu2
-0.6369037 with absolute error < 3.1e-11
> mu3
-0.3026752 with absolute error < 5.1e-12

> fram1$X2 = fram1$X2-mu1$val
  fram1$X3 = fram1$X3-mu2$val
  fram1$X4 = fram1$X4-mu3$val 
  fram1$X1 = fram1$X1-(mu1$val+mu2$val+mu3$val)

>  summary(lm(y ~ X1, data=fram1))$coef[1,1:2]
  Estimate Std. Error 
 0.1671009  0.0013904 
>  summary(lm(y ~ X2+X3+X4, data=fram1))$coef[1,1:2]
   Estimate  Std. Error 
0.167107209 0.001390422 

### So there is absolutely no benefit in using the 3 control 
###   variates in place of 1, but the accuracy beats the straight
###   Monte Carlo method by a variance factor  0.0013904/0.0018565 = 0.749

##---------------------------------------------------

### 2. IMPORTANCE SAMPLING: estimating   P(W_j > 0 for j=1,...6)     (**)
###    where   W = c( (inverse of (diag(0:5) + 0.5) ) %*% Z ) ,   and  Z  is vector of 6 iid N(0,1)'s

> Imat = diag(0:5)+ 0.5
> Amat3 = solve(Imat)
  Amat3
           [,1] [,2] [,3]       [,4]  [,5] [,6]
[1,]  4.2833333   -1 -0.5 -0.3333333 -0.25 -0.2
[2,] -1.0000000    1  0.0  0.0000000  0.00  0.0
[3,] -0.5000000    0  0.5  0.0000000  0.00  0.0
[4,] -0.3333333    0  0.0  0.3333333  0.00  0.0
[5,] -0.2500000    0  0.0  0.0000000  0.25  0.0
[6,] -0.2000000    0  0.0  0.0000000  0.00  0.2

### (a) direct Monte-Carlo:

> tot = 0
  set.seed(55555)
  system.time({ 
       for( i in 1:100)  {
	        Zmat = array(rnorm(6e6),c(1e6,6))
            Wmat = Zmat %*% Amat3
			tot = tot + sum( apply(Wmat,1,min) > 0 )
		} } )
  tot/1e8

   user  system elapsed 
 167.76    4.25  172.03 
>   tot/1e8
[1] 0.00011441
> SE1 = sqrt(.00011441*(1-.00011441)/1e8)
> SE1
[1] 1.069565e-06

    #### CI is .00011441 + c(-1,1)*1.96*SE1
[1] 0.0001123137 0.0001165063

set.seed(66666)
tot1=0
for( i in 1:100)  {
	        Zmat = array(rnorm(6e6),c(1e6,6))
            Wmat = Zmat %*% Amat3
			tot1 = tot1 + sum( apply(Wmat,1,min) > 0 )
		}
		tot1/1e8

#### Did the run a second time with   set.seed(66666)  and got value  tot/1e8 =  0.00011096
#### NB: this value is well outside the previous CI !!!!! 2-sided approximate p-value = 
> 2*(pnorm((.00011096-.00011441)/sqrt(.00011441*(1-.00011441)/1e8))   ### =  0.001257042

### (b) importance sampling:
  
## First indicate how to create a matrix 64 x 6 of plus and minus 1's so that the rows 
##    enumerate all possible patterns.

sign6mat = NULL
for(i in 1:6) sign6mat = cbind(sign6mat, rep(rep(c(-1,1),rep(2^(i-1),2)),64/2^i))
> c((0.5*(sign6mat+1)) %*% (2^(0:5)))
 [1]  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
[26] 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
[51] 50 51 52 53 54 55 56 57 58 59 60 61 62 63   ### shows that all 64 patterns occur

### Now the ratio of the density f_W over f_{|W|} for a six-vector of positive entries
### Suppose that WMat is  N x 6 matrix with positive-entry vectors as rows.
### Vector of ratios  f_W over f_{|W|} evaluated at successive rows wvec is as follows:

densRat = function(Wmat) {
               den = numeric(nrow(Wmat))
			   cfac = rep(0.5,6)
			   ones = rep(1,nrow(Wmat))
               for(i in 1:64) {
			           aux = ((outer(ones,sign6mat[i,])*Wmat) %*% Imat)^2
					   eterm = exp(-c( aux %*% cfac ))
					   den = den+eterm }
			   eterm/den }
			   
## Now do a simulation with N=1e6:

> system.time({			   
        Zmat = array(rnorm(6e6),c(1e6,6))
        Wmat = abs(Zmat %*% Amat3)
		impterm = densRat(Wmat)
		prob = mean(impterm) })

   user  system elapsed 
   8.33    1.76   10.10 

		
> prob
[1] 0.0001116938
> sd(impterm)/1e3
[1] 6.506749e-07 

### compare with:   sqrt(.00011441*(1-.00011441)/1e8) = [1] 1.069565e-06
### BUT NOTE: this slightly lesser standard error came with 100-fold smaller N !!!