Solutions to HW Set 8
---------------------

### (a) Give R code to generate (and then test the distribution in order
### to check your work) 5,000 pseudo-random variates from the density
### on [0,7] proportional to:
##
##    1/(1+0.1*x^2)^2 + x^3*exp(-x) + 0.2*(abs(x-3)<2) ,   0 < x < 7
##
## This is a mixture of a rescaled (truncated) t_3 r.v., a truncated
## Gamma(4,1), and a Unif[1,5]. We use the integral of each component
## function on [0,7] to ascertain the proportions and calculate the
## weight for each mixture class: each weight is proportional to the
## integral of its component on [0,7].

p1 = integrate(function(x) 1/(1+0.1*x^2)^2, 0, 7)$val
p2 = gamma(4)*pgamma(7,4)    ## integral of x^3*exp(-x) on [0,7]
p3 = 0.8                     ## integral of 0.2 on [1,5]

> pvec = c(p1,p2,p3)/(p1+p2+p3)
> pvec
[1] 0.2698676 0.6179667 0.1121657

## Next note that the density of t_3 is proportional to (1+t^2/3)^(-2),
## so put t/sqrt(3) = x/sqrt(10), i.e. t = sqrt(0.3)*x.

f0 = function(x) 1/(1+0.1*x^2)^2 + x^3*exp(-x) + 0.2*(abs(x-3)<2)
RNGa = function(N) {
   Lv = sample(1:3, N, replace=T, prob=pvec)
   outx = numeric(N)
   outx[Lv==3] = runif(sum(Lv==3),1,5)                    # Uniform(1,5)
   outx[Lv==2] = qgamma(pgamma(7,4)*runif(sum(Lv==2)),4)  # truncated Gamma(4,1)
   outx[Lv==1] = qt(0.5 + (pt(7*sqrt(.3),3)-.5)*
                      runif(sum(Lv==1)),3)/sqrt(.3)       # truncated t_3
   outx }
### truncation is to [0,7] in each case

> tmpout = RNGa(1e6)
> hist(tmpout, nclass=40, prob=T)
> curve(f0(x)/(p1+p2+p3), 0, 7, add=T, col="orange")
### Fit is excellent.
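## An extra check beyond the histogram (a sketch only; the seed and the
## 20-cell binning are arbitrary choices): compare binned RNGa output with
## cell probabilities obtained by numerical integration, via chisq.test.
## The block repeats the f0, pvec and RNGa definitions so it stands alone.

```r
f0 = function(x) 1/(1+0.1*x^2)^2 + x^3*exp(-x) + 0.2*(abs(x-3)<2)
p1 = integrate(function(x) 1/(1+0.1*x^2)^2, 0, 7)$val
pvec = c(p1, gamma(4)*pgamma(7,4), 0.8)
pvec = pvec/sum(pvec)
RNGa = function(N) {
   Lv = sample(1:3, N, replace=TRUE, prob=pvec)
   outx = numeric(N)
   outx[Lv==3] = runif(sum(Lv==3), 1, 5)
   outx[Lv==2] = qgamma(pgamma(7,4)*runif(sum(Lv==2)), 4)
   outx[Lv==1] = qt(0.5 + (pt(7*sqrt(.3),3)-.5)*runif(sum(Lv==1)), 3)/sqrt(.3)
   outx }
set.seed(77)
xs = RNGa(5000)
brks = seq(0, 7, length=21)                  # 20 equal-width cells
ctot = integrate(f0, 0, 7)$val               # normalizing constant
pcell = sapply(1:20, function(j) integrate(f0, brks[j], brks[j+1])$val)/ctot
pcell = pcell/sum(pcell)                     # guard against roundoff
obs = table(cut(xs, brks, include.lowest=TRUE))
chisq.test(obs, p=pcell)$p.value             # should not be small
```

## A non-small p-value here is consistent with the draws following the
## target density.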
###----------------------------------------------
### (b) Show by simulation of 10000 independent data batches of size 50
### that the maximum likelihood estimators (MLEs) of each of the two
### parameters a,b from a dlogis(x,a,b) density (based on a sample of
### size 50) is approximately normal, with the mean and variance predicted
### from the Fisher Information for this estimation problem.

## We need to select specific choices of a,b; say a=1, b=.5.
## The Fisher Information 2x2 matrix has entries

> integrate(function(x) 4*dlogis(x,1,.5)^2, -Inf,Inf, rel.tol=1e-8)   ### = I11
1.333333 with absolute error < 1.3e-10
> integrate(function(x) 4*(2*(x-1)*dlogis(x,1,.5) +
     2*plogis(x,1,.5)-1)*dlogis(x,1,.5), -Inf,Inf, rel.tol=1e-8)      ### = I12 = I21
-3.441691e-15 with absolute error < 4.5e-09
> integrate(function(x) 8*(2*(x-1)^2*dlogis(x,1,.5)+4*(x-1)*plogis(x,1,.5)
     -2*(x-1)-0.5)*dlogis(x,1,.5), -Inf,Inf, rel.tol=1e-8)            ### = I22
5.719824 with absolute error < 1.4e-09

## Therefore the theoretical per-observation information is
> aux = diag(c(4/3, 5.719824))
## with corresponding inverse
> solve(aux)
     [,1]      [,2]
[1,] 0.75 0.0000000
[2,] 0.00 0.1748306

> negllk = function(ab,xrow) -sum(log(dlogis(xrow,ab[1],exp(ab[2]))))
> system.time({ set.seed(1333)    ### on my old laptop
   xmat = array(rlogis(5e5,1,.5), c(1e4,50))
   abarr = array(0, c(1e4,2))
   for (i in 1:1e4) {
      tmp = optim(c(1,log(.5)), negllk, xrow=xmat[i,],
                  control=list(reltol=1e-8), method="BFGS")
      abarr[i,] = c(tmp$par[1], exp(tmp$par[2])) } })
   user  system elapsed
   9.19    0.00    9.25

> var(abarr)
             [,1]         [,2]
[1,] 1.500890e-02 2.879218e-05
[2,] 2.879218e-05 3.388702e-03
### corr is small (0.004)
### The variances .0150089 and .0033887 should be compared with
### c(.75,.17)/50 =
[1] 0.0150 0.0034

> hist(abarr[,1], nclass=50, prob=T)
> curve(dnorm(x, mean(abarr[,1]), sd(abarr[,1])), add=T, col="red")
#### Very good agreement for "a" parameter !!
> hist(abarr[,2], nclass=50, prob=T)
> curve(dnorm(x, mean(abarr[,2]), sd(abarr[,2])), add=T, col="red")
### Small discrepancy (slight leftward bias) for "b" parameter estimator with n=50.

## We re-do the simulation with sample size 200 to check that the
## discrepancies were real small-sample effects !
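## As a sanity check on the numerical integrals above: for the logistic
## location-scale family with scale b, the Fisher information entries have
## known closed forms (a standard textbook result), I_aa = 1/(3*b^2),
## I_ab = 0, and I_bb = (pi^2+3)/(9*b^2).

```r
b = 0.5
c(1/(3*b^2), (pi^2+3)/(9*b^2))
## [1] 1.333333 5.719824   -- matching the integrate() values above
```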
> system.time({ set.seed(1555)    ### on my old laptop
   xmat = array(rlogis(2e6,1,.5), c(1e4,200))
   abarr2 = array(0, c(1e4,2))
   for (i in 1:1e4) {
      tmp = optim(c(1,log(.5)), negllk, xrow=xmat[i,],
                  control=list(reltol=1e-8), method="BFGS")
      abarr2[i,] = c(tmp$par[1], exp(tmp$par[2])) } })
   user  system elapsed
  24.08    0.03   24.20

> rbind(diag(var(abarr2)), c(0.75,0.1748306)/200)
            [,1]         [,2]
[1,] 0.003684582 0.0008707228
[2,] 0.003750000 0.0008741530
### Nice agreement!

> hist(abarr2[,1], nclass=50, prob=T)
> curve(dnorm(x, mean(abarr2[,1]), sd(abarr2[,1])), add=T, col="red")
> hist(abarr2[,2], nclass=50, prob=T)
> curve(dnorm(x, mean(abarr2[,2]), sd(abarr2[,2])), add=T, col="red")
#### Agreement is closer now, with n=200.

##-------------------------------------------------
## (c) Generate 10000 independent random variables with density
## proportional to (1+2*x)*x^2*(1-x)^3 on (0,1). Do this by an
## efficient Accept-Reject method.

> f0 = function(x) (1+2*x)*x^2*(1-x)^3
> curve(f0,0,1)    ### looks like a bell curve !
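## An aside that is useful as a check on the constants below: expanding
## f0 gives x^2*(1-x)^3 + 2*x^3*(1-x)^3, i.e. an unnormalized mixture of
## Beta(3,4) and Beta(4,4) densities, so the normalizing constant is
## exactly beta(3,4) + 2*beta(4,4) = 1/60 + 1/70 = 13/420.

```r
f0 = function(x) (1+2*x)*x^2*(1-x)^3
c(beta(3,4) + 2*beta(4,4), 13/420, integrate(f0, 0, 1)$val)
## all three equal 0.03095238
```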
> optimize(f0, c(0,1), maximum=T)
$maximum
[1] 0.4520288
$objective
[1] 0.06401555

> integrate(function(x) f0(x),0,1)
0.03095238 with absolute error < 3.4e-16
> integrate(function(x) x*f0(x),0,1)$val/.03095238
[1] 0.4615385
#### so the mean is very close to the mode
> integrate(function(x) x^2*f0(x),0,1)$val/.03095238 - 0.4615385^2
[1] 0.03057196

## So let's try a normal envelope with mean 0.4520288 and sd sqrt(.03057196):
> curve(dnorm(x,0.4520288,sqrt(.03057196))*.03095238, add=T, col="blue")
## We need a slightly larger "sd" and a multiple:
> curve(dnorm(x,0.4520288,.2)*.03095238*1.12, add=T, col="orange")
> optimize(function(x) f0(x)/(dnorm(x,0.4520288,.2)*.03095238*1.12),
     c(0,1), maximum=T)

### So our bounding density for f0/.03095238 is
### dnorm(x,.4520288,.2) truncated to (0,1),
### or g(x) = dnorm(x,.4520288,.2)/(pnorm((1-.4520288)/.2)-pnorm(-.4520288/.2)),
### multiplied by
> 1.12*(pnorm((1-.4520288)/.2)-pnorm(-.4520288/.2))
[1] 1.103223
### So an accept-reject method based on this truncated normal bounding density
### requires only about 10.3% extra proposals beyond the target sample size.

## But here is a still less wasteful and easier method.
## f0(x) = (1+2x)*x^2*(1-x)^3 is upper bounded by 1.35*x^2*(1-x)^2.425;
## use the optimize function to check that the ratio is less than 1.
> curve(f0,0,1)
> curve(1.35*x^2*(1-x)^(2.425), add=T, col="green")
> optimize(function(x) f0(x)/(1.35*x^2*(1-x)^(2.425)), c(0,1), maximum=T)
$maximum
[1] 0.4523879
$objective
[1] 0.9980036
### NB The constant of integration that makes x^2*(1-x)^(2.425) a density is
### beta(3,3.425). So we can see that a Beta(3,3.425) bounding density
### requires only a multiple 1.35*beta(3,3.425)/.03095238 < 1.06096 to
### bound f0/.03095238 above, uniformly.
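## A quick grid check (a sketch; the 10001-point grid is an arbitrary
## choice) that the envelope 1.35*x^2*(1-x)^2.425 really dominates f0
## throughout (0,1), complementing the single optimize() call above:

```r
f0 = function(x) (1+2*x)*x^2*(1-x)^3
xg = seq(1e-6, 1-1e-6, length=10001)
ratio = f0(xg)/(1.35*xg^2*(1-xg)^2.425)
max(ratio)   # about 0.998, uniformly below 1
```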
### Now the more efficient Accept-Reject method is as follows:
## First, generate Xst from Beta(3, 3.425) and U from Uniform(0,1);
## accept X = Xst if U <= f0(Xst)/(.03095238*1.06096*dbeta(Xst,3,3.425)).
## Generating round(N*1.03*1.06096) proposals yields at least N
## acceptances with very high probability:
> 1 - pbinom(999, round(1000*1.06096*1.03), 1/1.06096)
[1] 0.9999125

RNGb = function(N) {    ### assume N >= 1000
   M = round(N*1.03*1.06096)
   Xst = rbeta(M,3,3.425)
   U = runif(M)
   Xst[ U <= f0(Xst)/(.03095238*1.06096*dbeta(Xst,3,3.425)) ][1:N] }

> hist(RNGb(1e6), nclass=50, prob=T)
> curve(f0(x)/.03095238, add=T, col="red")
### Perfect agreement.
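## A final numerical check of RNGb (a sketch; the seed and sample size are
## arbitrary): since int x*f0(x) dx = beta(4,4) + 2*beta(5,4) = 1/70, the
## exact mean of the normalized density is (1/70)/(13/420) = 6/13 =
## 0.4615385, agreeing with the integrate() ratio computed in part (c);
## the sample mean should match to within simulation error.

```r
f0 = function(x) (1+2*x)*x^2*(1-x)^3
RNGb = function(N) {    # as defined above; assume N >= 1000
   M = round(N*1.03*1.06096)
   Xst = rbeta(M,3,3.425)
   U = runif(M)
   Xst[ U <= f0(Xst)/(.03095238*1.06096*dbeta(Xst,3,3.425)) ][1:N] }
set.seed(2024)
x = RNGb(1e5)
c(mean(x), 6/13)   # sample mean vs exact mean 0.4615385
```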