HW18 STAT 705 Solution,   Fall 2017

[almost the same as HW 19 Stat 705  Fall 2016]

> pts = seq(5,70, len=326)
  tmphist = hist(precip, nclass=12, prob=T)
> sum(tmphist$density)*5
[1] 1

### Part (A)
##    (i) A kernel density estimate with dunif(x,-.5,.5) density and bandwidth 2.

  kermat1 = outer(pts, precip, function(x,y) 0.5*(dunif((x-y)/2,-.5,.5))
  dens1 = cbind(pts, c(kermat1 %*% rep(1/70,70)))
  lines(dens1, lty=2)
                            ### very wild and erratic

### (ii) Kernel density estimate with kernel dnorm(x,0,2), 

  kermat2 = outer(pts, precip, function(x,y) dnorm(x,y,2))
  dens2 = cbind(pts, c(kermat2 %*% rep(1/70,70)))
  lines(dens2, lty=4, col="blue")

### to be able to visualize where th points are:

for(j in 1:length(precip)) segments(precip[j],0,precip[j],0.002, lwd=2)

### draws short thick vertical line segments at the bottom of the graph at each precip pt

### Recalling that the variances of the two kernels Unif[-.5,.5] and dnorm are different 
###    via a factor of 12, the effective bandwidth of the uniform is smaller by factor sqrt(12)
### Only some of the visual difference between the two kernel-density estimates is due to the 
###    boxiness of the rectangular kernel; most is due to the bandwidth difference, as could 
###    be seen by overplotting

 kermat1B = outer(pts, precip, function(x,y) dunif((x-y)/(2*sqrt(12)),-.5,.5)/(2*sqrt(12)))
  dens1B = cbind(pts, c(kermat1B %*% rep(1/70,70)))
  lines(dens1B, lty=2, col="red")

### (iii) Find the best ISE cross-validated dlogis(x) kernel-density estimate over (5,70) 

### The solution uses the "LgsCV" cross-validation function [tailored to logistic kernel]
### as given in Class Log 14 and reproduced here with "galaxies" replaced by "precip":

> LgisCV2 <- function(b, Pts) {
  ### little function to evaluate least-sq error criterion
  ###  for precip data with bin-width b
   kertmp <- outer(Pts, precip, function(x,y,B) dlogis(x,y,B), B=b)
   denstmp <- cbind(Pts, c(kertmp %*% rep(1/70,70)))
   delta <- c(range(Pts) %*% c(-1,1))/(length(Pts)-1)
   intsq <- delta*(sum(denstmp[,2]^2) - 0.5*(denstmp[1,2]^2+
        denstmp[length(Pts),2]^2))
  ### The numerical integration here uses trapezoid rule.
   penalty <- numeric(70)
   for(i in 1:70) penalty[i] <- mean(
        dlogis(precip[i],precip[-i],b))
   intsq - 2*mean(penalty)  }

> bestft = nlm(function(w)  LgisCV2(w, pts), 2)
> bestft
$minimum
[1] -0.02191921

$estimate
[1] 2.889569

$gradient
[1] -1.906679e-09

$code
[1] 1

$iterations
[1] 4

### Thus the bandwidth is chosen as 2.890, and we overplot (in orange) to show the fit

kermat3 = outer(pts, precip, function(x,y) dlogis(x,y,2.890))
  dens3 = cbind(pts, c(kermat3 %*% rep(1/70,70)))
  lines(dens3, lty=7, col="orange")

## This is a much smoothed-out density. Interesting that the cross-validation chooses this.

### (iv) 3-component normal mixture, by  MLE; parameterize by  w[1]=qlogis(p1), w[2]=qlogis(p2/(1-p1)),
###   w[3]=mu1, mu2=mu1+exp(w[4]), mu3=mu2+exp(w[5]), sig1=exp(w[6]), sig2=exp(w[7]), sig3=exp(w[8])

> parms = function(w) {
     pvec = c(plogis(w[1]), (1-plogis(w[1]))*plogis(w[2]), 
              1-plogis(w[1])-(1-plogis(w[1]))*plogis(w[2]))
     muvec = c(w[3], w[3]+exp(w[4]), w[3]+exp(w[4])+exp(w[5]))
     sigvec = exp(w[6:8])
     list(p=pvec, mu=muvec, sig=sigvec) }
   
  NlLk2 = function(w) {
   parm = parms(w)
   -sum( log( c( cbind( dnorm(precip, parm$mu[1], parm$sig[1]), 
                        dnorm(precip, parm$mu[2], parm$sig[2]),
                        dnorm(precip, parm$mu[3], parm$sig[3]) ) %*% parm$p ) )) }

> fitmix = nlm(NlLk2, c(0,0,15,log(15),log(15),log(5),log(10),log(5)))
### starting values matter a lot to whether you can get to convergence. 
### Here I chose the starting mu and sig values from the histogram by eye:
> unlist(parms(c(0,0,15,log(15),log(15),log(5),log(10),log(5))))
   p1    p2    p3   mu1   mu2   mu3  sig1  sig2  sig3 
 0.50  0.25  0.25 15.00 30.00 45.00  5.00 10.00  5.00 

> fitmix
$minimum
[1] 273.4842

$estimate
[1] -1.366734  2.269994 13.451348  3.219030  3.007504  1.509582  1.931489
[8]  1.597882

$gradient
[1]  1.105480e-06  1.918157e-07 -3.834961e-08  5.533750e-05  3.081372e-05
[6] -6.705615e-06 -1.072806e-05  5.100276e-06

$code
[1] 1

$iterations
[1] 45

> round(unlist(parms(fitmix$est)),3)
    p1     p2     p3    mu1    mu2    mu3   sig1   sig2   sig3 
 0.203  0.722  0.075 13.451 38.455 58.692  4.525  6.900  4.943 

> mixdens = function(x, parm)   c( cbind( dnorm(x, parm[4], parm[7]), 
         dnorm(x, parm[5], parm[8]), dnorm(x, parm[6], parm[9])) %*% parm[1:3] )

tmphist = hist(precip, nclass=12, prob=T)
curve(mixdens(x, unlist(parms(fitmix$est))), add=T, lty=6, col="orange")

### To my eye, this is the best fit of all. All three peaks are accommodated, and 
## the smoothing of the density is just enough to follow the histrogram peaks well.


### (B) Hand in the R code, and draw overlaid single plot

> hist(precip, breaks=seq(5,70,length=13), prob=T, ylim=c(0,.056),
       xlim=c(4,72), main="Histogram of  precip  with density est's")
  lines(dens1, lty=2, col="green")
  lines(dens2, lty=4, col="blue")
  lines(dens3, lty=7, col="orange")
  curve(mixdens(x, unlist(parms(fitmix$est))), add=T, lty=5, col="red")
  legend(46,y=.055, legend=c("rect ker, h=2", "norm ker, h=2",
         "CV opt logis", "mixnorm k=3"), lty=c(2,4,7,5),
         col=c("green","blue","orange","red"))