---------------------------------------------------------- CREATION OF DATASETS for HW 4 and Solutions. 4/16/03 ---------------------------------------------------------- > covmat <- cbind(rbinom(300,1,0.3), rnorm(300,0,2), rexp(300)/2) > cubrt <- function(x) sign(x)*(abs(x)^(1/3)) > scors <- cubrt( 0.25 + 0.2*covmat[,1] + 0.16*covmat[,2] - 0.34*covmat[,3]) > summary(scors) Min. 1st Qu. Median Mean 3rd Qu. Max. -1.119 -0.4738 0.5165 0.2156 0.7262 1.053 > ydat <- rbinom(300,1,pnorm(scors)) > outmat <- cbind(round(covmat,6), ydat) > outmat[1:10,] ydat [1,] 0 0.766217 0.747395 1 [2,] 0 -2.897755 0.249926 1 [3,] 0 -1.099680 0.373495 0 [4,] 0 3.936900 2.357651 0 [5,] 0 1.049600 0.542241 1 [6,] 1 -1.246224 0.973149 1 [7,] 0 -1.381025 0.118884 1 [8,] 0 2.968462 0.113160 0 [9,] 0 3.023349 0.034036 1 [10,] 0 0.657744 1.379732 1 > write(t(outmat),file="/usr/local/StatData/SplusCrs/Dset3A.asc", ncolumns=4) > !more /usr/local/StatData/SplusCrs/Dset3A.asc 0 0.766217 0.747395 1 0 -2.897755 0.249926 1 0 -1.09968 0.373495 0 0 3.9369 2.357651 0 0 1.0496 0.542241 1 1 -1.246224 0.973149 1 0 -1.381025 0.118884 1 0 2.968462 0.11316 0 0 3.023349 0.034036 1 0 0.657744 1.379732 1 --------------------------------------------------------- > Uvec <- rexp(1000)/2 > Vvec <- rexp(1000)*exp(1) > Epvec <- rbinom(1000,1,0.356) > mean(Epvec) [1] 0.354 > Tvec <- ifelse(Epvec==1,Uvec,Vvec) > mean(Tvec) [1] 1.83983572 > mean(Vvec) [1] 2.58381821 > mean(Uvec) [1] 0.529980864 > write(round(Tvec,6),"/usr/local/StatData/SplusCrs/Dset3B.asc", ncolumns=8) -------------------------------------------------------------- Solution of Problem 3(A) -------------------------------------------------------------- ### True beta is (0.25,0.2, 0.16, -0.34) > beta0 <- c(0.25,0.2, 0.16, -0.34) > logLik(beta0) [1] -188.2229 > logLik <- function(beta) { dfv <- pnorm(cubrt(beta[1]+outmat[,1:3] %*% beta[2:4])) sum(ifelse(outmat[,4]==1,log(dfv),log(1-dfv)))} > nglgLik <- function(x) -logLik(x) ### This is negative log of 
likelihood > logLik(rep(0,4)) [1] -207.9442 > tmpmax <- nlmin(nglgLik, rep(0,4),max.fcal=1000, print.level=1) it nf f reldf preldf reldx 0 1 2.079e2 1 1000 1.943e2 6.567e-2 4.218e3 1.000e0 ***** function evaluation limit ***** function 1.942892e2 reldx 1.0000e0 function evaluations:1000 gradient evaluations: 8 preldf 4.2178e3 npreldf 4.7351e11 > tmpmax[[1]] [1] 0.03386395 0.01110562 0.04776609 0.01882897 > logLik(tmpmax[[1]]) [1] -194.2893 > tmpmax <- nlmin(nglgLik, tmpmax[[1]],max.fcal=1000, max.iter=50, print.level=1) it nf f reldf preldf reldx 0 1 1.862e2 1 8 1.862e2 8.498e-5 1.093e-4 1.624e-3 2 9 1.862e2 4.167e-5 3.776e-3 3.656e-3 3 11 1.861e2 2.791e-4 3.442e-5 4.187e-4 ... 47 86 1.829e2 1.404e-9 1.225e-9 3.320e-5 48 87 1.829e2 2.173e-10 2.072e-10 2.957e-5 49 88 1.829e2 2.232e-12 2.027e-12 3.132e-6 ***** relative function convergence ***** function 1.829122e2 reldx 3.1324e-6 function evaluations: 88 gradient evaluations: 235 preldf 2.0271e-12 npreldf 2.0271e-12 > tmpmax $x: [1] 0.15519485 0.10377217 0.08129857 -0.13991738 ### compare with beta0 = 0.25 0.20 0.16 -0.34 $converged: [1] T $conv.type: [1] "relative function convergence" > logLik(tmpmax[[1]]) [1] -182.9122 while logLik(beta0) = -188.22 ### Still need to do some debugging of Gradmat and GradSrch and NRroot to apply them here: > Gradmat function(parvec, infcn, eps = 1e-06) { # Function to calculate the difference-quotient approximate gradient # (matrix) of an arbitrary input (vector) function infcn # Now recoded to use central differences ! 
dd <- length(parvec) aa <- length(infcn(parvec)) epsmat <- (diag(dd) * eps)/2 gmat <- array(0, dim = c(aa, dd)) for(i in 1:dd) gmat[, i] <- (infcn(parvec + epsmat[, i]) - infcn(parvec - epsmat[, i]))/eps if(aa > 1) gmat else c(gmat) } > Gradmat(rep(.5,4),nglgLik) [1] 58.133234 1.188919 -23.127797 34.983386 > tmpmax <- GradSrch(rep(0.1,4),nglgLik,1,unitfac=T) $nstep: [1] 25 $initial: [1] 0.1 0.1 0.1 0.1 $final: [1] 0.08763017 0.09070030 0.06418227 -0.06758223 $funcval: [1] 185.032 ### So this was pretty effective !!! > tmpmax <- GradSrch(tmpmax$final,nglgLik,1,unitfac=T) > tmpmax $nstep: [1] 25 $initial: [1] 0.08763017 0.09070030 0.06418227 -0.06758223 $final: [1] 0.11203496 0.07624878 0.05777465 -0.10002773 $funcval: [1] 183.4276 > tmpmax <- GradSrch(c( 0.11203, 0.07625, 0.05777, -0.10002), nglgLik,.1, unitfac=T) $final: [1] 0.11408547 0.07604826 0.05893395 -0.10248878 $funcval: [1] 183.3428 ### Not quite there yet: hard to get there ! > GradSrch(tmpmax$final,nglgLik,1,unitfac=T,eps=1e-8) ### Doesn't improve matters !!! ### Next specially code an analytical gradient function for ### this problem. > gradlLik function(beta) { scor <- cubrt(beta[1] + outmat[, 1:3] %*% beta[2:4]) dfv <- pnorm(scor) dns <- dnorm(scor) likmat <- matrix(rep(((dns/(3 * scor^2)) * ifelse(outmat[, 4] == 1, 1/dfv, -1/(1 - dfv))),4), ncol=4) * cbind(rep(1, nrow(outmat)), outmat[, 1:3]) c(t(likmat) %*% rep(1, nrow(outmat))) } > gradlLik(rep(.5,4)) [1] -58.133224 -1.188919 23.127783 -34.983386 > gradlLik(tmpmax$initial) [1] -89.862761 -5.182296 81.875120 -71.385540 > gradlLik(tmpmax$final) [1] -143.032567 -1.909501 76.500237 -129.990227> gradlLik(tmpmax$initial) > Gradmat(tmpmax$final,gradlLik) [,1] [,2] [,3] [,4] [1,] 839150.607 -1834.5809 -180117.11 828451.8299 [2,] -1834.581 -1834.5809 4140.51 -969.4631 [3,] -180120.635 4140.5099 35918.44 -175935.6962 [4,] 828452.161 -969.4631 -175932.27 819530.4134 ### Note that the Hessian is not positive-definite !! So cannot ### expect NRroot to work !!! 
### Next try a gradient search with optimized steps. > GrdSrch2 function(inipar, infcn, maxstep, gradfunc = NULL, eps = 1e-06) { # Function to implement one Steepest-descent step with optimized # step-length. assign("Infcn", infcn, frame = 0) assign("Inipar", inipar, frame = 0) assign("Eps", eps, frame = 0) gradvc <- if(is.null(gradfunc)) Gradmat(inipar, Infcn, Eps) else gradfunc(inipar) tmax <- maxstep/max(abs(gradvc)) assign("Gradvc", gradvc, frame = 0) tmpmin <- nlminb(0, function(t) Infcn(Inipar + t * Gradvc), lower = 0, upper = tmax) c(tmpmin, list(newpar = Inipar + tmpmin$param * Gradvc)) } > nglgLik(rep(.5,4)) ## = 232.03 > tmpo <- GrdSrch2(rep(.5,4),nglgLik,.2,gradlLik) > tmpo$obj ## 226.1387 > tmpo$newpar [1] 0.3000000 0.4959097 0.5795682 0.3796441 > tmpo <- GrdSrch2(tmpo$newpar,nglgLik,.2,gradlLik) > tmpo$obj [1] 213.3247 > tmpo <- GrdSrch2(tmpo$newpar,nglgLik,.2,gradlLik) > tmpo$obj [1] 203.1496 > tmpo <- GrdSrch2(tmpo$newpar,nglgLik,.2,gradlLik) > tmpo$obj [1] 196.9765 > tmpo <- GrdSrch2(tmpo$newpar,nglgLik,.2,gradlLik) > tmpo$obj [1] 189.5322 > tmpo <- GrdSrch2(tmpo$newpar,nglgLik,.2,gradlLik) > tmpo <- GrdSrch2(tmpo$newpar,nglgLik,.2,gradlLik) > tmpo$obj [1] 188.6734 > tmpo <- GrdSrch2(tmpo$newpar,nglgLik,.2,gradlLik) > tmpo$obj [1] 188.6723 > tmpo$newpar [1] 0.15656334 0.39485435 0.09408027 -0.14289685 > Gradmat(tmpo$newpar,nglgLik) [1] -20.235022 12.763439 6.804246 -11.363220 ### not so small !!! > Gradmat(tmpo$newpar, gradlLik) [,1] [,2] [,3] [,4] [1,] -331516.64615 -41.19719 308490.4544 -160036.67754 [2,] -41.19719 -41.19719 225.4807 -26.36499 [3,] 308490.11205 225.48071 -402548.0733 73738.27910 [4,] -160036.79539 -26.36499 73738.5430 -126787.61806 ### Check gradlLik function !!! > NRroot(tmpo$newpar,gradlLik) ###??? 
SO WE HAVE THE SAME RESULT FROM DIFFERENT STARTING-POINTS and methods. FINALLY, LET'S PRODUCE A CONTOUR PLOT OF LIKELIHOOD WITH RESPECT TO THE FIRST AND SECOND PARAMETER COMPONENTS, WITH THE THIRD AND FOURTH FIXED AT THEIR THEORETICAL VALUES FROM WHICH I SIMULATED ... > motif() > likmat <- matrix(rep(0,225),ncol=15) > for (i in (-7):7) for (j in (-7):7) { + likmat[i+8,j+8] <- tempfnc3(c(-0.9+0.4*i,0.5+0.5*j))} contour(-0.9+0.4*((-7):7),0.5+0.5*((-7):7),likmat,nlevels=12, + xlab="Intercept",ylab="Coeff of Z1") > title("Contour Plot of LogLik with b3=.16, b4=-.34") THIS PRODUCES CONTOUR PLOT AWAY FROM GLOBAL OPTIMUM NOW PRODUCE ANOTHER CONTOUR PLOT AT GLOBAL OPTIMUM SO THIS WAS AN EXTREMELY DIFFICULT OPTIMUM TO DOCUMENT !! ----------------------------------------------------------------- > Hessmat(tmpmod$x,tempfnc2) [,1] [,2] [,3] [,4] [1,] 210.320650 193.267624 162.003744 144.950718 [2,] 193.267624 193.267624 144.950718 142.108547 [3,] 162.003744 144.950718 162.003744 142.108547 [4,] 144.950718 142.108547 142.108547 142.108547 > > solve(.Last.value) [,1] [,2] [,3] [,4] [1,] 0.1567931020 -0.141740964 -0.114145378 0.0959573784 [2,] -0.1417409642 0.147837080 0.100372672 -0.1036339687 [3,] -0.1141453783 0.100372672 0.133763331 -0.1177077175 [4,] 0.0959573784 -0.103633969 -0.117707718 0.1305020347 > sqrt(diag(.Last.value)) [1] 0.395971087 0.384495878 0.365736697 0.361250654 THESE ARE THE ESTIMATED STANDARD DEVIATIONS OF THE FOUR PARAMETERS' MLE'S, QUITE LARGE FOR A PROBLEM WITH THIS MUCH DATA ! 
> eigen(Hessmat(tmpmod$x,tempfnc2)) $values: [1] 646.45147014 43.55427763 15.61295582 2.08186123 $vectors: [,1] [,2] [,3] [,4] [1,] 0.554448970 -0.407515896 0.491104380 0.534166286 [2,] 0.525441094 -0.504505149 -0.448930656 -0.517539832 [3,] 0.471800107 0.571418240 0.466659857 -0.482819253 [4,] 0.440343791 0.502878260 -0.582671719 0.462281807 THIS CONFIRMS THAT THE INFORMATION MATRIX WAS QUITE ILL-CONDITIONED, WHICH WAS WHY THE OPTIMIZATION PROBLEM TURNED OUT TO BE INACCESSIBLE TO STEEPEST-ASCENT ! ----------------------------------------------------------------------- &&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& SOLUTION TO PROBLEM 3(B) &&&&&&&&& &&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& > mixfunc function(thet) { sum(2 * Tvec - log(2 * thet[2] + (1 - thet[2]) * thet[1] * exp((2 - thet[1]) * Tvec))) } > nlmin(mixfunc,c(1,0.5)) -> tmpmix > tmpmix $x: [1] 0.366338011 0.399882567 $converged: [1] T $conv.type: [1] "relative function convergence" > Hessmat(tmpmix$x,mixfunc) [,1] [,2] [1,] 3751.66564 1045.91891 [2,] 1045.91891 1273.29258 > Gradvec(tmpmix$x,mixfunc) [1] -0.002032720658 -0.000659383659 > -mixfunc(tmpmix$x) [1] -1545.92981 NOTE THAT INITIALLY > Hessmat(c(1,0.5),mixfunc) > thtmp <- c(1,0.5) > cat("lambda p loglik NormGrad",fill=T) lambda p loglik NormGrad > for (i in 1:7) { + thtmp <- thtmp - solve(Hessmat(thtmp,mixfunc),Gradvec(thtmp,mixfunc)) + cat(c(thtmp,mixfunc(thtmp),sqrt(sum(Gradvec(thtmp,mixfunc)^2))),fill=T) } 0.122986807519447 0.180742891097666 1929.08998400759 3667.77511470657 0.196977807016496 0.301997585188868 1661.30352066191 1447.92273341696 0.282748475336596 0.411793955885438 1559.8252792032 388.97047720377 0.352337275684709 0.411406026295845 1546.19353627448 39.19709695065 0.366693613370382 0.397427702841125 1545.93336156085 3.68433065100523 0.366273367977891 0.399934399200624 1545.9298190039 0.169007634523479 0.366338553149086 0.399883710532414 1545.92981339435 0.00937485680337354 NOW FOR EM ITERATIONS DOING THE SAME THING ... 
> EMiter function(thet) { pvec <- 1/(1 + (thet[1]/2) * (1/thet[2] - 1) * exp((2 - thet[1]) * Tvec )) th2 <- mean(pvec) c((1 - th2)/mean(Tvec * (1 - pvec)), th2) } > thtmp <- c(1,0.5) > for (i in 1:20) { + thtmp <- EMiter(thtmp) + cat(c(thtmp,mixfunc(thtmp)),fill=T) } > for (i in 1:11) { + thtmp <- EMiter(thtmp) + cat(c(thtmp,mixfunc(thtmp)),fill=T) } 0.387559771311953 0.379350230280137 1546.50328473669 0.37562827054685 0.379917425657923 1546.14118181312 0.373673921040313 0.38392083882705 1546.06477897557 0.37217885265946 0.387176294928516 1546.01559323296 0.370981250141336 0.389784525185866 1545.98412308446 0.370024240782566 0.391867790225358 1545.96409258317 0.369261346824179 0.393527857566243 1545.95139648781 0.368654374593778 0.394848233362844 1545.94337601895 0.368172201491716 0.395896872832328 1545.93832271677 0.367789636837103 0.396728721524053 1545.93514560049 0.367486398163725 0.397387983022901 1545.93315142998