STAT 705, Fall 2010
Examples of the Use of the Parametric Bootstrap Method
(J. A. Rice pp. 249-252)
&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
NOTE: In Splus var(x)=sum((x-mean(x))^2)/(length(x)-1)
NOTE: To generate n N(mu,sigma^2) observations use "rnorm(n,mu,sigma)"
where the SD sigma is used, not sigma^2.

1. SIMULATED DATA (N(mu,sigma^2)) EXAMPLE
-------------------------------------------
Problem: Estimate the variances of xbar=mean(x), ssquare=var(x)
using the bootstrap method.

Generate n=200 observations from N(0,1):

> x <- rnorm(200,0,1)
> n <- length(x)
> n
[1] 200

Method of Moments Estimation:

> xbar <- mean(x)       <---MEM Estimator of mu=0.
> xbar
[1] 0.006017323         <---OK near 0.
> ssquare <- var(x)     <---MEM Estimator of sigma^2=1.
                           Here var(x) is computed with 1/(n-1)
                           instead of 1/n. It does not change things
                           much. But strictly speaking need to use 1/n.
> ssquare
[1] 1.190256            <---OK near 1.

Now Bootstrap: Generate 1000 samples each of size n=200 from
N(xbar,ssquare). For each sample obtain xbarSTAR, ssquareSTAR.
So we have xbarSTAR[1],...,xbarSTAR[1000] and
ssquareSTAR[1],...,ssquareSTAR[1000].
The variability in the xbarSTAR provides an estimate for the
variance of xbar. The variability in the ssquareSTAR provides an
estimate for the variance of ssquare.

xbarSTAR <- rep(NA,1000)
ssquareSTAR <- rep(NA,1000)
for(i in 1:1000){
x1<- rnorm(n,xbar,sqrt(ssquare))
xbarSTAR[i] <- mean(x1)
ssquareSTAR[i] <- var(x1)}

Check:
> mean(xbarSTAR)
[1] 0.007333637
> mean(ssquareSTAR)
[1] 1.187861

Bootstrap Estimates of Standard Errors of MEM Estimators xbar, ssquare:

> sqrt(var(xbarSTAR))
[1] 0.07950643
> sqrt(var(ssquareSTAR))
[1] 0.1201532

Check:
EXACT SD of xbar:
> sqrt(1/200)
[1] 0.07071068   <---Agrees With Bootstrap Est. 0.07950643 !!!
EXACT SD of ssquare:
> sqrt(2/199)
[1] 0.1002509    <---Agrees With Bootstrap Est. 0.1201532 !!!
95% Confidence Intervals:

For Mu=0:
> xbar+1.96*sqrt(var(xbarSTAR))
[1] 0.1618499
> xbar-1.96*sqrt(var(xbarSTAR))
[1] -0.1498153
95% CI for Mu (-0.15,0.16)   ###OK! Captures Mu=0.

For SigmaSqr=1:
> ssquare+1.96*sqrt(var(ssquareSTAR))
[1] 1.425757
> ssquare-1.96*sqrt(var(ssquareSTAR))
[1] 0.9547561
95% CI for SigmaSqr (0.95,1.43)   ###OK! Captures SigmaSqr=1.

Histograms of xbarSTAR, ssquareSTAR:
(FIGURE 1; Note symmetry of histograms.)

par(mfrow=c(2,2))
hist(xbarSTAR, main="1000 xbarSTAR")
legend(-0.265,200,c("AVE=0.0073", "SD=0.080"), bty="n")
hist(ssquareSTAR, main="1000 ssquareSTAR")
legend(0.8,280,c("AVE=1.19", "SD=0.12"), bty="n")

Plot a sequence of 150 estimates:

tsplot(xbarSTAR[1:150], main="xbarSTAR. Mu=0")
#Oscillation approx. about 0 as expected.
tsplot(ssquareSTAR[501:650], main="ssquareSTAR. SigmaSqr=1")
#Oscillation approx. about 1.19 (=ssquare), not about 1, as expected.

------------------------------------------------------------
Another N(0,1) Case:

Generate n=200 observations from N(0,1):

> x <- rnorm(200,0,1)
> n <- length(x)
> n
[1] 200

Method of Moments Estimation. Get xbar and ssquare:

> xbar <- mean(x)
> xbar
[1] 0.01405646
> ssquare <- var(x)
> ssquare
[1] 1.029741

Now Bootstrap. Generate 1000 samples of size 200 from N(xbar,ssquare)
and get 1000 xbarSTAR and 1000 ssquareSTAR:

xbarSTAR <- rep(NA,1000)
ssquareSTAR <- rep(NA,1000)
for(i in 1:1000){
x1<- rnorm(n,xbar,sqrt(ssquare))
xbarSTAR[i] <- mean(x1)
ssquareSTAR[i] <- var(x1)}

Check:
> mean(xbarSTAR)
[1] 0.01003094
> mean(ssquareSTAR)
[1] 1.03244

Bootstrap Estimates of Standard Errors of MEM Estimators xbar, ssquare:

> sqrt(var(xbarSTAR))
[1] 0.07157792
> sqrt(var(ssquareSTAR))
[1] 0.104312

95% Confidence Intervals:

For Mu=0:
xbar+1.96*sqrt(var(xbarSTAR))=0.1543492
xbar-1.96*sqrt(var(xbarSTAR))=-0.1262363
###OK! (-0.13, 0.15) Captures Mu=0.

For SigmaSqr=1:
ssquare+1.96*sqrt(var(ssquareSTAR))=1.234193
ssquare-1.96*sqrt(var(ssquareSTAR))=0.8252898
###OK! (0.83, 1.23) Captures SigmaSqr=1.
Histograms of xbarSTAR, ssquareSTAR:
(FIGURE 2; Note symmetry of histograms.)

par(mfrow=c(2,2))
hist(xbarSTAR, main="1000 xbarSTAR")
legend(-0.27,250,c("mean=0.01", "SD=0.072"), bty="n")
hist(ssquareSTAR, main="1000 ssquareSTAR")
legend(0.7,210,c("mean=1.03", "SD=0.104"), bty="n")

Plot a sequence of 150 estimates:

tsplot(xbarSTAR[1:150], main="xbarSTAR. Mu=0")
#Oscillation approx. about 0 as expected.
tsplot(ssquareSTAR[1:150], main="ssquareSTAR. SigmaSqr=1")
#Oscillation approx. about 1 as expected.

-----------------------------------------------
General Function for Normal Bootstrap:

###GENERATE n OBS FROM N(Mu,SigmaSqr):
Mu <- 20
SigmaSqr <- 9
n <- 200
x <- rnorm(n,Mu,sqrt(SigmaSqr))

###GET ESTIMATED SE FOR MEM ESTIMATORS OF Mu,SigmaSqr FROM A FUNCTION.
(Note: the exact SEs computed inside the function use the true SigmaSqr
from the workspace, not a function argument.)

Bootstrep <- function(x){   ###CREATING A FUNCTION CALLED "Bootstrep".
n <- length(x)
###GET MEM ESTIMATORS xbar, ssquare OF Mu,SigmaSqr:
xbar <- mean(x)
ssquare <- var(x)
###NOW BOOTSTRAP. GENERATE 1000 SAMPLES OF SIZE n FROM
###N(xbar,ssquare) AND GET 1000 PAIRS (xbarSTAR, ssquareSTAR):
xbarSTAR <- rep(NA,1000)
ssquareSTAR <- rep(NA,1000)
for(i in 1:1000){
x1<- rnorm(n,xbar,sqrt(ssquare))
xbarSTAR[i] <- mean(x1)
ssquareSTAR[i] <- var(x1)}
###BOOTSTRAP ESTIMATES OF SE'S OF MEM ESTIMATES xbar, ssquare:
EstSExbarSTAR <- sqrt(var(xbarSTAR))
EstSEssquareSTAR <- sqrt(var(ssquareSTAR))
###EXACT SD OF xbar AND ssquare:
ExactSExbar <- sqrt(SigmaSqr/n)
ExactSEssquare <- sqrt(2*SigmaSqr^2/(n-1))
list(n,xbar,ssquare,EstSExbarSTAR,EstSEssquareSTAR,
ExactSExbar,ExactSEssquare)
}

###To apply:
> Bootstrep(x)
[[1]]:
[1] 200         <------ n
[[2]]:
[1] 19.97334    <------ xbar (MEM for Mu=20)
[[3]]:
[1] 8.088353    <------ ssquare (MEM for SigmaSqr=9)
[[4]]:
[1] 0.1993207   <------ EstSExbarSTAR (Bootstrap est. of SE of xbar)
[[5]]:
[1] 0.8429728   <------ EstSEssquareSTAR (Bootstrap est.
of SE of ssquare)
[[6]]:
[1] 0.212132    <------ ExactSExbar (Exact SE of xbar)
[[7]]:
[1] 0.9022585   <------ ExactSEssquare (Exact SE of ssquare)

95% Confidence Intervals:

For Mu=20:
xbar+1.96*EstSExbarSTAR = 20.36401
xbar-1.96*EstSExbarSTAR = 19.58267
###OK! (19.5, 20.4) Captures Mu=20.

For SigmaSqr=9:
ssquare+1.96*EstSEssquareSTAR = 9.74058
ssquare-1.96*EstSEssquareSTAR = 6.436126
###OK! (6.44, 9.74) Captures SigmaSqr=9.

------------------------
Use same data x ~ N(Mu=20,SigmaSqr=9) as in previous example.

Another Run With a Modification: In list() use n=n, xbar=xbar, etc.,
and define an object "Boot". We can then identify the output by name
and use in computation.

Bootstrep <- function(x){
n <- length(x)
###GET MEM ESTIMATORS xbar, ssquare OF Mu,SigmaSqr:
xbar <- mean(x)
ssquare <- var(x)
###NOW BOOTSTRAP. GENERATE 1000 SAMPLES OF SIZE n FROM
###N(xbar,ssquare) AND GET 1000 PAIRS (xbarSTAR, ssquareSTAR):
xbarSTAR <- rep(NA,1000)
ssquareSTAR <- rep(NA,1000)
for(i in 1:1000){
x1<- rnorm(n,xbar,sqrt(ssquare))
xbarSTAR[i] <- mean(x1)
ssquareSTAR[i] <- var(x1)}
###BOOTSTRAP ESTIMATES OF SE'S OF MEM ESTIMATES xbar, ssquare:
EstSExbarSTAR <- sqrt(var(xbarSTAR))
EstSEssquareSTAR <- sqrt(var(ssquareSTAR))
###EXACT SD OF xbar AND ssquare:
ExactSExbar <- sqrt(SigmaSqr/n)
ExactSEssquare <- sqrt(2*SigmaSqr^2/(n-1))
list(n=n,xbar=xbar,ssquare=ssquare,EstSExbarSTAR=EstSExbarSTAR,
EstSEssquareSTAR=EstSEssquareSTAR,
ExactSExbar=ExactSExbar,ExactSEssquare=ExactSEssquare)
}

To Apply: Define an object "Boot"

> Boot <- Bootstrep(x)
> Boot
$n:
[1] 200
$xbar:
[1] 20.01805
$ssquare:
[1] 10.71231
$EstSExbarSTAR:
[1] 0.2293428
$EstSEssquareSTAR:
[1] 1.112367
$ExactSExbar:
[1] 0.212132
$ExactSEssquare:
[1] 0.9022585

Can now refer to the variables by prefixing them with Boot$; e.g.
> Boot$ExactSEssquare
[1] 0.9022585
> Boot$n
[1] 200
> attributes(Boot)
$names:
[1] "n"                "xbar"             "ssquare"          "EstSExbarSTAR"
[5] "EstSEssquareSTAR" "ExactSExbar"      "ExactSEssquare"

Thus:
For Mu=20:
> Boot$xbar+1.96*Boot$EstSExbarSTAR
[1] 20.46756
> Boot$xbar-1.96*Boot$EstSExbarSTAR
[1] 19.56854
95% CI for Mu=20: (19.56854,20.46756)

For SigmaSqr=9:
> Boot$ssquare+1.96*Boot$EstSEssquareSTAR
[1] 12.89255
> Boot$ssquare-1.96*Boot$EstSEssquareSTAR
[1] 8.532068
95% CI for SigmaSqr=9: (8.532068,12.89255)

2. REAL DATA (RAINFALL AMOUNT) EXAMPLE
----------------------------------------
Fit Gamma(alpha,Lambda) To Rainfall Amounts From LeCam and
Neyman (1967). Data in Neyman.dat/Splus. Taken from Rice
pp.383-384 problem 42. Bootstrap as in Rice Example C pp. 249-252

Method of moments:
=======================
lambda=mean(x)/var(x), alpha=mean(x)^2/var(x)

Note: var(x) in Splus uses 1/(n-1) instead of 1/n but the
difference is small. We use var(x) as in Splus. Rice does the
same thing also for method of moments!

Note: If X ~ Gamma(a,1) Then Y=X/lambda ~ Gamma(a,lambda).
Splus only gives Gamma(a,1) samples. So to get Gamma(a,lambda)
we divide every observation by lambda.

Rainfall Data:

x <- scan("Neyman.dat")
n <- length(x)
n
[1] 227
> mean(x)
[1] 0.2243921   <---- As in Rice.
> var(x)
[1] 0.1338252   <---- As in Rice.

Method of Moments Estimation:

lambda <- mean(x)/var(x)
lambda
[1] 1.676755
alpha <- mean(x)^2/var(x)
alpha
[1] 0.3762506

Now Bootstrap: Generate 1000 samples each of size n=227 from
Gamma(alpha=0.3762506,lambda=1.676755). For each sample obtain
alphSTAR,lamSTAR. So we have alphSTAR[1],...,alphSTAR[1000] and
lamSTAR[1],...,lamSTAR[1000].
The variability in the alphSTAR provides an estimate for the
variance of the MEM estimate of alpha which here we call alpha
as well. The variability in the lamSTAR provides an estimate for
the variance of the MEM estimate of lambda which here we call
lambda as well.
lamSTAR <- rep(NA,1000)
alphSTAR <- rep(NA,1000)
for(i in 1:1000){
x1<- rgamma(n, alpha)/lambda
lamSTAR[i] <- mean(x1)/var(x1)
alphSTAR[i] <- mean(x1)^2/var(x1)}

Check:
> mean(alphSTAR)
[1] 0.3904278
> mean(lamSTAR)
[1] 1.748474

Bootstrap Estimates of Standard Errors of MEM Estimators alpha, lambda:

> sqrt(var(alphSTAR))
[1] 0.06481527   <--As in Rice!!!
> sqrt(var(lamSTAR))
[1] 0.3462474    <--As in Rice!!!

Histograms of alphSTAR, lamSTAR from Method of Moments:

motif()
par(oma=c(0,0,5,0))   #<-- To put overall caption
par(mfrow=c(1,2))
hist(alphSTAR, main="1000 alphSTAR")
legend(0.15,250,c("mean=.390", "SD=0.065"),bty="n")   #(FIGURE 3)
hist(lamSTAR, main="1000 lamSTAR")
legend(2.1,180,c("mean=1.748", "SD=0.346"),bty="n")
mtext("Gamma Fit: Bootstrap Variability of MME",
side=3,outer=T,cex=1.5)
#OVERALL TITLE IN TOP OF OUTER MARGIN!!!

Plot a sequence of 150 bootstrap estimates:

tsplot(alphSTAR[1:150], main="alphSTAR")
legend(2,0.54,c("mean=0.390"), bty="n")
tsplot(lamSTAR[1:150], main="lamSTAR")
legend(2,2.7,c("mean=1.748"), bty="n")

---------------------
Using the same Neyman.dat data

Method of Maximum Likelihood:
===============================
Note: gamma(x) is the gamma function.
lgamma(x) is the log-gamma function.
===============================================
How to minimize a function:
= func <- function(x) {(x[1]-3)^2+(x[2]+1)^2}
= > min.func <- nlminb( start=c(1,1),obj =func)
= > min.func$parameters   #<--Optimal Values
= [1] 3 -1
= > min.func$par[1]
= [1] 3
= > min.func$par[2]
= [1] -1
=
===============================================

n <- length(x)
###theta <- c(alpha,lambda)
minus.likelihood <- function(theta) {
-(n*theta[1]*log(theta[2])-n*lgamma(theta[1])+(theta[1]-1)*sum(log(x))-
theta[2]*sum(x))}
max.likelihood <- nlminb( start=c(0.3762506,1.676755),obj =minus.likelihood)
> max.likelihood$par[1]
[1] 0.4407914
> max.likelihood$par[2]
[1] 1.964379

MLE: alpha=0.4407914, lambda=1.9643791   <--- As in RICE!!!
Now Bootstrap: Generate 1000 samples each of size n=227 from
Gamma(alpha=0.4407914,lambda=1.9643791). For each sample obtain
alphSTAR,lamSTAR. So we have alphSTAR[1],...,alphSTAR[1000] and
lamSTAR[1],...,lamSTAR[1000].
The variability in the alphSTAR provides an estimate for the
variance of the MLE estimate of alpha which here we call alpha
as well. The variability in the lamSTAR provides an estimate for
the variance of the MLE estimate of lambda which here we call
lambda as well.

n <- 227
alpha <- 0.4407914
lambda <- 1.9643791
lamSTAR <- rep(NA,1000)
alphSTAR <- rep(NA,1000)
for(i in 1:1000){
x1<- rgamma(n, alpha)/lambda
minus.likelihood <- function(theta) {
-(n*theta[1]*log(theta[2])-n*lgamma(theta[1])+(theta[1]-1)*sum(log(x1))-
theta[2]*sum(x1))}
max.likelihood <- nlminb( start=c(0.3762506,1.676755),obj =minus.likelihood)
alphSTAR[i] <- max.likelihood$par[1]
lamSTAR[i] <- max.likelihood$par[2]
print(c(i,alphSTAR[i],lamSTAR[i]))
}

Check:
> mean(alphSTAR)
[1] 0.4459786
> mean(lamSTAR)
[1] 1.995381

Bootstrap Estimates of Standard Errors of MLE Estimators alpha, lambda:

> sqrt(var(alphSTAR))
[1] 0.03527703   <--Close to Rice!!! Smaller than MEM!!!
> sqrt(var(lamSTAR))
[1] 0.2511891    <--Close to Rice!!! Smaller than MEM!!!

Histograms of alphSTAR, lamSTAR from Maximum Likelihood:

motif()
par(oma=c(0,0,5,0))   #<-- To put overall caption
par(mfrow=c(1,2))
hist(alphSTAR, main="1000 alphSTAR")
legend(0.46,200,c("mean=0.446", "SD=0.035"),bty="n")   #(FIGURE 4)
hist(lamSTAR, main="1000 lamSTAR")
legend(2.3,250,c("mean=1.995", "SD=0.251"),bty="n")
mtext("Gamma Fit: Bootstrap Variability of MLE",
side=3,outer=T,cex=1.5)
#OVERALL TITLE IN TOP OF OUTER MARGIN!!!
Bootstrap Confidence Interval:

> sort(alphSTAR)[25]
[1] 0.3822655
> sort(alphSTAR)[975]
[1] 0.5153893
> c(2*alpha-sort(alphSTAR)[975], 2*alpha-sort(alphSTAR)[25])
[1] (0.3661935, 0.4993173)   #<--- 95% CI for alpha
Normal approximation: (0.3716484,0.5099344)

> c(2*lambda-sort(lamSTAR)[975], 2*lambda-sort(lamSTAR)[25])
[1] (1.407562, 2.376535)   #<--- 95% CI for lambda
Normal approximation: (1.472049, 2.45671)

-----------------
Histograms:   #Figure 5

Normalized Histogram and Estimated pdf's from MME and MLE:

hist(x, prob=T)
title("Gamma Fit to 227 Rainfall Totals From Illinois")

Add MEM estimated pdf:

lambda <- 1.676755
alpha <- 0.3762506
y <- seq(0.05,2.5,0.01)
f <- (lambda^alpha)*y^(alpha-1)*exp(-lambda*y)/gamma(alpha)
lines(y,f,type="l")

Add MLE estimated pdf:

alpha <- 0.4407914
lambda <- 1.9643791
y <- seq(0.05,2.5,0.01)
f <- (lambda^alpha)*y^(alpha-1)*exp(-lambda*y)/gamma(alpha)
lines(y,f,type="p")

legend(1,2,c("---- MEM: alpha=0.376, lambda=1.677",
"**** MLE: alpha=0.441, lambda=1.964"))
legend(1,3,"Data Source: Le Cam and Neyman (1967)")