STAT 705, Fall 2010


Some useful "sample" functions.
----------------------------------

x <- 1:9

> sample(x)
[1] 9 1 2 6 7 4 3 5 8  #<--- Permutation of x.

> sample(x, replace=T)
[1] 7 4 5 7 1 3 9 5 7  #<--- Bootstrap sample (WITH replacement.)

> sample(x,3)
[1] 8 7 4              #<--- Sample size 3 WITHOUT replacement.

> sample(x,3, replace=T)
[1] 9 9 2              #<--- Sample size 3 WITH replacement.


                   Heart-Stroke Example
                 ========================


Bootstrapping to Assess Variability of Rates Ratios.

Stroke Case
===============

                 Strokes    No Strokes   Subjects
 Aspirin          119       10918         11037
 Placebo           98       10936         11034


Risk Ratio = Rates Ratio= ThetaHat = (119/11037)/(98/11034)=1.213956

###Problem: Use the bootstrap method to get a CI for the TRUE RR Theta.


             Aspirin Group
            ----------------
1. Generate a "population" of 119 1's and 11037-119=10918 0's.

x <- 1:11037;
x[] <- 0;
x[1:119] <- 1;
x <- sample(x)  #Permutation of x.

Check:
> length(x[x==0])
[1] 10918
> length(x[x==1])
[1] 119
> sum(x)/length(x)
[1] 0.01078192    #OK: 119/11037=0.01078192


2. Draw with replacement a sample of size 11037

> x1 <- sample(x, replace=T)   #Bootstrap sample.

Check: 
> sum(x1)/length(x1)
[1] 0.01177856     #OK.

#Repeat many times
x1 <- sample(x, replace=T)
sum(x1)/length(x1)


            Placebo Group (Control)
         -----------------------------
1. Generate a "population" of 98 1's and 11034-98=10936 0's

y <- 1:11034
y[] <- 0 
y[1:98] <- 1
y <- sample(y) #Permutation of y.


Check:
> length(y[y==0])
[1] 10936
> length(y[y==1])
[1] 98
> sum(y)/length(y)
[1] 0.008881639     #OK: 98/11034=0.008881639

2. Draw with replacement a sample of size 11034


> y1 <- sample(y, replace=T)   #Bootstrap sample.


Check: 
> sum(y1)/length(y1)
[1] 0.008972268      #OK.

#Repeat many times
y1 <- sample(y, replace=T) 
sum(y1)/length(y1)

                Bootstrap Replicate
            ---------------------------

 ThetaHatStar=(prop. 1's in x1)/(prop 1's in y1) = 0.01177856/0.008972268
                                                 = 1.312774


We need to get many such replicates: e.g. 1000 times, and get a CI 
for Theta. That is, get RR from 1000 tables 

                 Strokes     No Strokes      Subjects
 Aspirin         sum(x1)    11037-sum(x1)     11037
 Placebo         sum(y1)    11034-sum(y1)     11034


==============

======= Now the Whole Thing: Bootstarp in Strokes Case  ==========


1. Aspirin: Generate a "population" of 119 1's and 11037-119=10918 0's.

x <- 1:11037;
x[] <- 0;             ##Simpler: x <- rep(0,11037)
x[1:119] <- 1;
x <- sample(x)  #Permutation of x. Create a sample of 119 1's rest 0's


2. Placebo: Generate a "population" of 98 1's and 11034-98=10936 0's

y <- 1:11034;
y[] <- 0; 
y[1:98] <- 1;
y <- sample(y) #Permutation of y. Create a sample of 09 1's rest 0's


3. Get 1000 Replicates of ThetaHatStar:

               Proportion Succeses in Aspirin Sample
 ThetaHatStar= --------------------------------------        
               Proportion Succeses in Placebo Sample

ThetaHatStar <- 1:1000
for(i in 1:1000){
x1 <- sample(x, replace=T);
y1 <- sample(y, replace=T);
ThetaHatStar[i] <- (sum(x1)/11037)/(sum(y1)/11034)}  #Time: About 3 seconds


> ThetaHatStar[1:20]
 [1] 1.2269391 0.9197499 1.3551871 0.9803160 1.2089736 1.0297200 1.0401212
 [8] 1.1356136 1.5233953 1.1679068 1.0915400 1.2423807 1.7268032 1.2972663
[15] 0.9054142 1.1663496 1.2161642 0.8644266 1.2471857 1.1442672

4. Variability of ThetaHatStar

> summary( ThetaHatStar)
   Min. 1st Qu. Median  Mean 3rd Qu.  Max. 
 0.7557   1.112  1.213 1.225   1.333 1.793

> sqrt(var(ThetaHatStar))
[1] 0.1644986    #<---------Standard Deviation of ThetaHatStar


Rough 95% CI for Theta: Take 25th and 975th largest of the 1000 replicates
                         of ThetaHatStar.


> sort(ThetaHatStar)[25]
[1] 0.9127953   <------------Lower Limit
> sort(ThetaHatStar)[975]
[1] 1.569466    <------------Upper Limit     #CI Contains Theta=1.


Rough 95% CI for Theta: Plus/Minus Two SD from mean=1.213956

> 1.213956-2*0.1644986
[1] 0.8849588   <------------Lower Limit
> 1.213956+2*0.1644986
[1] 1.542953    <------------Upper Limit      #CI Contains Theta=1.
                                              #Close to nonparametric CI


###Now CI for the Odds Ratio

                 Strokes    No Strokes   Subjects
 Aspirin          119       10918         11037
 Placebo           98       10936         11034


Odds Ratio = ThetaHat = (119*10936)/(98*10818) = 1.227531

Bootstrap: Repeat table below many times to CI for Odds Ratio

                 Strokes    No Strokes       Subjects
 Aspirin         sum(x1)    11037-sum(x1)     11037
 Placebo         sum(y1)    11034-sum(y1)     11034


ThetaHatStar <- 1:1000
for(i in 1:1000){
x1 <- sample(x, replace=T);
y1 <- sample(y, replace=T);
ThetaHatStar[i] <-
(sum(x1)*(11034-sum(y1)))/(sum(y1)*(11037-sum(x1)))} #About 3 sec


> summary( ThetaHatStar)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
 0.7909  1.1060  1.2170  1.2260  1.3330  1.9010


> sort(ThetaHatStar)[25]
[1] 0.9269944             <------------Lower Limit
>  sort(ThetaHatStar)[975]
[1] 1.569815              <------------Upper Limit #CI Contains Theta=1.

#CI Contains Theta=1 ====> Independence 


Rough 95% CI for Theta: Plus/Minus Two SD from mean= 1.2260

> sqrt(var(ThetaHatStar))
[1] 0.165593

1.227531 - 2*0.165593 = 0.896345
1.227531 + 2*0.165593 = 1.558717 #Close to nonparametric CI


-------------------------------------------------------------


Heart Case
===============

                No. Heart Attacks
                (Fatal+Non-Fatal)       Subjects
 aspirin          104                    11037
 placebo          189                    11034

Relative Risk = Rates Ratio = ThetaHat= (104/11037)/(189/11034)=0.550115

Problem: Use the bootstrap method to get a CI for the TRUE RR Theta.


x <- 1:11037;
x[] <- 0;
x[1:104] <- 1;
x <- sample(x)  #Permutation of x. Create a sample of 104 1's rest 0's

y <- 1:11034;
y[] <- 0; 
y[1:189] <- 1;
y <- sample(y) #Permutation of y. Create a sample of 189 1's rest 0's

ThetaHatStar <- 1:1000
for(i in 1:1000){
x1 <- sample(x, replace=T);
y1 <- sample(y, replace=T);
ThetaHatStar[i] <- (sum(x1)/11037)/(sum(y1)/11034)}  #Time: About 3 seconds.


sort(ThetaHatStar)[25]
[1] 0.4270196   <------------Lower Limit for rough 95% CI
> sort(ThetaHatStar)[975]
[1] 0.6846623   <------------Upper Limit for rough 95% CI

                                       #CI Does Not Contains Theta=1.

> summary( ThetaHatStar)
   Min. 1st Qu. Median   Mean 3rd Qu.   Max. 
 0.3683  0.5069 0.5496 0.5525  0.5977 0.7655

> sqrt(var(ThetaHatStar))
[1] 0.0669852


> 0.550115-2*0.0669852
[1] 0.4161446   <------------Lower Limit for rough 95% CI
> 0.550115+2*0.0669852
[1] 0.6840854   <------------Upper Limit for rough 95% CI
 
                                       #CI Does Not Contain Theta=1.


###Now CI for the Odds Ratio

                  Heart Attacks
                (Fatal+Non-Fatal)   No HA     Subjects
 aspirin          104               10933      11037
 placebo          189               10845      11034


Odds Ratio = ThetaHat = (104*10845)/(189*10933) = 0.5458355


x <- 1:11037;
x[] <- 0;
x[1:104] <- 1;
x <- sample(x)  #Permutation of x: Create a sample of 104 1's rest 0's 

y <- 1:11034;
y[] <- 0; 
y[1:189] <- 1;
y <- sample(y) #Permutation of y. Create a sample of 189 1's rest 0's

##Bootstrap samples
x1 <- sample(x, replace=T);
y1 <- sample(y, replace=T);

Bootstrap: Repeat table below many times to CI for Odds Ratio

                 Strokes    No Strokes       Subjects
 Aspirin         sum(x1)    11037-sum(x1)     11037
 Placebo         sum(y1)    11034-sum(y1)     11034


ThetaHatStar <- 1:1000
for(i in 1:1000){
x1 <- sample(x, replace=T);
y1 <- sample(y, replace=T);
ThetaHatStar[i] <-
(sum(x1)*(11034-sum(y1)))/(sum(y1)*(11037-sum(x1)))} #About 3 sec


##Nonparametric 95% CI: Does Not Contain Theta=1 ====> Dependence!!!
sort(ThetaHatStar)[25] = 0.4135327
sort(ThetaHatStar)[975]= 0.6869988


##Normal approx 95% CI: Does Not Contain Theta=1 ====> Dependence!!!
0.5458355-2*sqrt(var(ThetaHatStar))=0.406205
0.5458355+2*sqrt(var(ThetaHatStar))=0.685466 ##Similar to nonparametric CI