R Script Illustrating 2 and K Sample and Stratified Logrank Tests ================================================================= Eric Slud, STAT 702 Fall 2021 library(survival) ## We examine the syntax and computations with the "survdiff" function ## Simulate a right-censored survival dataset with 3 Treatment groups and 2 Strata ## The label Trt will take values 1:3, and the label Str values 1:2 ## All six groups have their own baseline Weibull survival distributions ## and all have independent Unif Censoring. X.Arr = C.Arr = array(0, c(60,3,2), dimnames=list(NULL,paste0("Trt",1:3), paste0("Str",1:2))) lam = c(1.4,1.1) alph = c(1.2,1.6) theta = c(1, 0.6, 0.8) ## hazard ratio for [Trt,Str] Upbd = array(c(80,60,70,55,75,60),c(3,2)) # upper limit for unif censoring in 6 groups set.seed(717) for (Trt in 1:3) for (Str in 1:2) { X.Arr[,Trt,Str] = round(10*rweibull(60,alph[Str],lam[Str])/ (theta[Trt])^(alph[Str]),2) C.Arr[,Trt,Str] = round(Upbd[Trt,Str]*runif(60),2) } Dfram = data.frame(Tim = pmin(c(X.Arr),c(C.Arr)), delt = as.numeric(c(X.Arr <=C.Arr)), Trt = rep(rep(1:3, rep(60,3)),2), Str = rep(1:2, rep(180,2))) > round(100*apply(array(Dfram$delt,c(60,3,2)),2:3,sum)/60,1) [,1] [,2] ### Death fractions = 1 -Cens frac by Trt x Str [1,] 83.3 76.7 [2,] 68.3 60.0 [3,] 70.0 81.7 tmp = survfit(Surv(Tim,delt) ~ Trt, data=Dfram) > tmp$strata ## numbers of distinct event-times in the Trt groups Trt=1 Trt=2 Trt=3 118 118 120 tmp = survfit(Surv(Tim,delt) ~ Trt, data=Dfram, subset = 1:180) plot(tmp) tmp = survfit(Surv(Tim,delt) ~ Trt, data=Dfram, subset = 181:360) lines(tmp, col="blue") > survdiff(Surv(Tim,delt) ~ Trt, data=Dfram, subset=1:120) Call: survdiff(formula = Surv(Tim, delt) ~ Trt, data = Dfram, subset = 1:120) N Observed Expected (O-E)^2/E (O-E)^2/V Trt=1 60 50 38.9 3.18 5.6 Trt=2 60 41 52.1 2.37 5.6 Chisq= 5.6 on 1 degrees of freedom, p= 0.02 > survdiff(Surv(Tim,delt) ~ Trt, data=Dfram, subset=1:180) Call: survdiff(formula = Surv(Tim, delt) ~ Trt, data = Dfram, subset = 1:180) N Observed Expected (O-E)^2/E (O-E)^2/V Trt=1 60 50 34.2 7.256 9.931 Trt=2 60 41 44.9 0.332 0.509 Trt=3 60 42 53.9 2.629 4.635 Chisq= 10.5 on 2 degrees of freedom, p= 0.005 > survdiff(Surv(Tim,delt) ~ Trt, data=Dfram, subset=181:360) Call: survdiff(formula = Surv(Tim, delt) ~ Trt, data = Dfram, subset = 181:360) N Observed Expected (O-E)^2/E (O-E)^2/V Trt=1 60 46 29.8 8.82 11.88 Trt=2 60 36 63.7 12.03 25.60 Trt=3 60 49 37.5 3.50 5.03 Chisq= 26.7 on 2 degrees of freedom, p= 2e-06 > survdiff(Surv(Tim,delt) ~ Trt + strata(Str), data=Dfram) Call: survdiff(formula = Surv(Tim, delt) ~ Trt + strata(Str), data = Dfram) N Observed Expected (O-E)^2/E (O-E)^2/V Trt=1 120 96 64.0 15.96709 21.68723 Trt=2 120 77 108.5 9.16454 16.81588 Trt=3 120 91 91.4 0.00207 0.00333 Chisq= 26.4 on 2 degrees of freedom, p= 2e-06 ### Note that the observed for the combined strata are just the sum # of the observed for separate strata (1:180 and 181:360) # and same remark holds for "expected" > survdiff(Surv(Tim,delt) ~ Trt, data=Dfram, subset = 1:120) Call: survdiff(formula = Surv(Tim, delt) ~ Trt, data = Dfram, subset = 1:120) N Observed Expected (O-E)^2/E (O-E)^2/V Trt=1 60 50 38.9 3.18 5.6 Trt=2 60 41 52.1 2.37 5.6 Chisq= 5.6 on 1 degrees of freedom, p= 0.02 > survdiff(Surv(Tim,delt) ~ Trt, data=Dfram, subset = 181:300) Call: survdiff(formula = Surv(Tim, delt) ~ Trt, data = Dfram, subset = 181:300) N Observed Expected (O-E)^2/E (O-E)^2/V Trt=1 60 46 26.6 14.23 23 Trt=2 60 36 55.4 6.82 23 Chisq= 23 on 1 degrees of freedom, p= 2e-06 > survdiff(Surv(Tim,delt) ~ Trt + strata(Str), data=Dfram, subset = c(1:120,181:300)) Call: survdiff(formula = Surv(Tim, delt) ~ Trt + strata(Str), data = Dfram, subset = c(1:120, 181:300)) N Observed Expected (O-E)^2/E (O-E)^2/V Trt=1 120 96 65.4 14.27 24.2 Trt=2 120 77 107.6 8.68 24.2 Chisq= 24.2 on 1 degrees of freedom, p= 9e-07 #-------------------------------------------------------------- ## Examine the calculations for 2-Trt and then 3-Trt test "by hand" -- ignore strata # Recall: tmp = survfit(Surv(Tim,delt) ~ Trt, data=Dfram) # has respectively 118, 118, 120 distinct event times in the 3 Trt groups # Create at-risk and dth table for the Trt gps with common set of times thrgp.tim = c(0,sort(unique(tmp$time))) ## only 340 unique ones, besides 0 rsk1 = rsk2 = rsk3 = c(120,rep(0,340)) dth1 = dth2 = dth3 = cns1 = cns2 = cns3 = rep(0,341) ind = match(tmp$time[1:118],thrgp.tim) rsk1[ind] = tmp$n.risk[1:118] dth1[ind] = tmp$n.ev[1:118] cns1[ind] = tmp$n.cen[1:118] ind = match(tmp$time[119:236],thrgp.tim) rsk2[ind] = tmp$n.risk[119:236] dth2[ind] = tmp$n.ev[119:236] cns2[ind] = tmp$n.cen[119:236] ind = match(tmp$time[237:356],thrgp.tim) rsk3[ind] = tmp$n.risk[237:356] dth3[ind] = tmp$n.ev[237:356] cns3[ind] = tmp$n.cen[237:356] ## Now fill in the at-risk numbers between Trt-gp event times for(i in 1:340) { if(rsk1[i+1]==0) rsk1[i+1] = rsk1[i]-dth1[i]-cns1[i] if(rsk2[i+1]==0) rsk2[i+1] = rsk2[i]-dth2[i]-cns2[i] if(rsk3[i+1]==0) rsk3[i+1] = rsk3[i]-dth3[i]-cns3[i] } ## 2-sample test-statistic "numerator" for Trt=1:2 is Obs1 = sum(dth1) ## = 96 Exp1 = sum((dth1+dth2)*rsk1/(rsk1+rsk2), na.rm=T) ## = 64.967 Obs2 = sum(dth2) ## = 77 Exp2 = sum((dth1+dth2)*rsk2/(rsk1+rsk2), na.rm=T) # = 108.033 ## Logrank denom^2 rskrat = rsk1/(rsk1+rsk2) Var = sum( rskrat*(1-rskrat)*((rsk1+rsk2-dth1-dth2)/(rsk1+rsk2))* (dth1+dth2), na.rm=T) altVar = sum( rskrat*(1-rskrat)*(dth1+dth2), na.rm=T) abs(Obs1-Exp1)/sqrt(Var) ## = 4.986, and 4.986^2 = 24.86 abs(Obs1-Exp1)/sqrt(altVar) ## = 4.942, and 4.942^2 = 24.42 # Normal p-value = 2*(1-pnorm(4.986)) = 6.16e-7, with altVar 7.7e-7 # chisq p-value = 1-pchisq(4.986^2) = 6.16e-7, with altVar 7.7e-7 > survdiff(Surv(Tim,delt) ~ Trt, data=Dfram, subset = c(1:120,181:300)) Call: survdiff(formula = Surv(Tim, delt) ~ Trt, data = Dfram, subset = c(1:120, 181:300)) N Observed Expected (O-E)^2/E (O-E)^2/V Trt=1 120 96 65 14.82 24.4 Trt=2 120 77 108 8.91 24.4 Chisq= 24.4 on 1 degrees of freedom, p= 8e-07 ### For 3-Trt logrank, work with different "Exp" terms and a covariance matrix rat1 = rsk1/(rsk1+rsk2+rsk3) rat2 = rsk2/(rsk1+rsk2+rsk3) dths = dth1+dth2+dth3 Obs1 = sum(dth1) ## = 96 Exp1 = sum(dths*rat1, na.rm=T) ## = 64.251 Obs2 = sum(dth2) ## = 77 Exp2 = sum(dths*rat2, na.rm=T) # = 105.134 Obs3 = sum(dth3) ## = 91 Exp3 = sum(dths*(1-rat1-rat2), na.rm=T) # = 94.615 Varmat = array(c( sum(rat1*(1-rat1)*dths, na.rm=T), rep(-sum(rat1*rat2*dths, na.rm=T),2), sum(rat2*(1-rat2)*dths, na.rm=T) ), c(2,2)) t(c(Obs1-Exp1,Obs2-Exp2)) %*% solve(Varmat) %*% c(Obs1-Exp1,Obs2-Exp2) ### Statistic value = 23.759 > (96-64.251)^2/Varmat[1,1] [1] 21.18453 > (77-105.134)^2/Varmat[2,2] [1] 12.6199 survdiff(Surv(Tim,delt) ~ Trt, data=Dfram)) Call: survdiff(formula = Surv(Tim, delt) ~ Trt, data = Dfram) N Observed Expected (O-E)^2/E (O-E)^2/V Trt=1 120 96 64.3 15.688 21.195 Trt=2 120 77 105.1 7.529 12.626 Trt=3 120 91 94.6 0.138 0.219 Chisq= 23.8 on 2 degrees of freedom, p= 7e-06 >