Homework Solution to Problem 24. ================================= [Involves both Splus 6.0 and SAS in separate parts.] > Bassfr <- Bass[,c(1,3,5,12)] Bassfr$Alk <- factor(ifelse(Bassfr$Alk < 15,"A", ifelse(Bassfr$Alk < 40, "B", "C"))) > contrasts(Bassfr$Alk) B C A 0 0 B 1 0 C 0 0 ### Default in Splus 6.0 is "treatment" contrasts. (a) > lm.trt <- lm(logAvM ~ . , data=Bassfr) > Bassfr2 <- Bassfr; Bassfr2$Alk <- C(Bassfr2$Alk, helmert) > contrasts(Bassfr2$Alk) [,1] [,2] A -1 -1 B 1 -1 C 0 2 > lm.helm <- lm(logAvM ~ . , data=Bassfr2) > round(rbind(trt.lm = lm.trt$coef, helm.lm = lm.helm$coef),4) (Intercept) Alk1 Alk2 Calc agedat trt.lm -0.7836 -0.4737 -1.3467 5e-04 0.5273 helm.lm -1.3904 -0.2368 -0.3700 5e-04 0.5273 ## The model for lm.trt has intercept and Alk terms equal to ## Int.trt + Alk1.trt * I[Alk==B] + Alk2.trt * I[Alk==C], ## while that for lm.hel has terms: ## Int.helm + Alk1.helm * (I[Alk==B]-I[Alk==A]) + ## Alk2.helm * (2*I[Alk==C]-I[Alk==A]-I[Alk==B]) ## NOTE: the predictors under both models are identical, > sum(abs(lm.trt$fit - lm.helm$fit)) [1] 1.809664e-14 > sum(abs(lm.helm$fit - c(model.matrix(lm.helm) %*% lm.helm$coef))) [1] 7.605028e-15 > sum(abs(lm.trt$fit - c(model.matrix(lm.trt) %*% lm.trt$coef))) [1] 3.325118e-14 ## We conclude that (respectively under Alk= A,B,C cases) ## Int.trt = Int.helm - Alk1.helm - Alk2.helm ## Int.trt + Alk1.trt = Int.helm + Alk1.helm - Alk2.helm ## Int.trt + Alk2.trt = Int.helm + 2 * Alk2.helm ## which implies: Alk1.trt = 2*Alk1.helm, and ## Alk2.trt = 3*Alk2.helm+Alk1.helm (c) Studentized coefficient: > c(lm.trt$coef[1:3] %*% c(1,-3,1))/(summary(lm.trt)$sigma* sqrt(sum(c(1,-3,1) * c(summary(lm.trt)$cov.unscaled[ 1:3,1:3] %*% c(1,-3,1))))) [1] -1.09218 ### p-value using > 2*(1-pt(abs(.Last.value),lm.trt$df.resid)) [1] 0.2802058 -------- SAS part: --------------------------------------- (b) libname home "."; data basstmp; set home.bass (keep = AvMrc Alk Calc Agedata); logAvM = log(AvMrc); if(Alk < 15) then Alk=1; else do; if(Alk < 40) then Alk=2; else Alk=3; end; proc glm ; class Alk ; model logAvM = Alk Calc Agedata / solution ; run; The GLM Procedure Dependent Variable: logAvM Standard Parameter Estimate Error t Value Pr > |t| Intercept -2.130359482 B 0.35792842 -5.95 <.0001 Alk 1 1.346732952 B 0.32859848 4.10 0.0002 Alk 2 0.873072822 B 0.30736144 2.84 0.0066 Alk 3 0.000000000 B . . . Calc 0.000512831 0.00574910 0.09 0.9293 agedata 0.527338165 0.21705837 2.43 0.0189 ## Here we can see that SAS tried initially to use both the ## intercept and the dummy-indicator coding with variables ## Alk_j = I[Alk=j] for j=1,2,3. ## But the Intercept effectively replaced Alk_3. ## The coding for variables and coefficients is therefore ## given by: ## Int.SAS + Alk1.SAS * I[Alk=A] + Alk2.SAS * I[Alk=B] ## = Int.trt + Alk1.trt * I[Alk==B] + Alk2.trt * I[Alk==C] ## Thus: Int.SAS = Alk2.trt + Int.trt = -0.7836 -1.3467 = -2.1303, ### and Alk1.SAS = Int.trt -Int.SAS = -0.7836 + 2.1303 = 1.3467 ### and Alk2.SAS = Int.trt + Alk1.trt - Int.SAS = .873