Homework Solutions to Problems 20-21.
====================================
[NB solution is in Splus 6.0]

(20)

/* Saved SASsales.asc data as HW20dat, ftp'd to cluster */

/* Read the raw sales records from HW20dat into the permanent data set
   home.HW20 and derive Amount = Units*Price for every record.          */
libname home ".";

data home.HW20;
   infile "HW20dat";
   input Mth Qtr Rep $ Type $ Units Price;
   Amount = Units*Price;
run;

NOTE: The data set HOME.HW20 has 110 observations and 7 variables.

/* BY-group processing needs sorted input, so sort by Qtr and Rep first,
   then report the count and mean of Amount within each Qtr/Rep group.  */
proc sort data=home.HW20;      /* could omit the data = option */
   by Qtr Rep;
run;

proc means data=home.HW20 N Mean;
   var Amount;
   by Qtr Rep;
run;

------ Qtr=1 Rep=Hollings -----

The MEANS Procedure

Analysis Variable : Amount

 N          Mean
-----------------
 5       7558.90

------ Qtr=1 Rep=Jones --------

 N          Mean
-----------------
 5       6451.83

------ Qtr=1 Rep=Smith --------

 N          Mean
-----------------
 8       9286.86

------ Qtr=2 Rep=Hollings -----

 N          Mean
-----------------
 6      10314.04

------ Qtr=2 Rep=Jones -------

 N          Mean
-----------------
 6       8023.23

------ Qtr=2 Rep=Smith -------

 N          Mean
-----------------
 6      11687.38

------ Qtr=3 Rep=Hollings ----

 N          Mean
-----------------
15       4642.82

------- Qtr=3 Rep=Jones ------

 N          Mean
-----------------
21       6725.55

------ Qtr=3 Rep=Smith -------

 N          Mean
-----------------
21       6886.85

----- Qtr=4 Rep=Hollings -----

 N          Mean
-----------------
 6       8667.08

----- Qtr=4 Rep=Jones --------

 N          Mean
-----------------
 6      12245.98

------ Qtr=4 Rep=Smith -------

 N          Mean
-----------------
 5       7712.67

/* Or alternatively: */

/* Same statistics in a single table: rows are Qtr*Rep combinations,
   columns are the mean and the count of Amount.                      */
proc tabulate data=home.hw20;
   class Qtr Rep;
   var Amount;
   table Qtr*Rep, Amount*mean Amount*n;
run;

The SAS System                      11:09 Friday, April 2, 2004   1

-----------------------------------------------------
|                         |   Amount   |   Amount   |
|                         |------------+------------|
|                         |    Mean    |     N      |
|-------------------------+------------+------------|
|Qtr         |Rep         |            |            |
|------------+------------|            |            |
|1           |Hollings    |     7558.90|        5.00|
|            |------------+------------+------------|
|            |Jones       |     6451.83|        5.00|
|            |------------+------------+------------|
|            |Smith       |     9286.86|        8.00|
|------------+------------+------------+------------|
|2           |Hollings    |    10314.04|        6.00|
|            |------------+------------+------------|
|            |Jones       |     8023.23|        6.00|
|            |------------+------------+------------|
|            |Smith       |    11687.38|        6.00|
|------------+------------+------------+------------|
|3           |Hollings    |     4642.82|       15.00|
|            |------------+------------+------------|
|            |Jones       |     6725.55|       21.00|
|            |------------+------------+------------|
|            |Smith       |     6886.85|       21.00|
|------------+------------+------------+------------|
|4           |Hollings    |     8667.08|        6.00|
|            |------------+------------+------------|
|            |Jones       |    12245.98|        6.00|
|            |------------+------------+------------|
|            |Smith       |     7712.67|        5.00|
-----------------------------------------------------

/* Split the records, taken in Type/Mth/Rep order, into two work data sets:
     tmpA - the first 15 records whose Type is "Standard"
            (later Standard records are written to neither data set);
     tmpB - every record whose Type is not "Standard".
   ctr counts the Standard records seen so far; the sum statement ctr+1
   keeps its value from one observation to the next.                     */
proc sort data=home.hw20 out=tmp20;
   by Type Mth Rep;
run;

data tmpA (drop=ctr)
     tmpB (drop=ctr);
   set tmp20;
   if _N_ = 1 then ctr = 0;
   if Type EQ "Standard" then do;
      ctr+1;
      if ctr LT 16 then output tmpA;
   end;
   else output tmpB;
run;

proc print data=tmpA;
   title 'Standard Units Sold, by Mth & Salesrep: 15 cases';
proc print data=tmpB;
   title 'Deluxe Units Sold, by Mth & Salesrep';
run;

... /* OK, worked ! */

(21)

/* Read pbcdata.txt: the first line holds ten column labels, which are read
   as character values and dropped; every later line is read as ten numeric
   fields.  The subsetting IF keeps only rows with a non-missing obs, which
   also discards the label row (its obs is never assigned).                 */
libname home ".";

data home.PBC (drop=labels1-labels10);
   infile "pbcdata.txt";
   if _N_=1 then input labels1-labels10 $;
   else input obs idnum dth evttime treatgp logbili
              agevar cirrh cchol albumin;
   if obs ne . ;
run;

.. "Invalid data" errors for the fields in the blank line!!

NOTE: The data set HOME.PBC has 216 observations and 10 variables.
/* List the first five observations of the most recently created data set. */
proc print;
   where obs < 6;
run;

Obs  obs  idnum  dth  evttime  treatgp  logbili   agevar  cirrh  cchol  albumin
  1    1    180    0        0        1  1.00000   36.598      0      0       37
  2    2    188    0        0        0  1.36173   29.964      0      0       35
  3    3     56    0        0        1  0.77815   81.451      0      0       36
  4    4     19    0        0        0  1.92428  121.510      1      0       41
  5    5    190    0        0        1  1.00000   60.340      0      0       32

(ii)

/* Mean and count of agevar within each treatment group.
   The data are sorted in DESCENDING order of treatgp, so every later BY
   statement on this data set must repeat the DESCENDING keyword; a plain
   "by treatgp;" is rejected because the data are not in ascending order. */
proc sort data=home.pbc out=home.altpbc;
   by descending treatgp;
run;

proc means data=home.altpbc mean N;
   var agevar;
   by descending treatgp;
run;

/* this works, but a better output format is given by proc tabulate: */

/* One row per treatgp value; the two column expressions are separated by
   a blank (not a comma) so that both land in the column dimension.       */
proc tabulate data=home.altpbc;
   class treatgp;
   var agevar;
   table treatgp, agevar*mean agevar*n;
run;

-----------------------------------------------------
|                         |   agevar   |   agevar   |
|                         |------------+------------|
|                         |    Mean    |     N      |
|-------------------------+------------+------------|
|treatgp                  |            |            |
|-------------------------|            |            |
|0                        |       55.20|      112.00|
|-------------------------+------------+------------|
|1                        |       51.07|      104.00|
-----------------------------------------------------

/* The same group means computed by hand in a data step.
   The BY statement is required: it is what creates first.treatgp and
   last.treatgp.  ctr and agemean are accumulated with sum statements
   (which retain their values across observations), reset at the start
   of each group, and one summary row is written at the end of each group. */
data tmpavg (drop=agevar);
   set home.altpbc (keep=agevar treatgp);
   by descending treatgp;
   if first.treatgp then do;
      ctr = 0;
      agemean = 0;
   end;
   ctr+1;
   agemean+agevar;
   if last.treatgp then do;
      agemean = agemean/ctr;
      output;
   end;
run;

proc print;
run;

Obs  treatgp  ctr  agemean
  1        1  104  51.0668
  2        0  112  55.1976

(iii)

proc univariate data=home.pbc;
   var logbili albumin;
proc means data=home.pbc;
   var logbili albumin;
run;

/* By default, "means" gives only:  N mean stddev min max
   while "univariate" gives quantiles, extremes, t, kurtosis, you-name-it.
   But both have options (to give more or less output.)
*/

(iv)

/* Minimum and maximum of logbili within each dth*treatgp*cirrh*cchol cell. */
proc sort data=home.pbc out=alt2pbc;
   by dth treatgp cirrh cchol;
run;

proc means data=alt2pbc min max;
   var logbili;
   by dth treatgp cirrh cchol;
run;

/* Lots of output, in a separate segment for each BY group (up to
   2x2x2x2 = 16 of them)!  Here is the alternative method.         */

/* Running min/max per BY group.
   minlbil and maxlbil must be RETAINed: variables created by assignment
   are otherwise reset to missing at the top of every data-step iteration,
   and each group would end up with min = max = its last logbili value.
   They are reset to missing at the start of each group; min() and max()
   ignore missing arguments, so no sentinel values are needed.
   cchol is the last BY variable, so first.cchol / last.cchol mark the
   first and last observation of each four-way cell.                      */
data tmpminmax (drop=logbili);
   set alt2pbc (keep=dth treatgp cirrh cchol logbili);
   by dth treatgp cirrh cchol;
   retain minlbil maxlbil;
   if first.cchol then do;
      minlbil = .;
      maxlbil = .;
   end;
   minlbil = min(minlbil, logbili);
   maxlbil = max(maxlbil, logbili);
   if last.cchol then output;
run;

proc print;
run;

Obs  dth  treatgp  cirrh  cchol  minlbil  maxlbil
  1    0        0      0      0  0.84510  2.07188
  2    0        0      0      1  0.84510  2.17319
  3    0        0      1      0  1.11394  1.92428
  4    0        1      0      0  0.69897  1.96379
  5    0        1      0      1  1.74036  2.26951
  6    0        1      1      0  1.20412  1.98677
  7    1        0      0      0  1.00000  2.46538
  8    1        0      0      1  1.47712  2.72263
  9    1        0      1      0  1.20412  2.50786
 10    1        0      1      1  1.44716  2.46835
 11    1        1      0      0  0.77815  2.34635
 12    1        1      0      1  1.25527  2.56110
 13    1        1      1      0  1.32222  2.03342
 14    1        1      1      1  1.71600  2.62634

/* NOTE that not all cases occur !!! */