/***********************************************
  SAS EXAMPLE 5 -- SIMPLE REGRESSION
                   MULTIPLE REGRESSION
                   DUMMY VARIABLES IN REGRESSION
  FILENAME: REGRESS.SAS

  Flow of this program:
    1. Read the fixed-column raw file into WORK.WERNER, recode the
       missing-value codes, and derive AGEGRP plus AGEDUM1-AGEDUM4.
    2. Check the data (PROC MEANS / PROC FREQ).
    3. Simple regression of ALB on WT, with residual output and
       normality checks.
    4. Multiple regression of CHOL on CALC, URIC and AGE, with
       collinearity and residual diagnostics.
    5. Regression of CHOL on the age dummy variables, and the
       equivalent PROC GLM model with AGEGRP as a CLASS variable.

  Every procedure names its input data set with DATA= so that no
  step depends on which data set happened to be created last.
************************************************/

OPTIONS NODATE FORMDLIM=" " PAGENO=1;
TITLE;   /* clear any titles left over from earlier work in the session */

/*-----------------------------------------------------------------
  Read the raw data with column input.
  NOTE(review): the "1" that follows the ALB, CALC and URIC column
  ranges looks like an implied-decimal specifier. The SAS column
  input syntax documents that specifier as ".1" -- confirm in the
  log that these three variables are read as intended. If one
  implied decimal really is applied to a raw value of 99, the stored
  value would be 9.9 and the "= 99" recodes below would not fire;
  verify against PROC MEANS output.
-----------------------------------------------------------------*/
DATA WERNER;
  INFILE "c:\temp\labdata\werner2.dat";
  INPUT ID   $ 1-4
        AGE    5-8
        HT     9-12
        WT    13-16
        PILL  17-20
        CHOL  21-24
        ALB   25-28 1
        CALC  29-32 1
        URIC  33-36 1;

  /* Recode the numeric missing-value codes to SAS missing */
  IF HT   = 999 THEN HT   = .;
  IF WT   = 999 THEN WT   = .;
  IF CHOL = 600 THEN CHOL = .;
  IF ALB  = 99  THEN ALB  = .;
  IF CALC = 99  THEN CALC = .;
  IF URIC = 99  THEN URIC = .;

  /*CREATE AGEGRP AND AGE INDICATOR DUMMY VARIABLES*/
  /* When AGE is missing, AGEGRP and AGEDUM1-AGEDUM4 stay missing.
     MISSING() also catches special missing values (.A-.Z, ._),
     which a comparison with "." alone would let through.          */
  IF NOT MISSING(AGE) THEN DO;
    IF      AGE < 25 THEN AGEGRP = 1;
    ELSE IF AGE < 32 THEN AGEGRP = 2;
    ELSE IF AGE < 42 THEN AGEGRP = 3;
    ELSE                  AGEGRP = 4;

    /* A comparison evaluates to 1 (true) or 0 (false) */
    AGEDUM1 = (AGEGRP = 1);
    AGEDUM2 = (AGEGRP = 2);
    AGEDUM3 = (AGEGRP = 3);
    AGEDUM4 = (AGEGRP = 4);
  END;
RUN;

/*******************************************************
  CHECK DATA TO BE SURE IT IS OK
********************************************************/
OPTIONS NOLABEL;

PROC MEANS DATA=WERNER;
  TITLE "DESCRIPTIVE STATISTICS";
RUN;

PROC FREQ DATA=WERNER;
  TABLES AGEGRP AGEDUM1-AGEDUM4;
  TITLE "CHECKING DUMMY VARIABLE CODING";
RUN;

/* Range of AGE within each AGEGRP, to confirm the cut points.
   No TITLE statement here, so the previous title stays in effect. */
PROC MEANS DATA=WERNER;
  CLASS AGEGRP;
  VAR AGE;
RUN;

/***********************************************************
  SIMPLE REGRESSION
************************************************************/
OPTIONS LABEL;

PROC REG DATA=WERNER;
  MODEL ALB=WT;
  TITLE "SIMPLE REGRESSION";
RUN;
QUIT;

/* Same model, now requesting predicted values, residuals and
   confidence limits, plots of the studentized residuals, and an
   output data set holding the diagnostics.                      */
PROC REG DATA=WERNER;
  MODEL ALB=WT / P R CLI CLM;
  PLOT (RSTUDENT.) * (PREDICTED. WT OBS.);
  OUTPUT OUT=OUTREG1
         P=PREDICT1
         R=RESID1
         RSTUDENT=RSTUD1
         LCL=LCL1   UCL=UCL1
         LCLM=LCLM1 UCLM=UCLM1;
  TITLE "SIMPLE REGRESSION WITH DIAGNOSTIC PLOTS";
  TITLE2 "AND OUTPUT DATA SET TO GET RESIDUALS";
RUN;
QUIT;

PROC PRINT DATA=OUTREG1(FIRSTOBS=1 OBS=10) LABEL;
  VAR ID PILL WT ALB PREDICT1 RESID1 RSTUD1 LCL1 UCL1 LCLM1 UCLM1;
  TITLE "PARTIAL LISTING OF OUTPUT DATA SET";
  TITLE2 "TO CHECK RESIDUALS AND PREDICTED VALUES";
RUN;

PROC UNIVARIATE DATA=OUTREG1 PLOT NORMAL;
  VAR RESID1 RSTUD1;
  HISTOGRAM;
  TITLE "CHECKING RESIDUALS FROM FIRST REGRESSION";
  TITLE2 "FOR NORMALITY";
RUN;

/***********************************************
  MULTIPLE REGRESSION--CHECK FOR COLLINEARITY
***********************************************/
/* DATA=WERNER is given explicitly: without it PROC CORR would read
   the most recently created data set, which at this point is
   OUTREG1 rather than WERNER.                                     */
PROC CORR DATA=WERNER NOMISS;
  VAR CHOL CALC URIC AGE;
  TITLE "PEARSON CORRELATION MATRIX";
RUN;

PROC PLOT DATA=WERNER HPERCENT=50 VPERCENT=50;
  PLOT CHOL*CALC="*";
  PLOT CHOL*URIC="*";
  PLOT CHOL*AGE="*";
  TITLE "SCATTER PLOTS OF CHOLESTEROL VS OTHER VARIABLES";
RUN;
QUIT;

/*TO GET PLOTS, YOU CAN USE A SCATTER PLOT MATRIX IN INSIGHT
  ANALYSIS... MULTIVARIATE(YX)...
  CHOOSE CHOL CALC URIC AGE AS Y
  OUTPUT ... SCATTER PLOT MATRIX*/

PROC REG DATA=WERNER;
  MODEL CHOL=CALC URIC AGE / STB TOL VIF;
  /* PLOT takes yvariable*xvariable: the residual goes on the
     vertical axis against the predicted value, matching the
     orientation of the RSTUDENT. plots in the simple regression. */
  PLOT RESIDUAL.*PREDICTED.;
  OUTPUT OUT=OUTREG2
         P=PREDICT2
         R=RESID2
         RSTUDENT=RSTUD2;
  TITLE "MULTIPLE REGRESSION ANALYSIS";
  TITLE2 "WITH RESIDUAL DIAGNOSTICS";
RUN;
QUIT;

PROC UNIVARIATE DATA=OUTREG2 PLOT NORMAL;
  VAR RESID2;
  HISTOGRAM;
  TITLE "CHECKING RESIDUALS FROM MULTIPLE REGRESSION";
RUN;

/***********************************************
  DUMMY VARIABLES IN REGRESSION
************************************************/
/*MODEL WITH AGE DUMMY VARIABLES*/
/* AGEDUM4 is left out of the model, so AGEGRP 4 is the reference
   group. The labelled TEST statement tests the three dummy
   coefficients jointly.                                          */
PROC REG DATA=WERNER;
  MODEL CHOL = AGEDUM1 AGEDUM2 AGEDUM3;
  AGEDUM: TEST AGEDUM1, AGEDUM2, AGEDUM3;
  TITLE "MULTIPLE REGRESSION WITH DUMMY VARIABLES FOR AGE";
  TITLE2 "PLUS A TEST FOR AGE DUMMY VARIABLES";
  TITLE3 "REFERENCE AGE IS AGEGRP 4";
RUN;
QUIT;

/*SAME MODEL USING PROC GLM*/
PROC GLM DATA=WERNER;
  CLASS AGEGRP;
  MODEL CHOL = AGEGRP / SOLUTION;
  LSMEANS AGEGRP;
  TITLE "ANOVA MODEL WITH AGEGRP AS A CLASS VARIABLE";
RUN;
QUIT;