/***********************************************
 SAS EXAMPLE -- SIMPLE REGRESSION
                MULTIPLE REGRESSION
                COLLINEARITY DIAGNOSTICS
 FILENAME: REGRESS1.SAS

 Flow of this program:
   1. Read the fixed-column raw file into LABDATA.WERNER,
      recode selected values to missing, derive WTALB and
      the age-group indicator variables.
   2. Descriptive statistics and a simple AGE/CHOL correlation.
   3. Scatter plot of CHOL by AGE with a regression line.
   4. Simple regression CHOL=AGE, with diagnostics and an
      output data set of predicted values and residuals.
   5. Multiple regression with tolerance/VIF/collinearity
      diagnostics, with and without one low-CHOL observation.
   6. Two models that include WTALB (= WT + ALB) together with
      WT (and ALB) to demonstrate collinearity.
************************************************/

OPTIONS NODATE FORMDLIM=" " PAGENO=1;
TITLE;   /* clear any titles left over from earlier work in the session */

LIBNAME LABDATA "C:\TEMP\LABDATA";

/*******************************************************
 READ RAW DATA AND CREATE PERMANENT DATA SET
********************************************************/
DATA LABDATA.WERNER;
   INFILE "c:\temp\labdata\werner2.dat";

   /* Column input: every field is four columns wide.
      NOTE(review): the bare 1 after the ALB, CALC and URIC column
      ranges is presumably an implied-decimal specifier (the
      documented form is .1). Confirm against the raw file, because
      it determines whether the "= 99" recodes below can ever match. */
   INPUT ID   $  1-4
         AGE     5-8
         HT      9-12
         WT     13-16
         PILL   17-20
         CHOL   21-24
         ALB    25-28 1
         CALC   29-32 1
         URIC   33-36 1;

   /* Recode these specific values to SAS missing (.) */
   IF HT   = 999 THEN HT   = .;
   IF WT   = 999 THEN WT   = .;
   IF CHOL = 600 THEN CHOL = .;
   IF ALB  = 99  THEN ALB  = .;
   IF CALC = 99  THEN CALC = .;
   IF URIC = 99  THEN URIC = .;

   /* Exact linear combination of WT and ALB; used as an extra
      predictor in the collinearity models at the end of the file.
      The result is missing whenever WT or ALB is missing. */
   WTALB = WT + ALB;

   /*CREATE AGEGRP AND AGE INDICATOR DUMMY VARIABLES*/
   /* Only done when AGE is not missing, so AGEGRP and the four
      indicators stay missing for observations without an age. */
   IF AGE NOT = . THEN DO;
      /* Mutually exclusive bands: <25, 25-<32, 32-<42, >=42 */
      IF      AGE < 25 THEN AGEGRP = 1;
      ELSE IF AGE < 32 THEN AGEGRP = 2;
      ELSE IF AGE < 42 THEN AGEGRP = 3;
      ELSE                  AGEGRP = 4;

      /* A comparison evaluates to 1 (true) or 0 (false) */
      AGEDUM1 = (AGEGRP = 1);
      AGEDUM2 = (AGEGRP = 2);
      AGEDUM3 = (AGEGRP = 3);
      AGEDUM4 = (AGEGRP = 4);
   END;
RUN;

/*******************************************************
 CHECK DATA
********************************************************/
OPTIONS NOLABEL;

PROC MEANS DATA=LABDATA.WERNER;
   TITLE "DESCRIPTIVE STATISTICS";
RUN;

/***********************************************************
 CORRELATION
************************************************************/
PROC CORR DATA=LABDATA.WERNER;
   VAR AGE CHOL;
   /* Own title so this output is not labeled with the
      "DESCRIPTIVE STATISTICS" title left over from PROC MEANS. */
   TITLE "CORRELATION OF AGE AND CHOLESTEROL";
RUN;

/***********************************************************
 SIMPLE SCATTER PLOT, OR DO THIS IN INSIGHT
************************************************************/
GOPTIONS RESET=ALL;
GOPTIONS DEVICE=WIN TARGET=WINPRTM;

/* INTERPOL=RL overlays a linear regression line on the points */
SYMBOL1 COLOR=BLACK VALUE=DOT INTERPOL=RL;

/* DATA= is given explicitly rather than relying on the most
   recently created data set (_LAST_). */
PROC GPLOT DATA=LABDATA.WERNER;
   PLOT CHOL*AGE;
   TITLE "SCATTER PLOT WITH REGRESSION LINE";
RUN;
QUIT;   /* GPLOT supports run groups; end it explicitly */

/***********************************************************
 SIMPLE REGRESSION
************************************************************/
OPTIONS LABEL;

PROC REG DATA=LABDATA.WERNER;
   MODEL CHOL=AGE;
   TITLE "SIMPLE REGRESSION WITH NO OPTIONS";
RUN;
QUIT;

/* Same model, now requesting predicted values (P), residuals (R),
   and confidence limits for an individual prediction (CLI) and
   for the mean (CLM). */
PROC REG DATA=LABDATA.WERNER;
   MODEL CHOL=AGE / P R CLI CLM;
   /* Studentized residuals against predicted value, AGE, and
      observation number */
   PLOT (RSTUDENT.) * (PREDICTED. AGE OBS.);
   OUTPUT OUT=OUTREG1
          P=PREDICT1
          R=RESID1
          RSTUDENT=RSTUD1
          LCL=LCL1   UCL=UCL1
          LCLM=LCLM1 UCLM=UCLM1;
   TITLE "SIMPLE REGRESSION WITH DIAGNOSTIC PLOTS";
   TITLE2 "AND OUTPUT DATA SET TO GET RESIDUALS";
RUN;
QUIT;

PROC PRINT DATA=OUTREG1(OBS=10) LABEL;
   VAR ID PILL AGE CHOL PREDICT1 RESID1 RSTUD1 LCL1 UCL1 LCLM1 UCLM1;
   TITLE "PARTIAL LISTING OF OUTPUT DATA SET";
   TITLE2 "TO CHECK RESIDUALS AND PREDICTED VALUES";
RUN;

PROC UNIVARIATE DATA=OUTREG1 PLOT NORMAL;
   VAR RSTUD1;
   HISTOGRAM;
   QQPLOT / NORMAL(MU=EST SIGMA=EST);
   TITLE "CHECKING RESIDUALS FROM FIRST REGRESSION";
   TITLE2 "FOR NORMALITY";
RUN;

/***********************************************
 MULTIPLE REGRESSION--CHECK FOR COLLINEARITY
***********************************************/

/*FIRST, EXAMINE CORRELATION MATRIX*/
/* NOMISS: use only observations with no missing value in any
   of the listed variables. */
PROC CORR DATA=LABDATA.WERNER NOMISS;
   VAR CHOL AGE CALC URIC ALB WT WTALB;
   TITLE "PEARSON CORRELATION MATRIX";
RUN;

/*TO GET SCATTER-PLOT MATRIX, YOU CAN USE INSIGHT:
  ANALYSIS > MULTIVARIATE(YX).
  CHOOSE CHOL CALC URIC AGE AS Y
  CLICK ON THE OUTPUT BUTTON AND CHOOSE > SCATTER-PLOT MATRIX*/

PROC REG DATA=LABDATA.WERNER;
   /* STB = standardized estimates, TOL = tolerance,
      VIF = variance inflation factor, COLLIN = collinearity
      diagnostics */
   MODEL CHOL = AGE CALC URIC ALB WT / STB TOL VIF COLLIN;
   PLOT RESIDUAL.*PREDICTED.;
   OUTPUT OUT=OUTREG2 P=PREDICT2 R=RESID2 RSTUDENT=RSTUD2;
   TITLE "MULTIPLE REGRESSION ANALYSIS";
   TITLE2 "WITH RESIDUAL DIAGNOSTICS";
RUN;
QUIT;

PROC UNIVARIATE DATA=OUTREG2 PLOT NORMAL;
   VAR RESID2;
   HISTOGRAM;
   QQPLOT / NORMAL(MU=EST SIGMA=EST);
   TITLE "CHECKING RESIDUALS FROM MULTIPLE REGRESSION";
RUN;

/* List the observations with residual <= -120. Missing RESID2
   also satisfies this WHERE condition, because SAS orders missing
   values below every number. */
PROC PRINT DATA=OUTREG2;
   WHERE RESID2 <=-120;
   /* Own title so the listing is not labeled with the preceding
      PROC UNIVARIATE title. */
   TITLE "OBSERVATIONS WITH RESIDUAL <= -120";
RUN;

/* Refit the same model using only observations with CHOL > 50 */
PROC REG DATA=LABDATA.WERNER;
   WHERE CHOL > 50;
   MODEL CHOL = AGE CALC URIC ALB WT / STB TOL VIF COLLIN;
   PLOT RESIDUAL.*PREDICTED.;
   OUTPUT OUT=OUTREG3 P=PREDICT3 R=RESID3 RSTUDENT=RSTUD3;
   TITLE "MULTIPLE REGRESSION ANALYSIS";
   TITLE2 "WITH RESIDUAL DIAGNOSTICS";
   TITLE3 "REMOVE OUTLIER FOR CHOLESTEROL";
RUN;
QUIT;

PROC UNIVARIATE DATA=OUTREG3 PLOT NORMAL;
   VAR RESID3;
   HISTOGRAM;
   QQPLOT / NORMAL(MU=EST SIGMA=EST);
   TITLE "CHECKING RESIDUALS FROM MULTIPLE REGRESSION";
   TITLE2 "WITH OUTLIER REMOVED";
RUN;

/*COLLINEARITY DIAGNOSTICS*/
/* WTALB = WT + ALB, so including all three predictors makes the
   design matrix exactly linearly dependent. */
PROC REG DATA=LABDATA.WERNER;
   MODEL CHOL = AGE CALC URIC ALB WT WTALB/ STB TOL VIF COLLIN;
   TITLE "MULTIPLE REGRESSION ANALYSIS";
   TITLE2 "WITH COLLINEARITY";
RUN;
QUIT;

/* ALB dropped, but WT and WTALB are both still in the model */
PROC REG DATA=LABDATA.WERNER;
   MODEL CHOL = AGE CALC URIC WT WTALB/ STB TOL VIF COLLIN;
   TITLE "MULTIPLE REGRESSION ANALYSIS";
   TITLE2 "REMOVE ALB, BUT STILL HAS COLLINEARITY";
RUN;
QUIT;