/***********************************************
 SAS EXAMPLE -- SIMPLE LINEAR REGRESSION
   CHECKING FOR INFLUENTIAL OBSERVATIONS.
   CHECKING FOR OUTLIERS.
   CHECKING NORMALITY OF RESIDUALS.
   DEMONSTRATION OF ODS GRAPHICS.
 FILENAME: simple_regression.sas
************************************************/
OPTIONS NODATE FORMDLIM=" " PAGENO=1;
TITLE;
LIBNAME LABDATA "F:\510\2007";

/* Read the fixed-column raw file into the permanent data set LABDATA.WERNER. */
DATA LABDATA.WERNER;
  INFILE "F:\510\2007\DATA\werner2.dat";
  /* Column input. The ".1" after a column range is the implied-decimal
     specifier: a field with no explicit decimal point is divided by 10.
     (A bare "1" without the leading period is not valid INPUT syntax.) */
  INPUT ID   1-4
        AGE  5-8
        HT   9-12
        WT   13-16
        PILL 17-20
        CHOL 21-24
        ALB  25-28 .1
        CALC 29-32 .1
        URIC 33-36 .1;

  /* Recode the numeric missing-value codes to SAS missing. */
  IF HT   = 999 THEN HT   = .;
  IF WT   = 999 THEN WT   = .;
  IF CHOL = 600 THEN CHOL = .;
  /* NOTE(review): these comparisons are made AFTER the implied decimal is
     applied. Confirm against the raw file that the missing codes for ALB,
     CALC and URIC really read in as 99 (e.g. stored as 99.0 or 990). */
  IF ALB  = 99  THEN ALB  = .;
  IF CALC = 99  THEN CALC = .;
  IF URIC = 99  THEN URIC = .;
RUN;

/*******************************************************
 CHECK DATA
********************************************************/
OPTIONS NOLABEL;
TITLE "DESCRIPTIVE STATISTICS";
PROC MEANS DATA=LABDATA.WERNER;
RUN;

/***********************************************************
 CORRELATION
************************************************************/
PROC CORR DATA=LABDATA.WERNER;
  VAR AGE CHOL;
RUN;

/***********************************************************
 SIMPLE SCATTER PLOT, OR DO THIS IN INSIGHT
************************************************************/
GOPTIONS RESET=ALL;
GOPTIONS DEVICE=WIN TARGET=WINPRTM;
/* INTERPOL=RL overlays a fitted linear regression line on the points. */
SYMBOL1 COLOR=BLACK VALUE=DOT INTERPOL=RL;
TITLE "SCATTER PLOT WITH REGRESSION LINE";
/* DATA= is named explicitly so the plot does not depend on whichever
   data set happens to be the most recently created one. */
PROC GPLOT DATA=LABDATA.WERNER;
  PLOT CHOL*AGE;
RUN;
QUIT;

/***********************************************************
 SIMPLE REGRESSION
************************************************************/
OPTIONS LABEL;
TITLE "SIMPLE LINEAR REGRESSION WITH NO OPTIONS";
PROC REG DATA=LABDATA.WERNER;
  MODEL CHOL=AGE;
RUN;
QUIT;

TITLE "SIMPLE LINEAR REGRESSION WITH DIAGNOSTIC PLOTS";
TITLE2 "AND OUTPUT DATA SET TO GET RESIDUALS";
PROC REG DATA=LABDATA.WERNER;
  /* P, R, CLI, CLM request predicted values, residual analysis, and
     individual / mean confidence limits in the printed output. */
  MODEL CHOL=AGE / P R CLI CLM;
  PLOT RSTUDENT. * PREDICTED.;
  PLOT COOKD. * OBS.;
  /* Save the per-observation diagnostics to WORK.OUTREG1 for the
     outlier listing and normality check below. */
  OUTPUT OUT=OUTREG1
         P=PREDICT1
         R=RESID1
         RSTUDENT=RSTUD1
         COOKD=COOKD
         LCL=LCL1   UCL=UCL1
         LCLM=LCLM1 UCLM=UCLM1;
RUN;
QUIT;

TITLE "PARTIAL LISTING OF OUTPUT DATA SET";
TITLE2 "TO CHECK FOR POSSIBLE OUTLIERS";
PROC PRINT DATA=OUTREG1 LABEL;
  /* List only cases whose studentized residual is at least 3 in magnitude. */
  WHERE ABS(RSTUD1) >= 3;
  VAR ID PILL AGE CHOL PREDICT1 RESID1 RSTUD1 COOKD
      LCL1 UCL1 LCLM1 UCLM1;
RUN;

TITLE "CHECKING RESIDUALS FROM FIRST REGRESSION";
TITLE2 "FOR NORMALITY";
PROC UNIVARIATE DATA=OUTREG1 PLOT NORMAL;
  VAR RSTUD1;
  HISTOGRAM;
  QQPLOT / NORMAL(MU=EST SIGMA=EST);
RUN;

/*********************************************************
 RERUN THE REGRESSION ON A SUBSET OF OBSERVATIONS,
 WITHOUT THE INFLUENTIAL OBSERVATIONS.
 COMPARE THE REGRESSION COEFFICIENTS FOR THIS NEW MODEL.
 GET EXPERIMENTAL ODS GRAPHICS OUTPUT.
**********************************************************/
TITLE "RERUN THE REGRESSION WITHOUT THE INFLUENTIAL CASES";
ODS HTML;
ODS GRAPHICS ON;
ODS rtf file = "F:\510\SIMPLE_REGRESSION.RTF";
PROC REG DATA=LABDATA.WERNER;
  /* Exclude the two cases (by ID) treated as influential. */
  WHERE ID NOT IN (1797, 3134);
  MODEL CHOL=AGE;
RUN;
QUIT;
ODS GRAPHICS OFF;
ODS RTF CLOSE;
/* Close the HTML destination opened above so its output is finalised. */
ODS HTML CLOSE;