/***********************************************
 SAS EXAMPLE -- DUMMY VARIABLES IN REGRESSION
 FOR BOTH ORDINAL AND NOMINAL VARIABLES
 FILENAME: regress2.sas

 Part 1 fits regressions of CHOL on age-group
 dummy variables stored in LABDATA.WERNER.
 Part 2 reads the kangaroo skull data from a
 fixed-column text file, builds species dummy
 variables, and regresses CREST_W on them.
************************************************/

/*USE PERMANENT SAS DATA SET CREATED EARLIER*/
title;   /* clear any titles left over from earlier work in the session */
libname LABDATA "c:\temp\labdata";

/*CHECK DUMMY VARIABLE CODING*/
/* One-way frequency tables for AGEGRP and each AGEDUMn variable. */
proc freq data=labdata.werner;
  tables agegrp agedum1 agedum2 agedum3 agedum4;
  title "CHECKING DUMMY VARIABLE CODING";
run;

/* Summary statistics of AGE within each level of AGEGRP. */
proc means data=labdata.werner;
  class agegrp;
  var age;
run;

/*Create boxplots of cholesterol for each level of agegrp*/
/* PROC BOXPLOT expects the input ordered by the group variable, hence the sort.
   NOTE: no OUT= is given, so the permanent data set is re-ordered in place. */
proc sort data=labdata.werner;
  by agegrp;
run;

proc boxplot data=labdata.werner;
  plot chol*agegrp / boxstyle=schematic;
  title "BOXPLOT TO SHOW RELATIONSHIP BETWEEN AGEGRP AND CHOLESTEROL";
run;

/*MODEL WITH AGE DUMMY VARIABLES*/
/* AGEDUM1 is left out of the model, so AGEGRP 1 is the reference level.
   The labelled TEST statement jointly tests the three dummy coefficients.
   Predicted values and residuals are saved to WORK.REGDAT1 for diagnostics. */
proc reg data=labdata.werner;
  model chol = agedum2 agedum3 agedum4;
  AGEDUM: test agedum2, agedum3, agedum4;
  output out=regdat1 p=predict1 r=resid1;
  plot residual.*predicted.;
  title "MULTIPLE REGRESSION WITH DUMMY VARIABLES FOR AGE";
  title2 "PLUS A TEST FOR AGE DUMMY VARIABLES";
  title3 "REFERENCE AGE IS AGEGRP 1";
run;
quit;

/* Residual diagnostics: normality tests, histogram and normal Q-Q plot. */
proc univariate data=regdat1 plot normal;
  var resid1;
  histogram;
  qqplot / normal(mu=est sigma=est);
run;

/*SWITCH REFERENCE GROUP FOR AGE DUMMY VARIABLES*/
/* AGEDUM4 is left out this time, making AGEGRP 4 the reference level. */
proc reg data=labdata.werner;
  model chol = agedum1 agedum2 agedum3;
  AGEDUM: test agedum1, agedum2, agedum3;
  title "MULTIPLE REGRESSION WITH DUMMY VARIABLES FOR AGE";
  title2 "PLUS A TEST FOR AGE DUMMY VARIABLES";
  title3 "REFERENCE AGE IS AGEGRP 4";
run;
quit;

/*INCLUDE AGE DUMMY VARIABLES AND CONTINUOUS COVARIATES*/
/* Same joint test of the age dummies, now adjusted for CALC, URIC, ALB and WT. */
proc reg data=labdata.werner;
  model chol = agedum2 agedum3 agedum4 calc uric alb wt;
  AGEDUM: test agedum2, agedum3, agedum4;
  title "MULTIPLE REGRESSION WITH DUMMY VARIABLES FOR AGE";
  title2 "PLUS OTHER CONTINUOUS COVARIATES";
  title3 "REFERENCE AGE IS AGEGRP 1";
run;
quit;

/*****************************************************
 ANOTHER EXAMPLE USING DUMMY VARIABLES FOR A NOMINAL
 CATEGORICAL VARIABLE: SPECIES
******************************************************/
title;   /* clear the regression titles before the second example */

/* Read the fixed-column kangaroo file and build 0/1 indicators for SPECIES.
   TRUNCOVER keeps INPUT on the current record when a line is shorter than
   the last column requested (80); without it SAS would flow over to the next
   record and silently misalign the remaining observations. */
data kanga;
  infile "c:\temp\labdata\kanga.dat" lrecl=80 truncover;
  input sex      1
        species  3
        basal_l  5-8
        occip_l 10-13
        palat_l 15-18
        palat_w 20-22
        nasal_l 24-26
        nasal_w 28-30
        squam_d 32-34
        lacry_w 36-38
        zygom_w 40-43
        orbit_w 45-47
        rostr_w 49-51
        occip_d 53-55
        crest_w 57-59
        foram_w 61-63
        mandi_l 65-68
        mandi_w 70-72
        mandi_d 74-76
        ramus_h 78-80;

  /* Each indicator is 1 for its own species code and 0 for the other two;
     it stays missing when SPECIES is missing or not one of 0, 1, 2. */
  if species = 0 then species_dum0 = 1;
  else if species in (1,2) then species_dum0 = 0;

  if species = 1 then species_dum1 = 1;
  else if species in (0,2) then species_dum1 = 0;

  if species = 2 then species_dum2 = 1;
  else if species in (0,1) then species_dum2 = 0;
run;

/* No VAR statement: summarises every numeric variable within each species. */
proc means data=kanga;
  class species;
run;

/* Order by the group variable before drawing the boxplots. */
proc sort data=kanga;
  by species;
run;

proc boxplot data=kanga;
  plot crest_w*species / boxstyle=schematic;
run;

/* SPECIES_DUM0 is left out of the model, so species 0 is the reference level.
   Predicted values, residuals and studentized residuals go to WORK.KANGA_REG1. */
proc reg data=kanga;
  model crest_w = species_dum1 species_dum2;
  plot residual.*predicted.;
  output out=kanga_reg1 p=predict r=resid rstudent=rstudent;
run;
quit;

/* Residual diagnostics for the species model. */
proc univariate data=kanga_reg1 plot normal;
  var resid;
  histogram;
  qqplot / normal(mu=est sigma=est);
run;