/* SAS example for logistic regression using PROC LOGISTIC and PROC GENMOD.
   Comments use the block form throughout: the statement form (*text;)
   must not contain unmatched quotation marks such as apostrophes. */

/* Session options, set once. */
options yearcutoff=1900 pageno=1 formdlim=" " nodate;

/* ------------------------------------------------------------------
   Read the fixed-column raw file, turn the numeric "missing" codes
   into SAS missing values, and derive the analysis variables.
   ------------------------------------------------------------------ */
data bcancer;
    infile "e:\510\2007\data\brca.dat" lrecl=300;
    input idnum     1-4
          stopmens  5
          agestop1  6-7
          numpreg1  8-9
          agebirth  10-11
          mamfreq4  12
          @13 dob   mmddyy8.
          educ      21-22
          totincom  23
          smoker    24
          weight1   25-27;
    format dob mmddyy10.;

    /* Recode sentinel values to missing.
       With yearcutoff=1900 the literal "09SEP99"D is 09SEP1999. */
    if dob = "09SEP99"D then dob = .;
    if stopmens = 9 then stopmens = .;
    if agestop1 = 88 or agestop1 = 99 then agestop1 = .;
    if agebirth = 99 then agebirth = .;
    if numpreg1 = 99 then numpreg1 = .;
    if mamfreq4 = 9 then mamfreq4 = .;
    if educ = 99 then educ = .;
    if totincom = 8 or totincom = 9 then totincom = .;
    if smoker = 9 then smoker = .;
    if weight1 = 999 then weight1 = .;

    /* Binary response: stopmens 1 -> 1, stopmens 2 -> 0, anything else
       leaves menopause missing. */
    if stopmens = 1 then menopause = 1;
    else if stopmens = 2 then menopause = 0;

    yearbirth = year(dob);
    /* Whole years between dob and 01JAN1997. */
    age = int(("01JAN1997"d - dob) / 365.25);

    /* The not-missing guards matter: a SAS missing value compares lower
       than any number, so an unguarded "age < 50" would put missing
       ages into category 1. */
    if educ not= . then do;
        if educ in (1,2,3,4) then edcat = 1;
        else if educ in (5,6) then edcat = 2;
        else if educ in (7,8) then edcat = 3;
        highed = (educ in (6,7,8));
    end;

    if age not= . then do;
        if age < 50 then agecat = 1;
        else if age < 60 then agecat = 2;
        else if age < 70 then agecat = 3;
        else agecat = 4;
    end;
run;

/* No VAR statement, so every numeric variable is summarised. */
title "Descriptive Statistics for Breast Cancer Data";
proc means data=bcancer n nmiss min max mean std;
run;

title "Logistic Regression with a Continuous Predictor";
/* The descending option is important for the way in which you code your
   response variable, Y (0 or 1). This option will model the probability
   of the event occurring given that you code it as Y = 1. If this option
   is not used, you are modelling the probability of the event NOT
   occurring (Y = 0). */
proc logistic data=bcancer descending;
    model menopause = age / risklimits rsquare;
    /* Calculates 3 different odds ratios (ORs) corresponding to a 1, 5
       and 10 unit increase in age. The risklimits option includes 95% CI
       for each of these ORs. */
    units age = 1 5 10;
run;

title "Logistic Regression with a Continuous Predictor";
title2 "Without the Descending Option";
proc logistic data=bcancer;
    model menopause = age / risklimits rsquare;
    units age = 1 5 10;
run;

title "Logistic Regression Using Proc Genmod";
proc genmod data=bcancer descending;
    /* You need the dist=bin option to get the same results as in
       PROC LOGISTIC. */
    model menopause = age / dist = bin;
run;

title "Use Proc Univariate to get Quartiles for AGE";
/* You need at least some variation in the response for each level of
   your categorical predictor for the logistic model to work. */
proc univariate data=bcancer;
    var age;
run;

/* Two-level age indicator. edcat and highed are carried over from
   bcancer unchanged, so they are not recomputed here.
   NOTE(review): ages below 40 are left with AgeCat2 missing -
   presumably no such ages occur in the data; confirm. */
data bcancer2;
    set bcancer;
    if age not= . then do;
        if 40 <= age <= 57 then AgeCat2 = 0;
        else if age > 57 then AgeCat2 = 1;
    end;
run;

title "Logistic Regression with Dummy Variable Predictor";
title2 "ANOVA-type representation of factors";
title3 "Use Dummy Variable, Coded as 0, 1";
proc logistic data=bcancer2 descending;
    model menopause = AgeCat2 / risklimits rsquare;
run;

title "Logistic Regression to Predict Menopause From Education";
proc logistic data=bcancer2 descending;
    class edcat(ref="1") / param = ref;
    model menopause = edcat / risklimits rsquare;
run;

title "Logistic Regression with AGECAT";
title2 "This Analysis Does not Work";
title3 "Check out the Parameter Estimates and Standard Errors";
proc logistic data=bcancer descending;
    /* agecat has 4 levels in the original dataset. */
    class agecat(ref="1") / param = ref;
    model menopause = agecat / risklimits rsquare;
run;

title "Use Proc Freq to check the relationship between AGECAT and MENOPAUSE";
proc freq data=bcancer;
    tables agecat*menopause / chisq;
run;

/* Recode age into AGECAT3 with 3 categories (60 and over collapsed). */
data bcancer3;
    set bcancer;
    if age not= . then do;
        if age < 50 then agecat3 = 1;
        else if age < 60 then agecat3 = 2;
        else agecat3 = 3;
    end;
run;

title "Logistic Regression with Ordinal Categorical Predictor";
title2 "This Analysis Works";
proc logistic data=bcancer3 descending;
    class agecat3(ref="1") / param = ref;
    model menopause = agecat3 / risklimits rsquare;
run;

/* Note to self: if the CIs and/or SEs look funny, do a proc freq. */
proc freq data=bcancer3;
    tables agecat3*menopause / chisq;
run;

/* Similarly, this code can be written as the following. There is usually
   more than one way to write code in SAS. If you want your last group to
   be the reference category, specify reference = last. */
proc logistic data=bcancer3 descending;
    class agecat3 / param = ref reference = first;
    model menopause = agecat3 / risklimits rsquare;
run;

title "Logistic Regression with Several Predictors";
title2 "Predictors are a mix of the aforementioned types";
proc logistic data=bcancer descending;
    class edcat(ref="1") / param = ref;
    model menopause = age edcat smoker totincom numpreg1 / rsquare;
run;

title "Logistic Regression Using Proc Genmod";
proc genmod data=bcancer descending;
    class edcat(ref="1") / param = ref;
    /* If you do not specify dist = bin, your results will NOT match the
       results of PROC LOGISTIC. */
    model menopause = age edcat smoker totincom numpreg1 / dist=bin type3;
run;