/******************************************************** THIS COMMAND FILE READS IN THE TECUMSEH DATA SET AND DOES SOME SIMPLE DESCRIPTIVE STATISTICS. REGRESSION EXAMPLES FOR: NORMAL OUTCOME (ORDINARY LEAST SQUARES REGRESSION) BINOMIAL OUTCOME (LOGISTIC REGRESSION) COUNT OUTCOME (POISSON AND NEGATIVE BINOMIAL REGRESSION) ORDINAL MULTINOMIAL OUTCOME (CUMULATIVE LOGISTIC REGRESSION) MULTINOMIAL CATEGORICAL OUTCOME (GENERALIZED LOGIT REGRESSION) ARE SHOWN. FILENAME: TECUMSEH.SAS **************************************************************/ /*Read in SAS data set and get descriptive statistics*/ libname glmwkshp V9 "c:\temp\glmwkshp"; data tecumseh; /*WRITE TO THIS DATA SET*/ set glmwkshp.tecumseh; /*READ FROM THIS DATA SET*/ /*CREATE NEW VARIABLES*/ bmi1 = wtkg1/(htcm1/100)**2; bmi2 = wtkg2/(htcm2/100)**2; bmi3 = wtkg3/(htcm3/100)**2; if 0 <= bmi1 < 18.5 then bmicat1 = 1; if 18.5<= bmi1 < 25.0 then bmicat1 = 2; if 25.0<= bmi1 < 30.0 then bmicat1 = 3; if bmi1 >=30 then bmicat1 = 4; if 0 <= bmi2 < 18.5 then bmicat2 = 1; if 18.5<= bmi2 < 25.0 then bmicat2 = 2; if 25.0<= bmi2 < 30.0 then bmicat2 = 3; if bmi2 >=30 then bmicat2 = 4; if 0 <= bmi3 < 18.5 then bmicat3 = 1; if 18.5 <= bmi3 < 25.0 then bmicat3 = 2; if 25.0 <= bmi3 < 30.0 then bmicat3 = 3; if bmi3 >=30 then bmicat3 = 4; l_bmi1 =log(bmi1); l_bmi2 =log(bmi2); l_bmi3 =log(bmi3); if marital1 = 1 then marital = 1; if marital1 in (2,4,5) then marital = 2; if marital1 = 3 then marital = 3; if sex=1 then male=1; if sex=2 then male=0; run; /*THE RUN STATEMENT CLOSES THE DATA SET*/ proc format; value marfmt 1="married" 2="nevmar, div, sep" 3="widowed"; value edfmt 1="less than hs" 2="high school" 3="more than hs"; run; proc means data=tecumseh; title "Descriptive Statistics"; run; proc univariate data=tecumseh plot; var bmi1 logbmi1; title "Distribution of BMI and Log BMI"; title2 "For 20 to 60 year-olds"; run; proc freq data=tecumseh; where age1 between 65 and 82; tables marital1; title "Frequency of Each Marital Category"; run; *****************************************************************; /*Lab Ex 1: OLS Regression for Normally Distributed Outcome: L_BMI1 (Log of BMI at Time 1) */ *****************************************************************; ods trace on; proc genmod data=tecumseh order=internal ; ods exclude obstats ; where age1 between 20 and 60; format marital marfmt.; class marital; model l_bmi1 = male age1 marital/ dist=normal type3 obstats scale=deviance; ods output obstats=obstats_dat ParameterEstimates=parms; title "Tecumseh, 20 to 60 year olds"; run; ods trace off; proc univariate data=obstats_dat plot normal; var resraw; histogram; qqplot / normal(mu=est sigma=est); run; proc gplot data=obstats_dat; plot resraw*pred; run;quit; /*Compare Proc Genmod Output with Proc GLM*/ proc glm data=tecumseh order=internal; where age1 between 20 and 60; format marital marfmt.; class marital; model l_bmi1 = male age1 marital / solution ss3; run;quit; /*Lab Ex 1: Interaction Between Age and Male*/ proc genmod data=tecumseh; where age1 between 20 and 60; model l_bmi1 = age1 male age1*male / dist=normal type3; title "Interaction Model"; title2 "Identity Link, Tecumseh Ages 20 to 60"; run; *****************************************************************; /*Lab Ex 2: Logistic Regression for Binary outcome: CIG1 */ *****************************************************************; proc freq data=tecumseh; where age1 between 20 and 60; tables cig1; tables male*cig1 / nocol nopercent; tables agegrp1*cig1 / nocol nopercent; tables ed1*cig1 / nocol nopercent; tables marital*cig1 / nocol nopercent; title "Relationship of Categorical Predictors to Cigarette Smoking"; run; /*Lab Ex 2: Run Proc Genmod for binomial dependent variable*/ proc genmod data=tecumseh descending order=internal; where age1 between 20 and 60; format ed1 edfmt.; class ed1; model cig1 = male age1 ed1/ dist=bin type3 ; estimate "Effect of Male" male 1 / exp; estimate "Effect of Age" age1 10 / exp; estimate "Less HS vs HS" ed1 1 -1 0 / exp; estimate "Less HS vs More HS" ed1 1 0 -1 / exp; estimate "HS vs More HS" ed1 1 -1 0 / exp; run; proc genmod data=tecumseh descending order=internal; where age1 between 20 and 60; format ed1 edfmt.; class ed1; model cig1 = male age1 ed1/ dist=bin type3 link=probit; run; proc logistic data=tecumseh descending; where age1 between 20 and 60; class marital ed1 / param=ref ref=last; model cig1 = male age1 ed1 marital / rsquare; run; *******************************************************************; /*Lab Ex 3: Poisson/Negative Binomial Regression for Count Outcome: BEER1 */ *******************************************************************; proc freq data=tecumseh ; where age1 between 20 and 69; tables beer1; run; /*Lab Ex 3: Run Proc Genmod for count dependent variable BEER1. First Try Poisson Distribution. Check the Model Fit Criteria. Chisquare/df >> 1.0*/ proc genmod data=tecumseh order=internal; where age1 between 20 and 60; class ed1; format ed1 edfmt.; model beer1 = male age1 ed1/ dist=poisson type3; run; /*Now, try Negative Binomial Distribution. Model fit criteria are improved.*/ proc genmod data=tecumseh order=internal ; where age1 between 20 and 60; class ed1; format ed1 edfmt.; model beer1 = male age1 ed1/ dist=poisson type3 ; estimate "Effect of Male" male 1 / exp; estimate "Effect of Age" age1 10 / exp; estimate "Less HS vs HS" ed1 1 -1 0 / exp; estimate "Less HS vs More HS" ed1 1 0 -1 / exp; estimate "HS vs More HS" ed1 0 1 -1 / exp; run; *****************************************************************; /*Lab Ex 4: Cumulative Logistic Regression for Ordinal Outcome: BALD1 */ *****************************************************************; proc freq data=tecumseh; where age1 between 20 and 60; tables bald1; title "Number of People with Each Degree of Baldness"; run; proc genmod data=tecumseh descending; where age1 between 20 and 60; class agegrp1 sex marital1; model bald1 = agegrp1 sex / dist=mult type3; run; *****************************************************************; /*Lab Ex 5: Generalized Logit Regression for Catgorical Dependent Variable: CIGCAT1 */ *****************************************************************; /*First recode the categories of CIGAMT1 and create dichotomies*/ data tecumseh2; set tecumseh; if cigday1 = 0 then cigcat1 = 0; if cigday1 in (1,2) then cigcat1 = 1; if cigday1 = 3 then cigcat1 = 2; if cigday1 = 4 then cigcat1 = 3; if cigday1 in (5,6,7,8) then cigcat1 = 4; if cigday1 not=. then do; cig_dichot1 = (cigcat1>0); cig_dichot2 = (cigcat1>1); cig_dichot3 = (cigcat1>2); cig_dichot4 = (cigcat1>3); end; run; proc freq data=tecumseh2; where age1 between 20 and 60; tables cigday1 cigcat1; title "Number of People in Each Cig Category"; run; proc genmod data=tecumseh2 descending; where age1 between 20 and 60; model cigcat1 = age1 male/ dist=mult type3 ; title "Ordinal Logistic Regression for Cigcat1"; title2 "Using Proc Genmod"; run; /*Lab Ex 5: Run Proc Logistic to check for proportional odds assumption*/ proc logistic data=tecumseh2 descending; where age1 between 20 and 60; model cigcat1 = age1 male / rsquare; title "Ordinal Logistic Regression for Cigcat1"; title2 "Using Proc Logistic to Check Prop Odds"; run; proc logistic data=tecumseh2 descending; where age1 between 20 and 60; model cig_dichot1 = age1 male; title "Logistic Regression for Dichotomy 1"; title2 "First Step of the Ladder"; run; proc logistic data=tecumseh2 descending; where age1 between 20 and 60; model cig_dichot2 = age1 male; title "Logistic Regression for Dichotomy 2"; title2 "Second Step of the Ladder"; run; proc logistic data=tecumseh2 descending; where age1 between 20 and 60; model cig_dichot3 = age1 male; title "Logistic Regression for Dichotomy 3"; title2 "Third Step of the Ladder"; run; proc logistic data=tecumseh2 descending; where age1 between 20 and 60; model cig_dichot4 = age1 male; title "Logistic Regression for Dichotomy 4"; title2 "Last Step of the Ladder"; run; /*Lab Ex 5: Run Generalized Logit Model Using Proc Logistic*/ proc logistic data=tecumseh2 descending; where age1 between 20 and 60; model cigcat1 = age1 male / link=glogit; title "Generalized Logit Regression for Cigcat1"; title2 "Using Proc Logistic"; run; /*Lab Ex 5: Run Generalized Logit Model Using Proc Catmod. Different Ref Category*/ proc catmod data=tecumseh2 ; where age1 between 20 and 60; direct age1 male; model cigcat1 = age1 male / noprofile; title "Generalized Logit Regression for Cigcat1"; title2 "Using Proc Catmod. The Ref Category is 4"; run; quit; /*Comparison of a Series of Logistic Regressions with the Overall Model Using Proc Catmod*/ proc logistic data=tecumseh2 descending; where age1 between 20 and 60 and cigcat1 in (0,4); model cigcat1 = age1 male; title "Logistic Regression for Cigcat1"; title2 "Category 4 vs 0"; run; proc logistic data=tecumseh2 descending; where age1 between 20 and 60 and cigcat1 in (1,4); model cigcat1 = age1 male; title "Logistic Regression for Cigcat1"; title2 "Category 4 vs 1"; run; proc logistic data=tecumseh2 descending; where age1 between 20 and 60 and cigcat1 in (2,4); model cigcat1 = age1 male; title "Logistic Regression for Cigcat1"; title2 "Category 4 vs 2"; run; proc logistic data=tecumseh2 descending; where age1 between 20 and 60 and cigcat1 in (3,4); model cigcat1 = age1 male; title "Logistic Regression for Cigcat1"; title2 "Category 4 vs 3"; run; /*Series of Logistic Regressions to Compare with the Overall Model Using Proc Logistic with Glogit Link*/ proc logistic data=tecumseh2 descending; where age1 between 20 and 60 and cigcat1 in (0,4); model cigcat1 = age1 male; title "Logistic Regression for Cigcat1"; title2 "Category 4 vs 0"; run; proc logistic data=tecumseh2 descending; where age1 between 20 and 60 and cigcat1 in (0,3); model cigcat1 = age1 male; title "Logistic Regression for Cigcat1"; title2 "Category 3 vs 0"; run; proc logistic data=tecumseh2 descending; where age1 between 20 and 60 and cigcat1 in (0,2); model cigcat1 = age1 male; title "Logistic Regression for Cigcat1"; title2 "Category 2 vs 0"; run; proc logistic data=tecumseh2 descending; where age1 between 20 and 60 and cigcat1 in (0,1); model cigcat1 = age1 male; title "Logistic Regression for Cigcat1"; title2 "Category 1 vs 0"; run;