/************************************************
 THIS COMMAND FILE GOES WITH CHAPTER 12:
 Recoding Variables and Creating Dummy Variables in SAS

 FILENAME: RECODE.SAS

 Overview of the steps in this file:
   1. Read owen.dat, set 99/999 codes to missing, and recode
      mother's age into groups (a correct and an incorrect version).
   2. Check the recodes with PROC MEANS and PROC FREQ.
   3. Create dummy variables two different ways and check them.
   4. Use the dummy variables in a regression model.
   5. Correct individual data values (data cleaning).
*************************************************/

options nodate pageno=1;
OPTIONS FORMCHAR="|----|+|---+=|-/\<>*";
title;

/*Recode Example*/
data owen;
  infile "owen.dat";
  input fam_num childnum age sex race w_rank income_c height weight
        hemo vit_c vit_a head_cir fatfold b_weight mot_age b_order
        m_height f_height;

  /*Values of 99 are set to SAS missing for these variables*/
  if vit_a    = 99 then vit_a    = .;
  if head_cir = 99 then head_cir = .;
  if fatfold  = 99 then fatfold  = .;
  if mot_age  = 99 then mot_age  = .;
  if b_order  = 99 then b_order  = .;

  /*Values of 999 are set to SAS missing for these variables*/
  if height   = 999 then height   = .;
  if weight   = 999 then weight   = .;
  if b_weight = 999 then b_weight = .;
  if m_height = 999 then m_height = .;
  if f_height = 999 then f_height = .;

  /*Correct recode of mother's age into agegrp.
    The lower bound (mot_age >= 0) keeps missing values of mot_age
    out of group 1, so agegrp stays missing when mot_age is missing.*/
  if      mot_age >=  0 and mot_age < 20 then agegrp = 1;
  else if mot_age >= 20 and mot_age < 25 then agegrp = 2;
  else if mot_age >= 25 and mot_age < 30 then agegrp = 3;
  else if mot_age >= 30 and mot_age < 35 then agegrp = 4;
  else if mot_age >= 35 and mot_age < 40 then agegrp = 5;
  else if mot_age >= 40                  then agegrp = 6;

  /*Incorrect recode of mother's age (kept on purpose as an example).
    SAS treats a missing value as smaller than any number, so
    "mot_age < 20" is true when mot_age is missing and those cases
    are wrongly placed in group 1.*/
  if      mot_age < 20                   then wrongage = 1;
  else if mot_age >= 20 and mot_age < 25 then wrongage = 2;
  else if mot_age >= 25 and mot_age < 30 then wrongage = 3;
  else if mot_age >= 30 and mot_age < 35 then wrongage = 4;
  else if mot_age >= 35 and mot_age < 40 then wrongage = 5;
  else if mot_age >= 40                  then wrongage = 6;

  /*Create dummy variable for birth weight < 2500 grams*/
  /*Rescale b_weight by 10 (presumably the raw data are in units of
    10 grams -- confirm against the codebook)*/
  b_weight = 10*b_weight;
  /*The lower bound (b_weight > 0) keeps missing b_weight from being
    coded as low birth weight; lowbwt stays missing in that case*/
  if      b_weight > 0 and b_weight < 2500 then lowbwt = 1;
  else if b_weight >= 2500                 then lowbwt = 0;
run;

/*Check recodes*/
proc means data=owen;
run;

/*USE PROC MEANS TO CHECK THE VALUES OF NEW VARIABLES*/
proc means data=owen;
  class agegrp;
  var mot_age;
run;

proc freq data=owen;
  tables wrongage agegrp;
run;

/*CREATE DUMMY VARIABLES*/
/*METHOD 1: a logical expression evaluates to 1 (true) or 0 (false)*/
data newowen;
  set owen;

  /*Only create the dummies when the source variable is not missing,
    so that a missing source gives a missing dummy rather than 0*/
  if agegrp ne . then do;
    age1 = (agegrp=1);
    age2 = (agegrp=2);
    age3 = (agegrp=3);
    age4 = (agegrp=4);
    age5 = (agegrp=5);
    age6 = (agegrp=6);
  end;

  /*Guard against missing sex/race so the result matches Method 2*/
  if sex  ne . then male  = (sex=1);
  if race ne . then white = (race=1);
run;

/*CHECK ON DUMMY VARIABLES USING PROC PRINT AND PROC FREQ*/
proc print data=newowen(obs=15) noobs;
  var mot_age agegrp age1-age6 sex male race white;
  title "Checking on Dummy Variables";
run;

proc freq data=newowen;
  tables age1-age6 male white;
run;

/*USING DUMMY VARIABLES IN A REGRESSION MODEL*/
title "Regression Model Using Dummy Variables";
/*Name the input data set explicitly instead of relying on the most
  recently created data set. age1 is left out of the model, so age
  group 1 is the reference category.*/
proc reg data=newowen;
  model b_weight = age2 age3 age4 age5 age6;
run;
quit;

/*METHOD 2 FOR CREATING DUMMY VARIABLES*/
data newowen2;
  set owen;

  /*Dummy variables for AGEGRP*/
  if agegrp ne . then do;
    age1 = 0; if agegrp=1 then age1=1;
    age2 = 0; if agegrp=2 then age2=1;
    age3 = 0; if agegrp=3 then age3=1;
    age4 = 0; if agegrp=4 then age4=1;
    age5 = 0; if agegrp=5 then age5=1;
    age6 = 0; if agegrp=6 then age6=1;
  end;

  /*Dummy variable for Male*/
  male = 0;
  if sex=1 then male=1;
  if sex=. then male=.;

  /*Dummy variable for white*/
  white = 0;
  if race=1 then white=1;
  if race=. then white=.;
run;

/*Replace the regression title, which would otherwise still be in
  effect for this output*/
title "Checking on Dummy Variables (Method 2)";
proc freq data=newowen2;
  tables age1-age6 male white;
run;

/*CORRECTING DATA VALUES USING RECODES (DATA CLEANING)*/
data fixup;
  set owen;

  /*Correct two values for one specific child*/
  if fam_num=5911 and childnum=1 then do;
    b_weight=3025;
    m_height=162;
  end;

  /*Recode every fatfold value of 42 to 24*/
  if fatfold = 42 then fatfold=24;
run;

/*Print the corrected record to verify the fix*/
proc print data=fixup;
  where fam_num=5911 and childnum=1;
  var fam_num childnum b_weight m_height;
  title "Printout of One Child's Data for Checking";
run;