/*************************************************
 Statistical Analysis with Missing Data: Lab Session 2
 K. Welch, B. Gillespie, and B. West

 Multiple Imputation and Multiple Imputation Analysis
 using IVEware Software.
*************************************************/

/*************************************************************************
 We continue with the linear regression framework from yesterday to
 illustrate the use of IVEware software to generate multiple imputed
 data sets, and then perform multiple imputation analysis using IVEware.

 We will again be using a subset of data from the Longitudinal Study of
 Aging. For illustration purposes, we will again ignore the complex
 design features of the LSOA.

 IVEware is available for free download from
 http://www.isr.umich.edu/src/smp/ive.
***************************************************************************/

/* Tell SAS where the IVEware files are located: SRCLIB points at the
   install directory, and that directory is placed at the front of the
   autocall search path so the IVEware macros can be found. */
options set = SRCLIB "C:\Program Files\Srclib"
        sasautos = ('!SRCLIB' sasautos)
        mautosource;

/*******************************************************************
 MULTIPLE IMPUTATION EXERCISE:
 SET OPTIONS. DECLARE LIBNAME. CHECK CONTENTS LISTING OF LSOA DATA SET.
********************************************************************/
OPTIONS PAGESIZE=54 LINESIZE=72 NODATE;

LIBNAME LAB 'E:\lsoa';   /* source LSOA data                 */
LIBNAME OUT 'E:\';       /* destination for imputed data sets */

TITLE1 'LAB SESSION 2: Multiple Imputation Exercise';
TITLE2 'MULTIPLE IMPUTATION USING IVEWARE %IMPUTE';
TITLE3 'CONTENTS LISTING OF DATA SET';

/*******************************************************************
 CREATE TEMPORARY DATA SET FOR THE IMPUTATION RUN.
********************************************************************/
DATA LSOATEMP;
    SET LAB.LSOA_86X;

    /* We keep the last three variables (fnlwgt2, psu, stratum) to
       demonstrate TRANSFER in IVEware. */
    KEEP poverty own educ logadl logadl2r age84 sex black other
         fnlwgt2 psu stratum;

    /* Create a binary ownership variable: 1 when OWNBUYR is 1 or 2,
       0 for any other non-missing value.
       Any missing code for OWNBUYR (system missing or a special missing
       such as .D) leaves OWN missing, so that it is imputed rather than
       silently counted as a non-owner. */
    if missing(ownbuyr) then own = .;
    else own = (ownbuyr in (1,2));

    /* Take log transforms of number of ADLs requiring assistance in
       1986 and 1984 (for normality). Add 1 to avoid taking the
       logarithm of zero.
         NUMADL   = Number of restricted activities of daily living
                    (baseline, 1984)
         NUMADL2R = Number of restricted activities of daily living
                    (followup, 1986) */
    logadl2r = log(numadl2r + 1);
    logadl   = log(numadl + 1);

    /* Create dummy variables for race (RACER=2 -> black, RACER=3 -> other).
       Both dummies stay missing whenever RACER carries any missing code,
       so an unknown race is never recoded to the reference category. */
    if missing(racer) then do;
        black = .;
        other = .;
    end;
    else do;
        black = (racer = 2);
        other = (racer = 3);
    end;
RUN;

PROC CONTENTS DATA=LSOATEMP;
RUN;

/*******************************************************************
 USE IVEWARE %IMPUTE MODULE TO MULTIPLY IMPUTE THE MISSING VALUES OF
 THE LOG-TRANSFORMED ADL COUNTS (LOGADL, LOGADL2R) AND SELECTED KEY
 PREDICTOR VARIABLES IN THE 1986 LSOA DATA SET.
 5 MULTIPLE IMPUTATION DATA SETS WILL BE CREATED.
*******************************************************************/

/* NOTE: This code needs to be run in the regular program editor! */
%IMPUTE(SETUP=NEW,NAME=IMPADL86,DIR=C:\TEMP);
DATAIN LSOATEMP;
DATAOUT OUT.LSOAIMP;
DEFAULT CONTINUOUS;
    /* All variables not declared as categorical or count will be imputed
       using OLS regression models (i.e., assuming normality). */
CATEGORICAL POVERTY OWN sex black other;
    /* IVEware will automatically fit the appropriate model
       (e.g. logistic regression) to impute categorical variables! */
COUNT EDUC;
    /* IVEware will automatically fit Poisson regression models to
       impute count variables. */
TRANSFER fnlwgt2 psu stratum;
    /* TRANSFER variables will simply be carried over to the imputed
       data sets, and not used in the prediction models. */
BOUNDS AGE84(>=70,<=110);
    /* One can place bounds on the imputed values that are predicted
       in IVEware...nice feature! */
ITERATIONS 5;
MULTIPLES 5;
    /* 5 Imputed Data Sets are requested. */
SEED 2005;
    /* Random number seed for random draws. */
RUN;

/* Note the imputation details in the output!
Distributions of variables are maintained. */

/*******************************************************************
 USE IVEWARE %PUTDATA MACRO TO RETRIEVE 5 MULTIPLY IMPUTED DATA SETS
 FROM THE "PACKED" STORAGE FORM. THIS IS NECESSARY ONLY WHEN MULTIPLE
 IMPUTATIONS HAVE BEEN SPECIFIED IN THE IVEWARE %IMPUTE COMMAND.
*******************************************************************/

/* NOTE: This code needs to be run in the regular program editor! */
/* One call per imputation: MULT selects the imputation number and
   DATAOUT names the data set it is written to. */
%PUTDATA(NAME=IMPADL86,DIR=C:\TEMP,MULT=1,DATAOUT=OUT.LSOAIMP1);
%PUTDATA(NAME=IMPADL86,DIR=C:\TEMP,MULT=2,DATAOUT=OUT.LSOAIMP2);
%PUTDATA(NAME=IMPADL86,DIR=C:\TEMP,MULT=3,DATAOUT=OUT.LSOAIMP3);
%PUTDATA(NAME=IMPADL86,DIR=C:\TEMP,MULT=4,DATAOUT=OUT.LSOAIMP4);
%PUTDATA(NAME=IMPADL86,DIR=C:\TEMP,MULT=5,DATAOUT=OUT.LSOAIMP5);

/* Generate the dependent variable in each of the imputed data sets.
   The macro below loops over OUT.LSOAIMP1 ... OUT.LSOAIMP<nimp> and
   rewrites each data set in place with DIFFADL added, where
   DIFFADL = LOGADL2R - LOGADL. */
%macro add_diffadl(nimp=5);
    %local m;
    %do m = 1 %to &nimp;
        DATA OUT.LSOAIMP&m;
            SET OUT.LSOAIMP&m;
            diffadl = logadl2r - logadl;
        RUN;
    %end;
%mend add_diffadl;

%add_diffadl(nimp=5)

/*********************************************************************
COMPUTE DESCRIPTIVE STATISTICS FOR EACH OF THE FIVE IMPUTED DATA SETS.
**********************************************************************/

TITLE 'Descriptive analyses after imputation';
OPTIONS NOLABEL;

/* Run the same PROC MEANS (nmiss, mean, std, min, max) on each of
   OUT.LSOAIMP1 ... OUT.LSOAIMP<nimp>, one step per imputed data set. */
%macro describe_imputations(nimp=5);
    %local m;
    %do m = 1 %to &nimp;
        PROC MEANS DATA=OUT.LSOAIMP&m nmiss mean std min max;
            VAR diffadl poverty own educ logadl logadl2r age84 sex black other;
        RUN;
    %end;
%mend describe_imputations;

%describe_imputations(nimp=5)

/*******************************************************************
 USE IVEWARE %REGRESS MODULE TO PERFORM REGRESSION ANALYSIS USING
 THE FIRST OF FIVE MULTIPLY IMPUTED DATA SETS.
 THE DEPENDENT VARIABLE WILL BE DIFFADL.
*******************************************************************/

/* NOTE: This code needs to be run in the regular program editor! */
%REGRESS(SETUP=NEW,NAME=LSOAREG1,DIR=C:\TEMP);
DATAIN OUT.LSOAIMP1;
CATEGORICAL POVERTY SEX;
    /* identifies categorical predictors */
LINK LINEAR;
    /* requests OLS regression analysis */
DEPENDENT diffadl;
PREDICTOR poverty own educ logadl age84 sex black other;
RUN;

/*******************************************************************
 USE IVEWARE'S SPECIAL FEATURE TO PERFORM REGRESSION ANALYSIS OF THE
 FIVE MULTIPLY IMPUTED DATA SETS (MULTIPLE IMPUTATION ANALYSIS).
 THE DEPENDENT VARIABLE WILL BE DIFFADL.
*******************************************************************/

/* NOTE: This code needs to be run in the regular program editor! */
%REGRESS(SETUP=NEW,NAME=LSOAREGMULT,DIR=C:\TEMP);
DATAIN OUT.LSOAIMP1 OUT.LSOAIMP2 OUT.LSOAIMP3 OUT.LSOAIMP4 OUT.LSOAIMP5;
    /* Note that five data sets are listed! */
CATEGORICAL POVERTY SEX;
    /* identifies categorical predictors */
LINK LINEAR;
    /* requests OLS regression analysis */
DEPENDENT diffadl;
PREDICTOR poverty own educ logadl age84 sex black other;
RUN;

/* Better MI analysis than one assuming multivariate normality! */