######################################################################
################ PS-239 - Section 8 - GenMatch ##################
######################################################################
#
# Purpose: estimate the ATT of `treat` on 1978 earnings (`re78`) by
# genetic matching.  Steps:
#   1. load the data and build the matching / balance matrices,
#   2. estimate a propensity score and orthogonalize the covariates
#      with respect to its linear predictor,
#   3. let GenMatch() search for covariate weights,
#   4. estimate the effect with Match() and inspect balance with
#      MatchBalance().

library(MASS)
library(foreign)
library(Matching)
library(xtable)

# LOAD SHORT DATASET : 1,000 CONTROLS
# load() returns the names of the objects it restored, so we can fail
# early (instead of at the first use) if the file does not contain the
# expected data frame.
loaded_objects <- load(file = "W:/Teaching/Fall2006_PolSci239/ProblemSets/ProblemSet6/lalonde2b.RData")

# The dataset is called "lalonde2"
stopifnot("lalonde2" %in% loaded_objects)
dim(lalonde2)
names(lalonde2)

# NOTE: the dataset is deliberately NOT attach()ed.  Columns are reached
# through with(lalonde2, ...), `data = lalonde2` or `lalonde2$...`, so
# the globals created below (X, Y, ...) can never be confused with
# columns of the data frame.

# The covariates we want to match on
X <- with(
  lalonde2,
  cbind(age, education, black, hispan, married, nodegree,
        u74, u75, re75, re74)
)

# The covariates we want to obtain balance on
# ARE THESE ENOUGH?? WHAT ELSE MAY WE WANT TO LOOK AT?
BalanceMat <- with(
  lalonde2,
  cbind(age, education, black, hispan, married, nodegree,
        u74, u75, re75, re74, re74_x_re75 = re74 * re75)
)

# Propensity score estimation
pscore <- glm(
  treat ~ age + education + black + hispan + married + nodegree +
    u74 + u75 + re74 + re75 + I(re74 * re75),
  family = binomial(link = logit),
  data = lalonde2
)
phat <- fitted(pscore)             # estimated propensity scores
muhat <- pscore$linear.predictors  # estimated linear predictor (logit scale)

# glm() drops rows with missing values; if that happened, muhat would no
# longer line up row-by-row with X and cbind() below would recycle it.
stopifnot(length(muhat) == nrow(X))

# Orthogonalize covariates
# Include the estimated linear predictor in the covariates we will match on
X <- cbind(muhat, X)
Xo <- X
# Column 1 (muhat) is kept as is; every other column is replaced by the
# residuals from regressing it on muhat.
for (j in seq_len(ncol(Xo))[-1]) {
  Xo[, j] <- residuals(lm(X[, j] ~ muhat))
}
# Matrices carry column names, not names(): names(X) is NULL for a
# matrix, so the labels have to be copied with colnames().
colnames(Xo) <- colnames(X)

covariate_names <- colnames(Xo)[-1]

# Let's check that the correlations between the estimated linear predictor
# and the orthogonalized covariates are actually zero
print(vapply(
  covariate_names,
  function(v) cor(Xo[, 1], Xo[, v]),
  numeric(1)
))

# How much is the linear predictor able to capture?
# Correlation of each covariate with its own residualized version: values
# close to 1 mean muhat explains little of that covariate, values close
# to 0 mean muhat explains almost all of it.
print(vapply(
  covariate_names,
  function(v) cor(X[, v], Xo[, v]),
  numeric(1)
))

# Let's call GenMatch() to find the optimal weight to give each
# covariate in 'X' so as we have achieved balance on the covariates in
# 'BalanceMat'. For details on 'pop.size', 'max.generations' and
# 'wait.generations' see the GenMatch help and the genoud help. These
# three are the most important 'GenMatch' specific options aside from
# 'BalanceMatrix'.
###############
# WARNING 1: THESE SETTINGS ARE FOR A QUICK RUN. FOR EXAMPLE, A LARGER
#            POP.SIZE IS NEEDED.
# Solution to WARNING 1: see help(genoud) for default values used by genoud
#   ==> pop.size=1,000, max.generations=100 and wait.generations=10
# WARNING 2: NO PSCORE IS BEING USED IN THIS EXAMPLE
# WARNING 3: NO ORTHOGONALIZATION IS BEING DONE (THERE IS AFTER ALL NO PSCORE)
# Solutions to WARNING 2 and WARNING 3 ==> we've included the estimated
#   linear predictor and all orthogonalized covariates in our Xo matrix
###############
genout <- GenMatch(
  Tr = lalonde2$treat,
  X = Xo,
  BalanceMatrix = BalanceMat,
  estimand = "ATT",
  M = 1,
  pop.size = 10,
  max.generations = 10,
  wait.generations = 1,
  data.type.int = FALSE
)

# The outcome variable (1978 earnings, rescaled to thousands)
Y <- lalonde2$re78 / 1000

# Now that GenMatch() has found some weights, let's estimate
# our causal effect of interest using those weights
mout <- Match(
  Y = Y,
  Tr = lalonde2$treat,
  X = Xo,
  estimand = "ATT",
  Weight.matrix = genout
)
summary(mout)

# Let's determine if balance has actually been obtained on the variables
# of interest
# WHAT ELSE MAY BE OF INTEREST?
mb <- MatchBalance(
  treat ~ age + education + black + hispan + married + nodegree +
    u74 + u75 + re75 + re74 + I(re74 * re75),
  data = lalonde2,
  match.out = mout,
  nboots = 1000
)

# Remember: the "Weight" option in Match() determines the weighting scheme
# the matching algorithm uses when weighting each of the covariates in 'X'.
# The default Weight=1 denotes that weights are equal to the inverse of the
# variances. Weight=2 denotes the Mahalanobis distance metric, and Weight=3
# denotes that the user will supply a weight matrix ('Weight.matrix'). The
# latter case is what you do when you use GenMatch() to do genetic matching.
# If you supply a 'Weight.matrix', 'Weight' will be automatically set to be
# equal to 3.