*RDiT Simulations *Hausman and Rapson, ARRE, "Regression Discontinuity in Time: Considerations for Empirical Applications" //cd here global imagespath "../../Paper\imagefiles\" //findit vincenty clear all version 13 version 13: set seed 100313 //todays date 100313 //random things: monitor selection, treatment start dates //re-setting would lead to costly data work, because this selects which states we use for hourly data *-----------------------------------* *----------Random Selections--------* *-----------------------------------* ****OZONE DATA *We use ozone data from Auffhammer and Kellogg 2009 * https://www.aeaweb.org/articles?id=10.1257/aer.101.6.2687 *We drop California stations, since we know there are confounders there *Following AK2009, we only keep monitors with a certain percentage of valid observations *Also, we only keep monitors that are open for the whole sample use AER20090377_FinalData, clear drop if state_code==6 //california drop if valid<9 //following AK. At least 9 valid observations for the period between 9 am and 9 pm each day. keep state_code county_code site_id ozone_max day month year Date TempMax TempMin Rain Snow tostring state county site, replace replace state="0"+state if length(state)==1 replace county="00"+county if length(county)==1 replace county="0"+county if length(county)==2 replace site_id="000"+site_id if length(site_id)==1 replace site_id="00"+site_id if length(site_id)==2 replace site_id="0"+site_id if length(site_id)==3 gen monitorstring=state+county+site rename Date date gen pollutant="ozone" rename ozone_max ozone *candidate monitors: open for 18 years and have no two-month gaps (count: 110) sort monitorstring date gen temp=date-date[_n-1] if monitorstring==monitorstring[_n-1] bysort monitorstring: egen temp2=max(temp) bysort monitorstring: egen mindate=min(date) bysort monitorstring: egen maxdate=max(date) gen length=maxdate-mindate codebook monitorstring if temp2<60 & length>16*365 keep if temp2<60 & length>16*365 drop temp* *random selection of monitors *this is not currently needed, because we use all monitors sort monitorstring date version 13: gen temp1=runiform() gen temp2=1 if monitorstring!=monitorstring[_n-1] gen temp3=temp1 if temp2==1 egen temp4=rank(temp3) bysort monitorstring: egen monitor=max(temp4) drop temp1-temp4 compress save counterfactual_data_ak2009, replace version 13: display c(seed) *X2479360ac069ac80b783ea1dd2d3020300044aec *****RANDOM START DATES *10 random treatment start dates, with at least 4 years pre and 4 years post /*if not running immediately following previous paragraph, do: set seed X2479360ac069ac80b783ea1dd2d3020300044aec */ use counterfactual_data_ak2009, clear keep date duplicates drop version 13: gen temp=runiform() drop if date<10593+4*365 //min start date, with four years post drop if date>17166-4*365 //max start date, with four years pre sort temp keep if [_n]<=10 //10 random date keep date sort date save start_dates, replace *12274 12614 12717 12804 13061 13344 13712 14471 14910 15678 version 13: display c(seed) *X09b8d3186fd92d522a2483c627612e4300040a48 *****NOISE DATA *adding in noise *currently using this only for AR(1) simulations. *For main simulations, we actually need 1000 samples of 1000 monitors each. This is below. /*if not running immediately following previous paragraph, do: set seed X09b8d3186fd92d522a2483c627612e4300040a48 */ use counterfactual_data_ak2009, clear keep date day month year pollutant duplicates drop expand 1000 bysort date: gen monitor=[_n] version 13: gen ozone=rnormal() save counterfactual_data_noise, replace version 13: display c(seed) *Xc72478be3ee7e07259212ca18b83cf9f00043868 *****RANDOMLY GENERATED AR(1) DATA *Counterfactual AR(1) data *one sample of 1000 monitors use counterfactual_data_noise, clear /*if not running immediately following previous paragraph, do: set seed Xc72478be3ee7e07259212ca18b83cf9f00043868 */ ///earlier dates, for burn-in period drop month year day ozone pollutant expand 2, gen(expand) //sum date //display r(max)-r(min)+1 replace date=date-6574 if expand==1 drop expand //burn in didn't look long enough. try again. i now think it was long enough. *[update: i know think not necessary. could take out] expand 2, gen(expand) sum date display r(max)-r(min)+1 replace date=date-13148 if expand==1 drop expand xtset monitor date, daily gen temp=1 count if l.temp==. count if l.temp==. & date!=-9129 drop temp //verifying no missing dates version 13: gen resid=rnormal() gen post=0 replace post=1 if date>=13344 //sum date // earliest: -9129 local beta_true -0.2 foreach auto of numlist 0/9{ gen counterfactual_`auto'=0 if date==-9129 replace counterfactual_`auto'=(`auto'/10)*l.counterfactual_`auto'+`beta_true'*post+resid if date>-9129 } keep if date>=10593 drop resid compress save counterfactual_data_autoregress, replace version 13: display c(seed) *X55c34156a345ba66e3d084b16d17cc3e000421fd *---------------------------------------* *-------Air Quality Summary Stats-------* *---------------------------------------* **TABLE: AIR QUALITY SUMMARY STATS, AK DATA use counterfactual_data_ak2009, clear keep if monitor!=89 & monitor!=95 //some of the monitors have too much missing data in the simulations below gen TempMean=(TempMin+TempMax)/2 format %8.3f ozone format %8.2f Rain Snow format %8.0f TempMean TempMin TempMax //Rain is in 0.01 inches //Snow is in inches replace Rain=Rain/100 //rain is in inches sum ozone TempMin TempMax TempMean Rain Snow, sep(0) format codebook date monitor count if ozone==0 //153 obs, out of 682130 **FIGURE: COUNTERFACTUAL AND TREATMENT EFFECT AT REPRESENTATIVE MONITOR, AK DATA use counterfactual_data_ak2009, clear tab monitors if monitor==2 local beta_true = -0.2 local start_date = 13344 gen lnozone=ln(ozone) *generate treatment and outcome gen treatment=0 replace treatment=1 if date>=`start_date' gen outcome=lnozone+`beta_true'*treatment replace outcome=. if treatment==0 rename lnozone x rename outcome y label variable x "True" label variable y "With simulated treatment" scatter x y date if monitor==2 & abs(date-`start_date')<4*365, /// msize(small small) mcolor(gs8 black ) msymbol(O Oh ) /// xline(`start_date', lcolor(gs8)) /// xtitle("") ytitle("Log ambient ozone") /// graphregion(color(white)) graph export $imagespath\trueseries`start_date'.eps, as(eps) replace graph export $imagespath\trueseries`start_date'.png, as(png) width(1800) replace tab state_c if monitor==2 tab county_c if monitor==2 *-----------------------------------* *------------Simulations------------* *---------Defining Programs---------* *-----------------------------------* program drop _all program define RDIT_SIM if "$data"=="airquality"{ file open simulate using "results_airquality_$section.csv", write text replace } if "$data"=="noise"{ file open simulate using "results_noise_$section.csv", write text replace } file write simulate "data,section,monitor,pollutant," file write simulate "beta_true,start_date," file write simulate "length,anticipation,expdecayrate,genlogdecay_b_M,phaseinlength," file write simulate "estimator,version,bic,r2,n," file write simulate "beta_hat,standard_error," file write simulate "mintreat,maxtreat" _n foreach length of numlist $length_list { foreach anticipation in $anticipation_list{ foreach beta_true in $beta_true_list { foreach start_date in $start_date_list{ foreach expdecayrate in $expdecayrate_list{ forval b = $genlog_blist{ forval M = $genlog_Mlist{ foreach phaseinlength in $phasein_list{ foreach pollutant in $pollutant_list { if "$data"=="airquality"{ use counterfactual_data_ak2009, clear } if "$data"=="noise"{ use counterfactual_data_noise, clear //mean zero, standard deviation 1 } xtset monitor date, daily if "$section"=="genlogdecay"{ local A 0 local K 1 local B=-`b'/10000 local v 0.5 local Q 0.01 } gen date_c=(date-`start_date')/1000 //centered date variable, rescaled so high order terms don't blow up in magnitude gen ln`pollutant'=ln(`pollutant') if "$data"=="noise"{ replace ln`pollutant'=`pollutant' //don't want to log, because then non-normal and <0 missing //i.e., assume the noise data as already log-normally distributed. } *generate treatment and outcome gen treatment=0 replace treatment=1 if date>=`start_date' & date<`start_date'+`length' replace treatment=0.5 if date>=`start_date'-`anticipation' & date<`start_date' if "$section"=="exponentialdecay"{ replace treatment=1*(1-`expdecayrate')^(date-`start_date') if date>=`start_date' } if "$section"=="genlogdecay"{ replace treatment=0 replace treatment=`A'+(`K'-`A')/(1+`Q'*exp(-`B'*(date-`start_date'-`M')))^(1/`v') if date>=`start_date' } if "$section"=="phasein"{ replace treatment=0 replace treatment=(date-`start_date'+1)/`phaseinlength' if date>=`start_date' & date<`start_date'+`phaseinlength' replace treatment=1 if date>=`start_date'+`phaseinlength' } sum treatment if date>=`start_date' local maxtreat=r(max) sum treatment if date>=`start_date' local mintreat=r(min) gen outcome=ln`pollutant'+`beta_true'*treatment * Now put ourselves in the empiricist's shoes. *variables for regression, assuming constant treatment effect at true start date foreach y of numlist 1/9{ gen date_c`y'=date_c^`y' //centered date variables for global polynomial } gen post=0 replace post=1 if date>=`start_date' //empiricist assumes known start date and constant treatment thereafter foreach y of numlist 1/9{ gen pre_date_c`y'=date_c^`y'*(1-post) //separate pre- and post- polynomials } foreach y of numlist 1/9{ gen post_date_c`y'=date_c^`y'*(post) //run as two separate loops so ordering is convenient } quietly tab month, gen(MM) gen dow=dow(date) quietly tab dow, gen(DOW) if "$data"=="airquality"{ foreach v in TempMax TempMin Rain Snow{ quietly sum `v' local temp=r(mean) gen `v'2=(`v'-`temp')^2 gen `v'3=(`v'-`temp')^3 } } gen ym=ym(year,month) if "$data"=="airquality"{ local controls "MM* DOW* TempMax* TempMin* Rain* Snow*" } * augmented local linear foreach bandwidth of numlist 30 { gen studyperiod`bandwidth'=1 if abs(date-`start_date')<=`bandwidth' replace studyperiod`bandwidth'=0 if studyperiod`bandwidth'==. } *NOW START LOOPING THROUGH MONITORS foreach monitor of numlist $monitor_list{ *global polynomial, bic chosen, with controls *pre post with controls is equivalent to this with order 1 foreach order of numlist 1/9{ reg outcome post `controls' date_c1-date_c`order' /// if abs(date-`start_date')<=365*4 & monitor==`monitor', vce(cluster ym) local r2=e(r2) local n=e(N) lincom post local estimate=r(estimate) local se=r(se) estat ic matrix A=r(S) local bic1=A[1,6] local bic=round(`bic1',0.01) file write simulate "$data,$section,`monitor',`pollutant'," file write simulate "`beta_true',`start_date'," file write simulate "`length',`anticipation',`expdecayrate',genlogdecay_`b'_`M',`phaseinlength'," file write simulate "global_poly,order_`order',`bic',`r2',`n'," file write simulate "`estimate',`se'," file write simulate "`mintreat',`maxtreat'" _n } *separate polynomial, bic chosen, with controls *pre post with controls is equivalent to this with order 1 foreach preorder of numlist 1/3{ foreach postorder of numlist 1/3{ reg outcome post `controls' pre_date_c1-pre_date_c`preorder' post_date_c1-post_date_c`postorder' /// if abs(date-`start_date')<=365*4 & monitor==`monitor', vce(cluster ym) local r2=e(r2) local n=e(N) lincom post local estimate=r(estimate) local se=r(se) estat ic matrix A=r(S) local bic1=A[1,6] local bic=round(`bic1',0.01) file write simulate "$data,$section,`monitor',`pollutant'," file write simulate "`beta_true',`start_date'," file write simulate "`length',`anticipation',`expdecayrate',genlogdecay_`b'_`M',`phaseinlength'," file write simulate "separate_poly,order_`preorder'_`postorder',`bic',`r2',`n'," file write simulate "`estimate',`se'," file write simulate "`mintreat',`maxtreat'" _n } } *local linear foreach bandwidth of numlist 30 { reg outcome post `controls' date_c if abs(date-`start_date')<=`bandwidth' & monitor==`monitor' local r2=e(r2) local n=e(N) lincom post local estimate=r(estimate) local se=r(se) estat ic matrix A=r(S) local bic1=A[1,6] local bic=round(`bic1',0.01) file write simulate "$data,$section,`monitor',`pollutant'," file write simulate "`beta_true',`start_date'," file write simulate "`length',`anticipation',`expdecayrate',genlogdecay_`b'_`M',`phaseinlength'," file write simulate "local_linear,bandwidth_`bandwidth',`bic',`r2',`n'," file write simulate "`estimate',`se'," file write simulate "`mintreat',`maxtreat'" _n } *augmented local linear foreach bandwidth of numlist 30 { reg outcome `controls' if abs(date-`start_date')<=365*4 & monitor==`monitor' predict temp, resid reg temp post date_c if studyperiod`bandwidth'==1 & monitor==`monitor' local r2=e(r2) local n=e(N) lincom post local estimate=r(estimate) local se=r(se) estat ic matrix A=r(S) local bic1=A[1,6] local bic=round(`bic1',0.01) drop temp file write simulate "$data,$section,`monitor',`pollutant'," file write simulate "`beta_true',`start_date'," file write simulate "`length',`anticipation',`expdecayrate',genlogdecay_`b'_`M',`phaseinlength'," file write simulate "local_linear_fullsample,bandwidth_`bandwidth',`bic',`r2',`n'," file write simulate "`estimate',`se'," file write simulate "`mintreat',`maxtreat'" _n } *END OF MONITOR LOOP (FIRST) } *monthly collapse if "$data"=="airquality"{ collapse (mean) outcome TempMax TempMin Rain Snow, by(year month monitor) } if "$data"=="noise"{ collapse (mean) outcome, by(year month monitor) } gen date=mdy(month,1,year) gen date_c=(date-`start_date')/1000 gen post=0 replace post=1 if date>=`start_date' quietly tab month, gen(MM) if "$data"=="airquality"{ foreach v in TempMax TempMin Rain Snow{ quietly sum `v' local temp=r(mean) gen `v'2=(`v'-`temp')^2 gen `v'3=(`v'-`temp')^3 } } if "$data"=="airquality"{ local controls "MM* TempMax* TempMin* Rain* Snow*" } *RE-START MONITOR LOOPS foreach monitor of numlist $monitor_list{ reg outcome post `controls' date_c if abs(date-`start_date')<=365*4 & monitor==`monitor' local r2=e(r2) local n=e(N) lincom post local estimate=r(estimate) local se=r(se) estat ic matrix A=r(S) local bic1=A[1,6] local bic=round(`bic1',0.01) file write simulate "$data,$section,`monitor',`pollutant'," file write simulate "`beta_true',`start_date'," file write simulate "`length',`anticipation',`expdecayrate',genlogdecay_`b'_`M',`phaseinlength'," file write simulate "monthlycollapse,monthlycollapse,`bic',`r2',`n'," file write simulate "`estimate',`se'," file write simulate "`mintreat',`maxtreat'" _n } *END OF MONITOR LOOP (SECOND) } } } } } } } } } file close simulate end *-----------------------------------* *------------Simulations------------* *---------Air Quality Data----------* *-----------------------------------* //right now, 5 hours if you do all the parameter combinations listed below and all the regression //specifications listed in the program. timer clear timer on 1 global data "airquality" global section "basic" global monitor_list "1/88 90/94 96/110" //some of the monitors have too much missing data global beta_true_list "-0.2" global pollutant_list "ozone" global start_date_list "12274 12614 12717 12804 13061 13344 13712 14471 14910 15678" //recall: randomly selected above global length_list "1000000" //ie, entire post-period global anticipation_list "0" global expdecayrate_list "0" global genlog_blist "0(1)0" global genlog_Mlist "0(1)0" global phasein_list "1" quietly RDIT_SIM global data "airquality" global section "sharpdecay" global monitor_list "1/88 90/94 96/110" global beta_true_list "-0.2" global pollutant_list "ozone" global start_date_list "12274 12614 12717 12804 13061 13344 13712 14471 14910 15678" global length_list "30 365" //the treatment lasts only one month or one year global anticipation_list "0" global expdecayrate_list "0" global genlog_blist "0(1)0" global genlog_Mlist "0(1)0" global phasein_list "1" quietly RDIT_SIM global data "airquality" global section "sharpdecayfull" global monitor_list "1/88 90/94 96/110" global beta_true_list "-0.2" global pollutant_list "ozone" global start_date_list "12274 12614 12717 12804 13061 13344 13712 14471 14910 15678" global length_list "0(100)1500" //treatment lasts 0 days, 100 days, 200 days, etc. up to 4 years of post data = 365*4 global anticipation_list "0" global expdecayrate_list "0" global genlog_blist "0(1)0" global genlog_Mlist "0(1)0" global phasein_list "1" quietly RDIT_SIM global data "airquality" global section "genlogdecay" global monitor_list "1/88 90/94 96/110" global beta_true_list "-0.2 " global pollutant_list "ozone" global start_date_list "12274 12614 12717 12804 13061 13344 13712 14471 14910 15678" global length_list "1000000" global anticipation_list "0" global expdecayrate_list "0" global genlog_blist "50(25)150" //stepping over b parameter of generalized log decay global genlog_Mlist "0(200)1000" //stepping over M parameter of generalized log decay global phasein_list "1" quietly RDIT_SIM global data "airquality" global section "phasein" global monitor_list "1/88 90/94 96/110" global beta_true_list "-0.2 " global pollutant_list "ozone" global start_date_list "12274 12614 12717 12804 13061 13344 13712 14471 14910 15678" global length_list "1000000" global anticipation_list "0" global expdecayrate_list "0" global genlog_blist "0(1)0" global genlog_Mlist "0(1)0" global phasein_list "7" //linear ramp-up of treatment of one day, one week, one month, one year quietly RDIT_SIM timer off 1 timer list 1 *-----------------------------------* *-------------Simulations-----------* *------Counterfactual is noise------* *-----------------------------------* //right now, 17 hours if you do all the parameter combinations listed below and all the regression //specifications listed in the program. timer on 2 //loop through all 1000 replications global data "noise" global section "basic" global monitor_list "1/1000" global beta_true_list "-0.2" global pollutant_list "ozone" global start_date_list "13344" global length_list "1000000" global anticipation_list "0" global expdecayrate_list "0" global genlog_blist "0(1)0" global genlog_Mlist "0(1)0" global phasein_list "1" quietly RDIT_SIM global data "noise" global section "sharpdecay" global monitor_list "1/1000" global beta_true_list "-0.2" global pollutant_list "ozone" global start_date_list "13344" global length_list "30 365" global anticipation_list "0" global expdecayrate_list "0" global genlog_blist "0(1)0" global genlog_Mlist "0(1)0" global phasein_list "1" quietly RDIT_SIM global data "noise" global section "sharpdecayfull" global monitor_list "1/1000" global beta_true_list "-0.2" global pollutant_list "ozone" global start_date_list "13344" global length_list "0(100)1500" //limiting to 4 years of post data = 365*4 global anticipation_list "0" global expdecayrate_list "0" global genlog_blist "0(1)0" global genlog_Mlist "0(1)0" global phasein_list "1" quietly RDIT_SIM global data "noise" global section "genlogdecay" global monitor_list "1/1000" global beta_true_list "-0.2 " global pollutant_list "ozone" global start_date_list "13344" global length_list "1000000" global anticipation_list "0" global expdecayrate_list "0" global genlog_blist "50(25)150" global genlog_Mlist "0(200)1000" global phasein_list "1" quietly RDIT_SIM global data "noise" global section "phasein" global monitor_list "1/1000" global beta_true_list "-0.2 " global pollutant_list "ozone" global start_date_list "13344" global length_list "1000000" global anticipation_list "0" global expdecayrate_list "0" global genlog_blist "0(1)0" global genlog_Mlist "0(1)0" global phasein_list "7" quietly RDIT_SIM timer off 2 timer list 2 *-----------------------------------* *----------Autoregression-----------* *-----------------------------------* //just one start date, 13344, and just one beta_true: -0.2 file open simulate using "results_autoregress.csv", write text replace file write simulate "monitor,auto_parameter,ar1_included,ar1_estimate,ar1_standarderror," file write simulate "estimator,version,bic,r2,n," file write simulate "beta_hat,standard_error" _n use counterfactual_data_autoregress, clear xtset monitor date, daily gen date_c=(date-13344)/1000 gen year=year(date) gen month=month(date) * Now put ourselves in the empiricist's shoes. *variables for regression, assuming constant treatment effect at true start date foreach y of numlist 1/9{ gen date_c`y'=date_c^`y' } foreach y of numlist 1/9{ gen pre_date_c`y'=date_c^`y'*(1-post) //run as two separate loops so ordering is convenient } foreach y of numlist 1/9{ gen post_date_c`y'=date_c^`y'*(post) } gen ym=ym(year,month) * augmented local linear foreach bandwidth of numlist 30 { gen studyperiod`bandwidth'=1 if abs(date-13344)<=`bandwidth' replace studyperiod`bandwidth'=0 if studyperiod`bandwidth'==. } foreach monitor of numlist 1/1000{ foreach auto_parameter of numlist 1(2)9{ *global polynomial, bic chosen, with controls *pre post with controls is equivalent to this with order 1 foreach order of numlist 1/9{ reg counterfactual_`auto_parameter' post date_c1-date_c`order' /// if abs(date-13344)<=365*4 & monitor==`monitor', vce(cluster ym) local r2=e(r2) local n=e(N) lincom post local estimate=r(estimate) local se=r(se) estat ic matrix A=r(S) local bic1=A[1,6] local bic=round(`bic1',0.01) file write simulate "`monitor',`auto_parameter',no,,," file write simulate "global_poly,order_`order',`bic',`r2',`n'," file write simulate "`estimate',`se'" _n } foreach order of numlist 1/9{ reg counterfactual_`auto_parameter' l.counterfactual_`auto_parameter' post date_c1-date_c`order' /// if abs(date-13344)<=365*4 & monitor==`monitor', vce(cluster ym) local r2=e(r2) local n=e(N) lincom post local estimate=r(estimate) local se=r(se) lincom l.counterfactual_`auto_parameter' local ar1=r(estimate) local sear1=r(se) estat ic matrix A=r(S) local bic1=A[1,6] local bic=round(`bic1',0.01) file write simulate "`monitor',`auto_parameter',yes,`ar1',`sear1'," file write simulate "global_poly,order_`order',`bic',`r2',`n'," file write simulate "`estimate',`se'" _n } *separate polynomial, bic chosen, with controls *pre post with controls is equivalent to this with order 1 foreach preorder of numlist 1/3{ foreach postorder of numlist 1/3{ reg counterfactual_`auto_parameter' post pre_date_c1-pre_date_c`preorder' post_date_c1-post_date_c`postorder' /// if abs(date-13344)<=365*4 & monitor==`monitor', vce(cluster ym) local r2=e(r2) local n=e(N) lincom post local estimate=r(estimate) local se=r(se) estat ic matrix A=r(S) local bic1=A[1,6] local bic=round(`bic1',0.01) file write simulate "`monitor',`auto_parameter',no,,," file write simulate "separate_poly,order_`preorder'_`postorder',`bic',`r2',`n'," file write simulate "`estimate',`se'" _n } } foreach preorder of numlist 1/3{ foreach postorder of numlist 1/3{ reg counterfactual_`auto_parameter' l.counterfactual_`auto_parameter' post pre_date_c1-pre_date_c`preorder' post_date_c1-post_date_c`postorder' /// if abs(date-13344)<=365*4 & monitor==`monitor', vce(cluster ym) local r2=e(r2) local n=e(N) lincom post local estimate=r(estimate) local se=r(se) lincom l.counterfactual_`auto_parameter' local ar1=r(estimate) local sear1=r(se) estat ic matrix A=r(S) local bic1=A[1,6] local bic=round(`bic1',0.01) file write simulate "`monitor',`auto_parameter',yes,`ar1',`sear1'," file write simulate "separate_poly,order_`preorder'_`postorder',`bic',`r2',`n'," file write simulate "`estimate',`se'" _n } } *local linear foreach bandwidth of numlist 30 { reg counterfactual_`auto_parameter' post date_c /// if abs(date-13344)<=`bandwidth' & monitor==`monitor' local r2=e(r2) local n=e(N) lincom post local estimate=r(estimate) local se=r(se) estat ic matrix A=r(S) local bic1=A[1,6] local bic=round(`bic1',0.01) file write simulate "`monitor',`auto_parameter',no,,," file write simulate "local_linear,bandwidth_`bandwidth',`bic',`r2',`n'," file write simulate "`estimate',`se'" _n } foreach bandwidth of numlist 30 { reg counterfactual_`auto_parameter' l.counterfactual_`auto_parameter' post date_c /// if abs(date-13344)<=`bandwidth' & monitor==`monitor' local r2=e(r2) local n=e(N) lincom post local estimate=r(estimate) local se=r(se) lincom l.counterfactual_`auto_parameter' local ar1=r(estimate) local sear1=r(se) estat ic matrix A=r(S) local bic1=A[1,6] local bic=round(`bic1',0.01) file write simulate "`monitor',`auto_parameter',yes,`ar1',`sear1'," file write simulate "local_linear,bandwidth_`bandwidth',`bic',`r2',`n'," file write simulate "`estimate',`se'" _n } *augmented local linear foreach bandwidth of numlist 30 { reg counterfactual_`auto_parameter' /// if abs(date-13344)<=365*4 & monitor==`monitor' predict temp, resid reg temp post date_c if studyperiod`bandwidth'==1 & monitor==`monitor' local r2=e(r2) local n=e(N) lincom post local estimate=r(estimate) local se=r(se) estat ic matrix A=r(S) local bic1=A[1,6] local bic=round(`bic1',0.01) drop temp file write simulate "`monitor',`auto_parameter',no,,," file write simulate "local_linear_fullsample,bandwidth_`bandwidth',`bic',`r2',`n'," file write simulate "`estimate',`se'" _n } foreach bandwidth of numlist 30 { reg counterfactual_`auto_parameter' l.counterfactual_`auto_parameter' /// if abs(date-13344)<=365*4 & monitor==`monitor' lincom l.counterfactual_`auto_parameter' local ar1=r(estimate) local sear1=r(se) predict temp, resid reg temp post date_c if studyperiod`bandwidth'==1 & monitor==`monitor' local r2=e(r2) local n=e(N) lincom post local estimate=r(estimate) local se=r(se) estat ic matrix A=r(S) local bic1=A[1,6] local bic=round(`bic1',0.01) drop temp file write simulate "`monitor',`auto_parameter',yes,`ar1',`sear1'," file write simulate "local_linear_fullsample,bandwidth_`bandwidth',`bic',`r2',`n'," file write simulate "`estimate',`se'" _n } } } *monthly collapse collapse (mean) counterfactual*, by(year month monitor) gen date=mdy(month,1,year) gen ym=ym(year, month) gen date_c=(date-13344)/1000 gen post=0 replace post=1 if date>=13344 xtset monitor ym, monthly foreach monitor of numlist 1/1000{ foreach auto_parameter of numlist 1(2)9{ reg counterfactual_`auto_parameter' post date_c /// if abs(date-13344)<=365*4 & monitor==`monitor' local r2=e(r2) local n=e(N) lincom post local estimate=r(estimate) local se=r(se) estat ic matrix A=r(S) local bic1=A[1,6] local bic=round(`bic1',0.01) file write simulate "`monitor',`auto_parameter',no,,," file write simulate "monthlycollapse,monthlycollapse,`bic',`r2',`n'," file write simulate "`estimate',`se'" _n reg counterfactual_`auto_parameter' l.counterfactual_`auto_parameter' post date_c /// if abs(date-13344)<=365*4 & monitor==`monitor' local r2=e(r2) local n=e(N) lincom post local estimate=r(estimate) local se=r(se) lincom l.counterfactual_`auto_parameter' local ar1=r(estimate) local sear1=r(se) estat ic matrix A=r(S) local bic1=A[1,6] local bic=round(`bic1',0.01) file write simulate "`monitor',`auto_parameter',yes,`ar1',`sear1'," file write simulate "monthlycollapse,monthlycollapse,`bic',`r2',`n'," file write simulate "`estimate',`se'" _n } } file close simulate *-----------------------------------* *--------Results and Graphs---------* *-----------------------------------* //FIGURE 1: ***NO CONFOUNDERS insheet using results_airquality_basic.csv, comma clear replace bic=. if estimator!="global_poly" & estimator!="separate_poly" bysort monitor beta_true start_date estimator: egen minbic=min(bic) gen best_poly=1 if round(bic,0.01)==round(minbic,0.01) replace best_poly=. if bic==. gen order=subinstr(version,"order_","",.) if estimator=="global_poly" destring order, force replace gen temp=subinstr(version,"_"," ",.) if estimator=="separate_poly" gen orderpre=word(temp,2) if estimator=="separate_poly" gen orderpost= word(temp,3) if estimator=="separate_poly" destring orderpre orderpost, replace drop temp list estimator version beta_hat if monitor==25 & (best==1|(strmatch(estimator,"*poly*")==0)) /// & start_date==13344 //25 34 56 all are fairly representative across all specifications global monitor 25 global start_date 13344 global beta_true -0.2 use counterfactual_data_ak2009, clear gen date_c=(date-$start_date)/1000 gen treatment=0 replace treatment=1 if date>=$start_date gen outcome=ln(ozone)+$beta_true*treatment gen post=0 replace post=1 if date>=$start_date foreach y of numlist 1/9{ gen date_c`y'=date_c^`y' } foreach y of numlist 1/3{ gen pre_date_c`y'=date_c^`y'*(1-post) //separate pre- and post- polynomials } foreach y of numlist 1/3{ gen post_date_c`y'=date_c^`y'*(post) //run as two separate loops so ordering is convenient } quietly tab month, gen(MM) gen dow=dow(date) quietly tab dow, gen(DOW) foreach v in TempMax TempMin Rain Snow{ gen `v'2=`v'^2 gen `v'3=`v'^3 } gen ym=ym(year,month) gen studyperiod=1 if abs(date-$start_date)<=30 replace studyperiod=0 if studyperiod==. //panel A: global poly -- FOR MONITOR 25, USE POLY ORDER 2 reg outcome MM* DOW* TempMax* TempMin* Rain* Snow* /// if monitor==$monitor & abs(date-$start_date)<4*365 predict resid, residuals reg resid post date_c1-date_c2 /// if monitor==$monitor & abs(date-$start_date)<4*365 predict fitted, xb gen fitted_pre=fitted if post==0 gen fitted_post=fitted if post==1 scatter resid fitted_pre fitted_post date /// if monitor==$monitor & abs(date-$start_date)<4*365 /// , xline($start_date, lcolor(gs12)) /// msize(small none none) msymbol(O none none) mcolor(gs8 black black) /// connect(none l l) lcolor(gs8 black black) lwidth(thin thick thick) /// graphregion(color(white)) xtitle("") ytitle("Simulated log ambient ozone (residuals)") /// title("Global Polynomial") legend(off) graph export $imagespath/basicpolytext.eps, as(eps) replace graph export $imagespath/basicpolytext.png, as(png) width(900) replace scatter resid fitted_pre fitted_post date /// if monitor==$monitor & abs(date-$start_date)<4*365 /// , xline($start_date, lcolor(gs12)) /// msize(small none none) msymbol(O none none) mcolor(gs8 black black) /// connect(none l l) lcolor(gs8 black black) lwidth(thin thick thick) /// graphregion(color(white)) xtitle("") ytitle("Simulated log ambient ozone (residuals)") /// title("Panel A") legend(off) graph export $imagespath/basicpoly.eps, as(eps) replace graph export $imagespath/basicpoly.png, as(png) width(900) replace drop resid fitted* //panel b: separate poly FOR MONITOR 25, USE POLY ORDERS 1 AND 2 reg outcome MM* DOW* TempMax* TempMin* Rain* Snow* /// if monitor==$monitor & abs(date-$start_date)<4*365 predict resid, residuals reg resid post pre_date_c1-pre_date_c1 post_date_c1-post_date_c2 /// if monitor==$monitor & abs(date-$start_date)<4*365 predict fitted, xb gen fitted_pre=fitted if post==0 gen fitted_post=fitted if post==1 scatter resid fitted_pre fitted_post date /// if monitor==$monitor & abs(date-$start_date)<4*365 /// , xline($start_date, lcolor(gs12)) /// msize(small none none) msymbol(O none none) mcolor(gs8 black black) /// connect(none l l) lcolor(gs8 black black) lwidth(thin thick thick) /// graphregion(color(white)) xtitle("") ytitle("Simulated log ambient ozone (residuals)") /// title("Panel B") legend(off) graph export $imagespath/basicseparatepoly.eps, as(eps) replace graph export $imagespath/basicseparatepoly.png, as(png) width(900) replace drop resid fitted* //panel c: local linear reg outcome MM* DOW* TempMax* TempMin* Rain* Snow* /// if monitor==$monitor & abs(date-$start_date)<=30 predict resid, residuals reg resid post date_c1 /// if monitor==$monitor & abs(date-$start_date)<=30 predict fitted, xb gen fitted_pre=fitted if post==0 gen fitted_post=fitted if post==1 scatter resid fitted_pre fitted_post date /// if monitor==$monitor & abs(date-$start_date)<30 /// , xline($start_date, lcolor(gs12)) /// msize(small none none) msymbol(O none none) mcolor(gs8 black black) /// connect(none l l) lcolor(gs8 black black) lwidth(thin thick thick) /// graphregion(color(white)) xtitle("") ytitle("Simulated log ambient ozone (residuals)") /// title("Local Linear") legend(off) graph export $imagespath/basiclintext.eps, as(eps) replace graph export $imagespath/basiclintext.png, as(png) width(900) replace scatter resid fitted_pre fitted_post date /// if monitor==$monitor & abs(date-$start_date)<30 /// , xline($start_date, lcolor(gs12)) /// msize(small none none) msymbol(O none none) mcolor(gs8 black black) /// connect(none l l) lcolor(gs8 black black) lwidth(thin thick thick) /// graphregion(color(white)) xtitle("") ytitle("Simulated log ambient ozone (residuals)") /// title("Panel C") legend(off) graph export $imagespath/basiclin.eps, as(eps) replace graph export $imagespath/basiclin.png, as(png) width(900) replace drop resid fitted* //panel d,e: augmented local linear reg outcome MM* DOW* TempMax* TempMin* Rain* Snow* /// if monitor==$monitor & abs(date-$start_date)<=365*4 predict resid, residuals reg resid post date_c1 /// if monitor==$monitor & abs(date-$start_date)<=30 predict fitted, xb gen fitted_pre=fitted if post==0 & studyperiod==1 gen fitted_post=fitted if post==1 & studyperiod==1 scatter resid fitted_pre fitted_post date /// if monitor==$monitor & abs(date-$start_date)<=365*4 /// , xline($start_date, lcolor(gs12)) /// msize(small none none) msymbol(O none none) mcolor(gs8 black black) /// connect(none l l) lcolor(gs8 black black) lwidth(thin thick thick) /// graphregion(color(white)) xtitle("") ytitle("Simulated log ambient ozone (residuals)") /// title("Panel D") legend(off) graph export $imagespath/basiclinfullsample.eps, as(eps) replace graph export $imagespath/basiclinfullsample.png, as(png) width(900) replace scatter resid fitted_pre fitted_post date /// if monitor==$monitor & abs(date-$start_date)<=30 /// , xline($start_date, lcolor(gs12)) /// msize(small none none) msymbol(O none none) mcolor(gs8 black black) /// connect(none l l) lcolor(gs8 black black) lwidth(thin thick thick) /// graphregion(color(white)) xtitle("") ytitle("Simulated log ambient ozone (residuals)") /// title("Panel E") legend(off) graph export $imagespath/basiclinfullsamplezoomed.eps, as(eps) replace graph export $imagespath/basiclinfullsamplezoomed.png, as(png) width(900) replace drop resid fitted* //panel f: monthly pre/post collapse (mean) outcome TempMax TempMin Rain Snow, by(monitor year month) gen date=mdy(month,1,year) format %dD_m_CY date gen date_c=(date-$start_date)/1000 gen post=0 replace post=1 if date>=$start_date quietly tab month, gen(MM) foreach v in TempMax TempMin Rain Snow{ gen `v'2=`v'^2 gen `v'3=`v'^3 } reg outcome MM* TempMax* TempMin* Rain* Snow* /// if monitor==$monitor & abs(date-$start_date)<=365*4 predict resid, residuals reg resid post date_c if monitor==$monitor & abs(date-$start_date)<=365*4 predict fitted, xb gen fitted_pre=fitted if post==0 gen fitted_post=fitted if post==1 scatter resid fitted_pre fitted_post date /// if monitor==$monitor & abs(date-$start_date)<=365*4 /// , xline($start_date, lcolor(gs12)) /// msize(small none none) msymbol(O none none) mcolor(gs8 black black) /// connect(none l l) lcolor(gs8 black black) lwidth(thin thick thick) /// graphregion(color(white)) xtitle("") ytitle("Simulated log ambient ozone (residuals)") /// title("Panel F") legend(off) graph export $imagespath/basicprepost.eps, as(eps) replace graph export $imagespath/basicprepost.png, as(png) width(900) replace //TABLE 1: ***RDIT ESTIMATES, NO CONFOUNDERS insheet using results_airquality_basic.csv, comma clear /* codebook monitor //108 monitors tab pollutant // 1 pollutant, ozone tab beta_t //1 betas tab start_d //10 start dates sum length anticipation expdecay phasein //noconfounders tab genlog //no confounders sum mintreat maxtreat //constant treatment effect */ replace bic=. if estimator!="global_poly" & estimator!="separate_poly" isid monitor beta_true start_date estimator version bysort monitor beta_true start_date estimator: egen minbic=min(bic) gen best_poly=1 if round(bic,0.01)==round(minbic,0.01) replace best_poly=. if bic==. gen order=subinstr(version,"order_","",.) if estimator=="global_poly" destring order, force replace gen temp=subinstr(version,"_"," ",.) if estimator=="separate_poly" gen orderpre=word(temp,2) if estimator=="separate_poly" gen orderpost= word(temp,3) if estimator=="separate_poly" destring orderpre orderpost, replace drop temp sum beta_hat order n if best==1 & estimator=="global_poly" sum beta_hat orderpre orderpost n if best==1 & estimator=="separate_poly" /* //bic tie for monitor duplicates tag monitor start_date if estimator=="global_poly" & best==1, gen(dup) tab dup drop dup duplicates tag monitor start_date if estimator=="separate_poly" & best==1, gen(dup) tab dup drop dup */ sum beta_hat n if estimator=="local_linear" & version=="bandwidth_30" sum beta_hat n if estimator=="local_linear_fullsample" & version=="bandwidth_30" sum beta_hat n if estimator=="monthlycollapse" insheet using results_noise_basic.csv, comma clear /* codebook monitor //1000 monitors tab pollutant // 1 pollutant, ozone tab beta_t //1 betas tab start_d //1 start date sum length anticipation expdecay phasein //noconfounders tab genlog //no confounders sum mintreat maxtreat //constant treatment effe ct */ replace bic=. if estimator!="global_poly" & estimator!="separate_poly" isid monitor beta_true start_date estimator version bysort monitor beta_true start_date estimator: egen minbic=min(bic) gen best_poly=1 if round(bic,0.01)==round(minbic,0.01) replace best_poly=. if bic==. gen order=subinstr(version,"order_","",.) if estimator=="global_poly" destring order, force replace gen temp=subinstr(version,"_"," ",.) if estimator=="separate_poly" gen orderpre=word(temp,2) if estimator=="separate_poly" gen orderpost= word(temp,3) if estimator=="separate_poly" destring orderpre orderpost, replace drop temp sum beta_hat order n if best==1 & estimator=="global_poly" sum beta_hat orderpre orderpost n if best==1 & estimator=="separate_poly" /* //bic tie for monitor duplicates tag monitor start_date if estimator=="global_poly" & best==1, gen(dup) tab dup drop dup duplicates tag monitor start_date if estimator=="separate_poly" & best==1, gen(dup) tab dup drop dup */ sum beta_hat n if estimator=="local_linear" & version=="bandwidth_30" sum beta_hat n if estimator=="local_linear_fullsample" & version=="bandwidth_30" sum beta_hat n if estimator=="monthlycollapse" //FIGURE 2: ***SHARP DECAY, BIAS TOO LARGE insheet using results_airquality_sharpdecay.csv, comma clear replace bic=. if estimator!="global_poly" isid monitor start_date estimator version length bysort monitor start_date estimator length: egen minbic=min(bic) gen best_poly=1 if round(bic,0.0001)==round(minbic,0.0001) replace best_poly=. if bic==. gen order=subinstr(version,"order_","",.) if estimator=="global_poly" destring order, force replace gen temp=subinstr(version,"_"," ",.) if estimator=="separate_poly" gen preorder=word(temp,2) gen postorder=word(temp,3) destring preorder postorder, replace list monitor beta_hat order if best==1 & start_d==13344 & length==365 & estimator=="global_poly" global monitor 5 global order 8 global crop 1.5 global start_date 13344 global beta_true -0.2 global length 365 use counterfactual_data_ak2009, clear gen date_c=(date-$start_date)/1000 gen treatment=0 replace treatment=1 if date>=$start_date & date<$start_date+$length gen outcome=ln(ozone)+$beta_true*treatment gen post_observed=0 replace post_observed=1 if date>=$start_date gen post_true=treatment foreach y of numlist 1/9{ gen date_c`y'=date_c^`y' } quietly tab month, gen(MM) gen dow=dow(date) quietly tab dow, gen(DOW) foreach v in TempMax TempMin Rain Snow{ gen `v'2=`v'^2 gen `v'3=`v'^3 } gen ym=ym(year,month) reg outcome MM* DOW* TempMax* TempMin* Rain* Snow* /// if monitor==$monitor & abs(date-$start_date)<=365*4 predict resid, residuals reg resid post_observed date_c1-date_c$order /// if monitor==$monitor & abs(date-$start_date)<=365*4 predict fitted_observed, xb reg resid post_true date_c1-date_c$order /// if monitor==$monitor & abs(date-$start_date)<=365*4 predict fitted_true, xb label variable fitted_observed "Observed Fitted" label variable fitted_true "True Fitted" /* scatter resid fitted_o fitted_t date /// if monitor==$monitor & abs(date-$start_date)<=365*4, xline($start_date, lcolor(gs8)) /// msize(small tiny small) msymbol(Oh O O) connect(none none none) /// mcolor(gs10 black gs6) /// graphregion(color(white)) xtitle("") */ //not plotting a few outliers: //separate scatters so that we can use connect (to make nice legend) but without a line at the discontinuities twoway (scatter resid date if monitor==$monitor & abs(date-$start_date)<=365*4 /// & abs(resid)<$crop, xline($start_date, lcolor(gs12)) /// msize(small) msymbol(O) connect(none) mcolor(gs12) /// lcolor(gs12) /// graphregion(color(white)) ytitle("Simulated log ambient ozone (residuals)")) /// (scatter fitted_o fitted_t date if monitor==$monitor & abs(date-$start_date)<=365*4 /// & (date-$start_date)<0 /// & abs(resid)<$crop, xline($start_date, lcolor(gs12)) /// msymbol(none none) connect(l l) /// lcolor(black gs6) lwidth(thick vthick) /// graphregion(color(white)) ytitle("Simulated log ambient ozone (residuals)")) /// (scatter fitted_o fitted_t date if monitor==$monitor & abs(date-$start_date)<=365*4 /// & (date-$start_date)>0 & abs(date-$start_date)<365 /// & abs(resid)<$crop, xline($start_date, lcolor(gs12)) /// msymbol(none none) connect(l l) /// lcolor(black gs6) lwidth(thick vthick) /// graphregion(color(white))ytitle("Simulated log ambient ozone (residuals)")) /// (scatter fitted_o fitted_t date if monitor==$monitor & abs(date-$start_date)<=365*4 /// & (date-$start_date)>0 & abs(date-$start_date)>365 /// & abs(resid)<$crop, xline($start_date, lcolor(gs12)) /// msymbol(none none) connect(l l) /// lcolor(black gs6) lwidth(thick vthick) legend(order(1 2 3) col(1)) /// graphregion(color(white)) xtitle("") ytitle("Simulated log ambient ozone (residuals)")) graph export $imagespath/sharpdecaypoly.eps, as(eps) replace graph export $imagespath/sharpdecaypoly.png, as(png) width(1800) replace count if monitor==$monitor & abs(date-$start_date)<=365*4 & abs(resid)>=$crop & resid!=. ***TABLE: SHARP DECAY insheet using results_airquality_sharpdecay.csv, comma clear /* codebook monitor //108 monitors tab pollutant // 1 pollutant, ozone tab beta_t //1 betas tab start_d //10 start dates tab length //two lengths: one month and one year sum anticipation expdecay phasein //no other confounders tab genlog //no other confounders */ replace bic=. if estimator!="global_poly" & estimator!="separate_poly" isid monitor start_date estimator version length bysort monitor start_date estimator length: egen minbic=min(bic) gen best_poly=1 if round(bic,0.0001)==round(minbic,0.0001) replace best_poly=. if bic==. gen order=subinstr(version,"order_","",.) if estimator=="global_poly" destring order, force replace gen temp=subinstr(version,"_"," ",.) if estimator=="separate_poly" gen preorder=word(temp,2) gen postorder=word(temp,3) destring preorder postorder, replace sort monitor start_date length estimator version format beta_hat order preorder postorder %12.2fc format n %12.1fc foreach length of numlist 30 365{ display "best, `length', global poly" sum beta_hat order n if best==1 & length==`length' & estimator=="global_poly", format display "best, `length', separate poly" sum beta_hat preorder postorder n if best==1 & length==`length' & estimator=="separate_poly", format display "local_linear, `length'" sum beta_hat n if estimator=="local_linear" & version=="bandwidth_30" & length==`length', format display "local_linear_fullsample, `length'" sum beta_hat n if estimator=="local_linear_fullsample" & version=="bandwidth_30" & length==`length', format display "monthlycollapse, `length'" sum beta_hat n if estimator=="monthlycollapse" & length==`length', format } insheet using results_noise_sharpdecay.csv, comma clear /* codebook monitor //1000 monitors tab pollutant // 1 pollutant, ozone tab beta_t //1 betas tab start_d //1 start date tab length //two lengths: one month and one year sum anticipation expdecay phasein //no other confounders tab genlog //no other confounders */ replace bic=. if estimator!="global_poly" & estimator!="separate_poly" isid monitor start_date estimator version length bysort monitor start_date estimator length: egen minbic=min(bic) gen best_poly=1 if round(bic,0.0001)==round(minbic,0.0001) replace best_poly=. if bic==. gen order=subinstr(version,"order_","",.) if estimator=="global_poly" destring order, force replace gen temp=subinstr(version,"_"," ",.) if estimator=="separate_poly" gen preorder=word(temp,2) gen postorder=word(temp,3) destring preorder postorder, replace sort monitor start_date length estimator version format beta_hat order preorder postorder %12.2fc format n %12.1fc foreach length of numlist 30 365{ display "best, `length', global poly" sum beta_hat order n if best==1 & length==`length' & estimator=="global_poly", format display "best, `length', separate poly" sum beta_hat preorder postorder n if best==1 & length==`length' & estimator=="separate_poly", format display "local_linear, `length'" sum beta_hat n if estimator=="local_linear" & version=="bandwidth_30" & length==`length', format display "local_linear_fullsample, `length'" sum beta_hat n if estimator=="local_linear_fullsample" & version=="bandwidth_30" & length==`length', format display "monthlycollapse, `length'" sum beta_hat n if estimator=="monthlycollapse" & length==`length', format } //APPENDIX ***FIGURE: SHARP DECAY insheet using results_airquality_sharpdecayfull.csv, comma clear replace bic=. if estimator!="global_poly" isid monitor start_date estimator version length bysort monitor start_date estimator length: egen minbic=min(bic) gen best_poly=1 if round(bic,0.0001)==round(minbic,0.0001) replace best_poly=. if bic==. gen order=subinstr(version,"order_","",.) destring order, force replace sort monitor start_date length estimator version bysort length order: egen mean_byorder=mean(beta_hat) scatter mean_byorder length if order==1, /// mcolor(black) lcolor(black) connect(l) msymbol(none) lwidth(vthick) /// graphregion(color(white)) yline(0, lcolor(gs6)) yline(-0.2, lcolor(gs6)) /// xtitle("True treatment length, days") ytitle("Mean of beta estimates") graph export $imagespath/sharpdecayfull_order1.eps, as(eps) replace graph export $imagespath/sharpdecayfull_order1.png, as(png) width(1800) replace label variable order "order of polynomial" scatter mean_byorder length, /// by(order, legend(off) graphregion(color(white))) /// mcolor(black) lcolor(black) connect(l) msymbol(none) lwidth(vthick) /// graphregion(color(white)) yline(0, lcolor(gs6)) yline(-0.2, lcolor(gs6)) /// xtitle("True treatment length, days") ytitle("Mean of beta estimates") graph export $imagespath/sharpdecayfull_orderall.eps, as(eps) replace graph export $imagespath/sharpdecayfull_orderall.png, as(png) width(1800) replace bysort length: egen meanbest=mean(beta_hat) if best==1 scatter meanbest length if best==1, /// mcolor(black) lcolor(black) connect(l) msymbol(none) lwidth(vthick) /// graphregion(color(white)) yline(0, lcolor(gs6)) yline(-0.2, lcolor(gs6)) /// xtitle("True treatment length, days") ytitle("Mean of beta estimates") graph export $imagespath/sharpdecayfull_orderbest.eps, as(eps) replace graph export $imagespath/sharpdecayfull_orderbest.png, as(png) width(1800) replace //APPENDIX ***FIGURE: GENERALIZED LOGISTIC DECAY FUNCTION use counterfactual_data_ak2009, clear keep date duplicates drop forval b=50(25)150{ forval M=0(200)1000{ local beta_true -0.2 local start_date 13344 local A 0 local K 1 local B=-`b'/10000 local v 0.5 local Q 0.01 *generate treatment and outcome gen treatment_b`b'_M`M'=0 replace treatment_b`b'_M`M'=`beta_true' * (`A'+(`K'-`A')/(1+`Q'*exp(-`B'*(date-`start_date'-`M')))^(1/`v')) if date>=`start_date' sum treatment_b`b'_M`M' if date==13344+365*4 local temp=r(mean) display "`temp'" if `temp'<(`beta_true' * 0.1){ drop treatment_b`b'_M`M' } } } label variable treatment_b150_M0 "B = -0.015, M = 0" label variable treatment_b50_M0 "B = -0.005, M = 0" label variable treatment_b50_M200 "B = -0.005, M = 200" label variable treatment_b150_M1000 "B = -0.015, M = 1000" scatter treatment_b150_M0 treatment_b50_M0 treatment_b50_M200 treatment_b150_M1000 date /// if abs(date-$start_date)<=365*4, /// mcolor(black gs6 gs10 gs12) msize(vsmall small small med) /// graphregion(color(white)) xtitle("") graph export $imagespath/logisticdecayfunctions.eps, as(eps) replace graph export $imagespath/logisticdecayfunctions.png, as(png) width(1800) replace //TABLE 3: ***GENERALIZED LOGISTIC DECAY insheet using results_airquality_genlogdecay.csv, comma clear /*treatment has decayed by at least 90% (see above): b50: m0 to m200 b75: m0 to m600 b100: m0 to m800 b125: m0 to m1000 b150: m0 to m1000 */ drop if genlogdecay=="genlogdecay_50_400" drop if genlogdecay=="genlogdecay_50_600" drop if genlogdecay=="genlogdecay_50_800" drop if genlogdecay=="genlogdecay_50_1000" drop if genlogdecay=="genlogdecay_75_800" drop if genlogdecay=="genlogdecay_75_1000" drop if genlogdecay=="genlogdecay_100_1000" /* codebook monitor //108 monitors tab pollutant // 1 pollutant, ozone tab beta_t //1 betas tab start_d //10 start dates tab genlog //5 b parameters, up to 6 M parameters sum length anticipation expdecay phasein //no other confounders */ replace bic=. if estimator!="global_poly" & estimator!="separate_poly" isid monitor beta_true start_date estimator version genlog bysort monitor beta_true start_date estimator genlog: egen minbic=min(bic) gen best_poly=1 if round(bic,0.00001)==round(minbic,0.00001) replace best_poly=. if bic==. gen order=subinstr(version,"order_","",.) if estimator=="global_poly" destring order, force replace gen temp=subinstr(version,"_"," ",.) if estimator=="separate_poly" gen orderpre=word(temp,2) if estimator=="separate_poly" gen orderpost= word(temp,3) if estimator=="separate_poly" destring orderpre orderpost, replace drop temp gen temp=subinstr(genlog,"_"," ",.) gen decayb=word(temp,2) gen decayM=word(temp,3) destring decay*, replace gen decayB=-decayb/10000 //so that table is in B, not b, scaling table decayB decayM if best==1 & estimator=="global_poly", contents(mean beta_hat sd beta_hat n beta_hat) format (%8.2f) //NOT USED: table decayB decayM if best==1 & estimator=="separate_poly", contents(mean beta_hat) format(%8.2f) table decayB decayM if estimator=="local_linear", contents(mean beta_hat) format(%8.2f) table decayB decayM if estimator=="local_linear_fullsample", contents(mean beta_hat) format(%8.2f) table decayB decayM if estimator=="monthlycollapse", contents(mean beta_hat) format(%8.2f) insheet using results_noise_genlogdecay.csv, comma clear /*treatment has decayed by at least 90% (see above): b50: m0 to m200 b75: m0 to m600 b100: m0 to m800 b125: m0 to m1000 b150: m0 to m1000 */ drop if genlogdecay=="genlogdecay_50_400" drop if genlogdecay=="genlogdecay_50_600" drop if genlogdecay=="genlogdecay_50_800" drop if genlogdecay=="genlogdecay_50_1000" drop if genlogdecay=="genlogdecay_75_800" drop if genlogdecay=="genlogdecay_75_1000" drop if genlogdecay=="genlogdecay_100_1000" /* codebook monitor //1000 monitors tab pollutant // 1 pollutant, ozone tab beta_t //1 betas tab start_d //1 start date tab genlog //5 b parameters, up to 6 M parameters sum length anticipation expdecay phasein //no other confounders */ replace bic=. if estimator!="global_poly" & estimator!="separate_poly" isid monitor beta_true start_date estimator version genlog bysort monitor beta_true start_date estimator genlog: egen minbic=min(bic) gen best_poly=1 if round(bic,0.00001)==round(minbic,0.00001) replace best_poly=. if bic==. gen order=subinstr(version,"order_","",.) if estimator=="global_poly" destring order, force replace gen temp=subinstr(version,"_"," ",.) if estimator=="separate_poly" gen orderpre=word(temp,2) if estimator=="separate_poly" gen orderpost= word(temp,3) if estimator=="separate_poly" destring orderpre orderpost, replace drop temp gen temp=subinstr(genlog,"_"," ",.) gen decayb=word(temp,2) gen decayM=word(temp,3) destring decay*, replace gen decayB=-decayb/10000 //so that table is in B, not b, scaling table decayB decayM if best==1 & estimator=="global_poly", contents(mean beta_hat sd beta_hat n beta_hat) format (%8.2f) //NOT USED: table decayB decayM if best==1 & estimator=="separate_poly", contents(mean beta_hat) format(%8.2f) table decayB decayM if estimator=="local_linear", contents(mean beta_hat) format(%8.2f) table decayb decayM if estimator=="local_linear_fullsample", contents(mean beta_hat) format(%8.2f) table decayB decayM if estimator=="monthlycollapse", contents(mean beta_hat) format(%8.2f) ***TABLE: LINEAR PHASEIN //NOT USING: DIFFERENT PHASE-IN LENGTHS. ONLY SHOWING 7 DAYS, FOR BREVITY. insheet using results_airquality_phasein.csv, comma clear /* codebook monitor //108 monitors tab pollutant // 1 pollutant, ozone tab beta_t //1 betas tab start_d //10 start dates tab phasein //1 phase-in length sum length anticipation expdecay //no other confounders tab genlog //no other confounders */ replace bic=. if estimator!="global_poly" & estimator!="separate_poly" isid monitor start_date estimator version phasein bysort monitor start_date estimator phasein: egen minbic=min(bic) gen best_poly=1 if round(bic,0.0001)==round(minbic,0.0001) replace best_poly=. if bic==. gen order=subinstr(version,"order_","",.) if estimator=="global_poly" destring order, force replace gen temp=subinstr(version,"_"," ",.) if estimator=="separate_poly" gen preorder=word(temp,2) gen postorder=word(temp,3) destring preorder postorder, replace foreach phasein of numlist 7 { display "best, `phasein', global poly" sum beta_hat order n if best==1 & phasein==`phasein' & estimator=="global_poly" display "best, `phasein', separate poly" sum beta_hat preorder postorder n if best==1 & phasein==`phasein' & estimator=="separate_poly" display "local_linear, `phasein'" sum beta_hat n if estimator=="local_linear" & version=="bandwidth_30" & phasein==`phasein' display "local_linear_fullsample, `phasein'" sum beta_hat n if estimator=="local_linear_fullsample" & version=="bandwidth_30" & phasein==`phasein' display "monthlycollapse, `phasein'" sum beta_hat n if estimator=="monthlycollapse" & phasein==`phasein' } insheet using results_noise_phasein.csv, comma clear /* codebook monitor //1000 monitors tab pollutant // 1 pollutant, ozone tab beta_t //1 betas tab start_d //1 start date tab phasein //1 phase-in length sum length anticipation expdecay //no other confounders tab genlog //no other confounders */ replace bic=. if estimator!="global_poly" & estimator!="separate_poly" isid monitor start_date estimator version phasein bysort monitor start_date estimator phasein: egen minbic=min(bic) gen best_poly=1 if round(bic,0.0001)==round(minbic,0.0001) replace best_poly=. if bic==. gen order=subinstr(version,"order_","",.) if estimator=="global_poly" destring order, force replace gen temp=subinstr(version,"_"," ",.) if estimator=="separate_poly" gen preorder=word(temp,2) gen postorder=word(temp,3) destring preorder postorder, replace foreach phasein of numlist 7 { display "best, `phasein', global poly" sum beta_hat order n if best==1 & phasein==`phasein' & estimator=="global_poly" display "best, `phasein', separate poly" sum beta_hat preorder postorder n if best==1 & phasein==`phasein' & estimator=="separate_poly" display "local_linear, `phasein'" sum beta_hat n if estimator=="local_linear" & version=="bandwidth_30" & phasein==`phasein' display "local_linear_fullsample, `phasein'" sum beta_hat n if estimator=="local_linear_fullsample" & version=="bandwidth_30" & phasein==`phasein' display "monthlycollapse, `phasein'" sum beta_hat n if estimator=="monthlycollapse" & phasein==`phasein' } //AUTOREGRESSION insheet using results_autoregress.csv, comma clear isid monitor auto_parameter ar1_included estimator version /* codebook monitor //1000 monitors tab auto_parameter tab ar1_included */ replace bic=. if estimator!="global_poly" & estimator!="separate_poly" isid monitor auto_parameter ar1_included estimator version bysort monitor auto_parameter ar1_included estimator: egen minbic=min(bic) gen best_poly=1 if round(bic,0.01)==round(minbic,0.01) replace best_poly=. if bic==. gen order=subinstr(version,"order_","",.) if estimator=="global_poly" destring order, force replace gen temp=subinstr(version,"_"," ",.) if estimator=="separate_poly" gen preorder=word(temp,2) gen postorder=word(temp,3) destring preorder postorder, replace table auto_parameter if ar1_included=="no" & best==1 & estimator=="global_poly", /// contents(mean beta_hat sd beta_hat mean ar1_estimate sd ar1_estimate) format (%8.2f) table auto_parameter if ar1_included=="no" & best==1 & estimator=="global_poly", /// contents(mean order sd order n beta_hat n ar1_estimate) format (%8.2f) table auto_parameter if ar1_included=="no" & best==1 & estimator=="separate_poly", /// contents(mean beta_hat sd beta_hat mean ar1_estimate sd ar1_estimate) format (%8.2f) table auto_parameter if ar1_included=="no" & best==1 & estimator=="separate_poly", /// contents(mean preorder mean postorder sd preorder sd postorder) format (%8.2f) table auto_parameter if ar1_included=="no" & best==1 & estimator=="separate_poly", /// contents(n beta_hat n ar1_estimate) format (%8.2f) table auto_parameter if ar1_included=="no" & estimator=="local_linear" & version=="bandwidth_30", /// contents(mean beta_hat sd beta_hat mean ar1_estimate sd ar1_estimate) format (%8.2f) table auto_parameter if ar1_included=="no" & estimator=="local_linear" & version=="bandwidth_30", /// contents(mean order sd order n beta_hat n ar1_estimate) format (%8.2f) table auto_parameter if ar1_included=="no" & estimator=="local_linear_fullsample" & version=="bandwidth_30", /// contents(mean beta_hat sd beta_hat mean ar1_estimate sd ar1_estimate) format (%8.2f) table auto_parameter if ar1_included=="no" & estimator=="local_linear_fullsample" & version=="bandwidth_30", /// contents(mean order sd order n beta_hat n ar1_estimate) format (%8.2f) table auto_parameter if ar1_included=="no" & estimator=="monthlycollapse", /// contents(mean beta_hat sd beta_hat mean ar1_estimate sd ar1_estimate) format (%8.2f) table auto_parameter if ar1_included=="no" & estimator=="monthlycollapse", /// contents(mean order sd order n beta_hat n ar1_estimate) format (%8.2f) table auto_parameter if ar1_included=="yes" & best==1 & estimator=="global_poly", /// contents(mean beta_hat sd beta_hat mean ar1_estimate sd ar1_estimate) format (%8.2f) table auto_parameter if ar1_included=="yes" & best==1 & estimator=="global_poly", /// contents(mean order sd order n beta_hat n ar1_estimate) format (%8.2f) table auto_parameter if ar1_included=="yes" & best==1 & estimator=="separate_poly", /// contents(mean beta_hat sd beta_hat mean ar1_estimate sd ar1_estimate) format (%8.2f) table auto_parameter if ar1_included=="yes" & best==1 & estimator=="separate_poly", /// contents(mean preorder mean postorder sd preorder sd postorder) format (%8.2f) table auto_parameter if ar1_included=="yes" & best==1 & estimator=="separate_poly", /// contents(n beta_hat n ar1_estimate) format (%8.2f) table auto_parameter if ar1_included=="yes" & estimator=="local_linear" & version=="bandwidth_30", /// contents(mean beta_hat sd beta_hat mean ar1_estimate sd ar1_estimate) format (%8.2f) table auto_parameter if ar1_included=="yes" & estimator=="local_linear" & version=="bandwidth_30", /// contents(mean order sd order n beta_hat n ar1_estimate) format (%8.2f) table auto_parameter if ar1_included=="yes" & estimator=="local_linear_fullsample" & version=="bandwidth_30", /// contents(mean beta_hat sd beta_hat mean ar1_estimate sd ar1_estimate) format (%8.2f) table auto_parameter if ar1_included=="yes" & estimator=="local_linear_fullsample" & version=="bandwidth_30", /// contents(mean order sd order n beta_hat n ar1_estimate) format (%8.2f) table auto_parameter if ar1_included=="yes" & estimator=="monthlycollapse", /// contents(mean beta_hat sd beta_hat mean ar1_estimate sd ar1_estimate) format (%8.2f) table auto_parameter if ar1_included=="yes" & estimator=="monthlycollapse", /// contents(mean order sd order n beta_hat n ar1_estimate) format (%8.2f) //ESTIMATING AR(1) PARAMETERS //////////////////////////////// //DAILY AIR QUALITY DATA FROM EPA ///////////////////////////////// clear all foreach y of numlist 2004/2005{ foreach variable in 42101 42401 42602 44201 81102 88101{ disp "******* `variable', `y' **********" insheet using daily_`variable'_`y'.csv, comma clear tab parametern tab units sum arithmeticmean sum stmaxvalue } } clear all foreach y of numlist 2004/2005{ foreach variable in 42101 42401 42602 44201 81102 88101{ insheet using daily_`variable'_`y'.csv, comma clear capture destring statecode, force replace save daily_`variable'_`y', replace } } clear all foreach y of numlist 2004/2005{ foreach variable in 42101 42401 42602 44201 81102 88101{ append using daily_`variable'_`y' capture drop pollutantstandard //string versus byte } } replace parametername="co" if strmatch(parametername,"*monoxide*") replace parametername="no2" if strmatch(parametername,"*Nitrogen*") replace parametername="ozone" if strmatch(parametername,"*Ozone*") replace parametername="so2" if strmatch(parametername,"*Sulfur*") replace parametername="pm25" if parametername=="PM2.5 - Local Conditions" replace parametername="pm10" if parametername=="PM10 Total 0-10um STP" drop countyname cityname cbsaname datum gen event=0 replace event=1 if eventtype!="None" drop eventtype address localsitename collapse (mean) mean=arithmeticmean latitude longitude, /// by(statecode countycode sitenum parametername datelocal statename) drop if statename=="Canada"|statename=="Country Of Mexico"|statename=="Puerto Rico"|statename=="Virgin Islands" isid statecode countycode sitenum parametername datelocal reshape wide mean latitude longitude, i(statec countyc siten dateloc) j(parametername) string //latitude longitude not constant rename meanco co rename meanno2 no2 rename meanozone ozone rename meanso2 so2 rename meanpm10 pm10 rename meanpm25 pm25 corr latitude* corr longitude* egen tlatitude=rowmean(latitude*) egen tlongitude=rowmean(longitude*) drop latitude* longitude* rename tlat latitude rename tlong longitude compress describe isid statec countyc siten datel save airquality, replace //WEATHER foreach y of numlist 2004/2005{ insheet using daily_TEMP_`y'.csv, comma clear capture destring statecode, force replace save daily_TEMP_`y', replace insheet using daily_WIND_`y'.csv, comma clear capture destring statecode, force replace save daily_WIND_`y', replace } clear all foreach y of numlist 2004/2005{ append using daily_TEMP_`y' append using daily_WIND_`y' } drop countyname cityname cbsaname datum drop sampleduration eventtype address localsitename drop if parametername=="Wind Direction - Resultant" drop pollutantstandard unitsofmeas //knots and fahrenheit drop observationcount //almost entirely 24 drop aqi methodcode methodname dateoflast compress describe collapse (mean) mean=arithmeticmean latitude longitude, /// by(statecode countycode sitenum parametername datelocal statename) drop if statename=="Canada"|statename=="Country Of Mexico"|statename=="Puerto Rico"|statename=="Virgin Islands" isid statecode countycode sitenum parametername datelocal replace parameter="temperature" if parameter=="Outdoor Temperature" replace parameter="windspeed" if parameter=="Wind Speed - Resultant" reshape wide mean, i(statec countyc siten dateloc) j(parametername) string rename meantemperature temperature rename meanwindspeed windspeed isid statec countyc siten datel save temperatureandwind, replace //////////////////////////////////// //AR(1) REGRESSIONS, NO PRECIPITATION, ALL STATIONS ////////////////////////////////////// use airquality, clear merge 1:1 statecode countycode sitenum datelocal using temperatureandwind, nogen egen id=group(statec countyc siten) gen date=date(datelocal,"YMD") xtset id date, daily gen dow=dow(date) gen month=month(date) quietly tab month, gen(MM) quietly tab dow, gen(DOW) xi i.month|temperature, pref(_1) noomit //linear impact of temperature varies by calendar month xi i.month|windspeed, pref(_2) noomit foreach v in temperature windspeed{ gen `v'2=`v'^2 gen `v'3=`v'^3 //cubic in temperature, windspeed } gen temperaturewind=temperature*windspeed quietly sum date local temp=r(mean) gen date1=date-`temp' //cubic time trend, demeaned gen date2=date1^2 gen date3=(date1/100)^3 //rescaled gen week=week(date) gen year=year(date) egen weekofsample=group(week year) //clustering might be needed if too unbalanced for newey-west (unless obs count rather than date) foreach pollutant in co no2 ozone so2 pm10 pm25{ gen temp=1 if `pollutant'!=. & l.`pollutant'!=. & temperature!=. & windspeed!=. bysort id: egen count_`pollutant'=count(temp) drop temp } sum count_* foreach pollutant in co no2 ozone so2 pm10 pm25{ codebook id if count_`pollutant'>=30 } *heterogeneity file open heter using heter_daily_version1.csv, /// write text replace file write heter "id,pollutant,obs,r2,ar1estimate,ar1se" _n foreach pollutant in co no2 ozone so2 pm10 pm25{ levelsof id if `pollutant'!=. & temperature!=. & windspeed!=. & count_`pollutant'>=30, local(levels) foreach id of local levels{ file write heter "`id',`pollutant'," quietly reg `pollutant' l.`pollutant' temperature* windspeed* temperaturewind DOW* MM* date1-date3 /// if id==`id', vce(cluster weekofsample) local obs=e(N) local r2=e(r2) file write heter "`obs',`r2'," quietly lincom l.`pollutant' local temp1=r(estimate) local temp2=r(se) file write heter "`temp1',`temp2'" _n } } file close heter //filling in missing weather: bysort statec date: egen temp1=mean(temperature) bysort statec date: egen temp2=mean(windspeed) replace temperature=temp1 if temperature==. replace windspeed=temp2 if windspeed==. drop temp1 temp2 xtset id date, daily drop count_* temperature2-temperaturewind foreach v in temperature windspeed{ gen `v'2=`v'^2 gen `v'3=`v'^3 //cubic in temperature, windspeed } gen temperaturewind=temperature*windspeed foreach pollutant in co no2 ozone so2 pm10 pm25{ gen temp=1 if `pollutant'!=. & l.`pollutant'!=. & temperature!=. & windspeed!=. bysort id: egen count_`pollutant'=count(temp) drop temp } sum count_* foreach pollutant in co no2 ozone so2 pm10 pm25{ codebook id if count_`pollutant'>=30 } *heterogeneity file open heter using heter_daily_version2.csv, /// write text replace file write heter "id,pollutant,obs,r2,ar1estimate,ar1se" _n foreach pollutant in co no2 ozone so2 pm10 pm25{ levelsof id if `pollutant'!=. & temperature!=. & windspeed!=. & count_`pollutant'>=30, local(levels) foreach id of local levels{ file write heter "`id',`pollutant'," quietly reg `pollutant' l.`pollutant' temperature* windspeed* temperaturewind DOW* MM* date1-date3 /// if id==`id', vce(cluster weekofsample) local obs=e(N) local r2=e(r2) file write heter "`obs',`r2'," quietly lincom l.`pollutant' local temp1=r(estimate) local temp2=r(se) file write heter "`temp1',`temp2'" _n } } file close heter //county level: collapse (mean) co no2 ozone so2 pm10 pm25 temperature windspeed, /// by(statec countyc datelocal date* week year weekofsample dow month MM* DOW*) isid statec countyc date egen id=group(statec countyc) xtset id date, daily foreach v in temperature windspeed{ gen `v'2=`v'^2 gen `v'3=`v'^3 //cubic in temperature, windspeed } gen temperaturewind=temperature*windspeed foreach pollutant in co no2 ozone so2 pm10 pm25{ gen temp=1 if `pollutant'!=. & l.`pollutant'!=. & temperature!=. & windspeed!=. bysort id: egen count_`pollutant'=count(temp) drop temp } sum count_* foreach pollutant in co no2 ozone so2 pm10 pm25{ codebook id if count_`pollutant'>=30 } *heterogeneity file open heter using heter_daily_version3.csv, /// write text replace file write heter "id,pollutant,obs,r2,ar1estimate,ar1se" _n foreach pollutant in co no2 ozone so2 pm10 pm25{ levelsof id if `pollutant'!=. & temperature!=. & windspeed!=. & count_`pollutant'>=30, local(levels) foreach id of local levels{ file write heter "`id',`pollutant'," quietly reg `pollutant' l.`pollutant' temperature* windspeed* temperaturewind DOW* MM* date1-date3 /// if id==`id', vce(cluster weekofsample) local obs=e(N) local r2=e(r2) file write heter "`obs',`r2'," quietly lincom l.`pollutant' local temp1=r(estimate) local temp2=r(se) file write heter "`temp1',`temp2'" _n } } file close heter //////////////////////////////////////////// //PRECIPITATION CONTROLS, SUBSAMPLE OF STATES /////////////////////////////////////////// //6 biggest states only because computationally burdensome //"biggest" meaning most pm25 monitors (that's smallest category of the pollutants) use airquality, clear bysort statec: egen count=count(pm25) keep statec staten count duplicates drop gsort -count list statec staten count //california, pennsylvania, florida, ohio, north carolina, texas //6 42 12 39 37 48 use airquality, clear keep statec countyc siten latitude longitude duplicates drop isid statec countyc siten keep if statec==6|statec==12|statec==37|statec==39|statec==42|statec==48 save daily_data_locations, replace *Next: match to weather data. start with gridded weather data from Schlenker? Vincenty distance? *I take the monitor locations and match to one of the grids and Roberts and Schlenker insheet using gridInfo.csv, comma clear drop croparea //"cross" is very slow, and matching by vincenty within that will also be slow. //for simplicity, joinby within a state. tostring fips, replace gen statecode=substr(fips,1,1) if length(fips)==4 replace statecode=substr(fips,1,2) if length(fips)==5 drop fips destring, replace keep if statec==6|statec==12|statec==37|statec==39|statec==42|statec==48 count joinby statecode using daily_data_locations, unmatched(none) isid gridnumber latitude longitude gen longitude_grid = -125 + mod(gridnumber-1,1405)/24 gen latitude_grid = 49.9375+1/48 - ceil(gridnumber/1405)/24 vincenty latitude_g longitude_g latitude longitude, vin(distance) gsort statec countyc sitenum distance keep if statec!=statec[_n-1] | countyc!=countyc[_n-1] | siten!=siten[_n-1] sum distance isid statec countyc sitenum keep statec countyc sitenum gridnumber save monitor_x_gridnumber, replace foreach s of numlist 6 12 37 39 42 48{ foreach y of numlist 2004(1)2005{ insheet using state`s'_year`y'.csv, comma clear save state`s'_year`y', replace } } clear all foreach s of numlist 6 12 37 39 42 48{ foreach y of numlist 2004(1)2005{ append using state`s'_year`y', } } save statessix_0405, replace foreach y of numlist 2004/2005{ foreach variable in 42101 42401 42602 44201 81102 88101{ erase daily_`variable'_`y'.dta } } foreach y of numlist 2004/2005{ erase daily_TEMP_`y'.dta erase daily_WIND_`y'.dta } foreach s of numlist 6 12 37 39 42 48{ foreach y of numlist 2004(1)2005{ erase state`s'_year`y'.dta } } use airquality, clear merge m:1 statec countyc siten using monitor_x_gridnumber keep if _m==3 //34% of observations drop _m gen date=date(datel,"YMD") gen month=month(date) gen day=day(date) gen year=year(date) merge m:1 gridnumber month day year using statessix_0405 //expect lots of 3s and 2s, expect no 1s //_m==2 not a problem: weather data but no air quality data drop if _m==2 drop _m merge 1:1 statecode countycode sitenum datelocal using temperatureandwind, /// keep(master match) nogen //comparing epa's temperature (i assume fahrenheit?) to roberts and schlenker temperature (celsius) gen tmean=(tmin/2+tmax/2)*1.8+32 corr temperature tmean reg temperature tmean scatter temperature tmean //close egen id=group(statec countyc siten) xtset id date, daily gen dow=dow(date) quietly tab month, gen(MM) quietly tab dow, gen(DOW) xi i.month|tmean, pref(_1) noomit //linear impact of temperature varies by calendar month xi i.month|prec, pref(_2) noomit foreach v in tmean prec{ gen `v'2=`v'^2 gen `v'3=`v'^3 //cubic in temperature, windspeed } gen tmeanprec=tmean*prec quietly sum date local temp=r(mean) gen date1=date-`temp' //cubic time trend, demeaned gen date2=date1^2 gen date3=(date1/100)^3 //rescaled gen week=week(date) egen weekofsample=group(week year) //clustering might be needed if too unbalanced for newey-west (unless obs count rather than date) foreach pollutant in co no2 ozone so2 pm10 pm25{ gen temp=1 if `pollutant'!=. & l.`pollutant'!=. & tmean!=. & prec!=. bysort id: egen count_`pollutant'=count(temp) drop temp } sum count_* foreach pollutant in co no2 ozone so2 pm10 pm25{ codebook id if count_`pollutant'>=30 } *heterogeneity file open heter using heter_daily_version4.csv, /// write text replace file write heter "id,pollutant,obs,r2,ar1estimate,ar1se" _n foreach pollutant in co no2 ozone so2 pm10 pm25{ levelsof id if `pollutant'!=. & tmean!=. & prec!=. & count_`pollutant'>=30, local(levels) foreach id of local levels{ file write heter "`id',`pollutant'," quietly reg `pollutant' l.`pollutant' tmean* prec* tmeanprec DOW* MM* date1-date3 /// if id==`id', vce(cluster weekofsample) local obs=e(N) local r2=e(r2) file write heter "`obs',`r2'," quietly lincom l.`pollutant' local temp1=r(estimate) local temp2=r(se) file write heter "`temp1',`temp2'" _n } } file close heter //filling in windspeed bysort statec date: egen temp2=mean(windspeed) replace windspeed=temp2 if windspeed==. drop temp2 xtset id date, daily foreach v in windspeed{ gen `v'2=`v'^2 gen `v'3=`v'^3 //cubic in temperature, windspeed } gen tmeanwind=tmean*windspeed drop count_* foreach pollutant in co no2 ozone so2 pm10 pm25{ gen temp=1 if `pollutant'!=. & l.`pollutant'!=. & tmean!=. & windspeed!=. bysort id: egen count_`pollutant'=count(temp) drop temp } sum count_* foreach pollutant in co no2 ozone so2 pm10 pm25{ codebook id if count_`pollutant'>=30 } *heterogeneity file open heter using heter_daily_version5.csv, /// write text replace file write heter "id,pollutant,obs,r2,ar1estimate,ar1se" _n foreach pollutant in co no2 ozone so2 pm10 pm25{ levelsof id if `pollutant'!=. & temperature!=. & windspeed!=. & count_`pollutant'>=30, local(levels) foreach id of local levels{ file write heter "`id',`pollutant'," quietly reg `pollutant' l.`pollutant' tmean* prec* windspeed* tmeanprec tmeanwind DOW* MM* date1-date3 /// if id==`id', vce(cluster weekofsample) local obs=e(N) local r2=e(r2) file write heter "`obs',`r2'," quietly lincom l.`pollutant' local temp1=r(estimate) local temp2=r(se) file write heter "`temp1',`temp2'" _n } } file close heter //results insheet using heter_daily_version1.csv, comma clear names table pollutant, contents(mean ar1est mean ar1se n obs mean obs mean r2 ) insheet using heter_daily_version5.csv, comma clear names table pollutant, contents(mean ar1est mean ar1se n obs mean obs mean r2 ) //not used: insheet using heter_daily_version2.csv, comma clear names table pollutant, contents(mean ar1est mean ar1se n obs mean obs mean r2 ) insheet using heter_daily_version3.csv, comma clear names table pollutant, contents(mean ar1est mean ar1se n obs mean obs mean r2 ) insheet using heter_daily_version4.csv, comma clear names table pollutant, contents(mean ar1est mean ar1se n obs mean obs mean r2 )