
* ANALYSIS
* Session setup: switch off output paging, move to the replication folder,
* close any log left open by a previous run, start a fresh text log, and
* load the replication dataset.
set more off
cd "X:\Data\Workdata\702092\marco\BO\zeroinc\restart2017\baseline\JAE_replication\200520\toJAE"
capture log close
cls
log using "analysis_1.txt", text replace

use "data_replication_JAE", clear

***************************
***************************
*TABLE 1
* Summary statistics for the same variable list, first on all observations
* and then restricted to s_by_f_sample, followed by the detailed
* distribution of old_m_i_edp.

* The three locals reproduce the three lines of the variable list.
local t1_list_a "ed_tert hipaytert STEMtert logPI yemp"
local t1_list_b "old_educp educp wom mg order sibsize fspacing durb3"
local t1_list_c "old_m_i_edp m_i_edp old_sd_i_edp sd_i_edp sg_s m_i_mg enr_s m_i_sibsize m_i_agemb m_i_agef m_i_durb3 m_i_ibo1 m_i_ibo2 m_i_ibo3 m_i_ibo4"

summarize `t1_list_a' `t1_list_b' `t1_list_c'
summarize `t1_list_a' `t1_list_b' `t1_list_c' if s_by_f_sample
summarize old_m_i_edp, detail

***************************
***************************
*TABLE 2
* Regressions of individual characteristics on m_i_edp.
* Panel A uses all observations; Panel B keeps s_by_f_sample only and adds
* s_by_f fixed effects with two-way clustering on school and family.
*
* The outreg2 calls deliberately carry no variable list, so every estimated
* coefficient is exported. They previously expanded the global "vars", which
* is not defined until later sections: empty on a fresh session, but holding
* a stale list from a later section when the do-file is re-run in the same
* session.

preserve

* Standardise bedp_ca to mean 0 and standard deviation 1 over all observations.
su bedp_ca
replace bedp_ca = (bedp_ca-r(mean))/r(sd)

* One indicator per distinct value of order: oo1, oo2, ...
tab order, gen(oo)

* Outcomes regressed on m_i_edp alone in both panels.
local t2_outcomes "wom sibsize oo1 oo2 oo3 oo4 mg agem_birth agef_birth  durb3"

*PANEL A
*Cohort and school fixed effects, and school specific cohort trends
*educp - these add controls for exclusion bias
reghdfe educp m_i_edp  bedp_ca, absorb(yob school##c.yob) vce(cluster school)
outreg2 using Tab2A.xls, replace excel nocon auto(2) bdec(3) tdec(3) ctitle(educp)

*others
foreach x of local t2_outcomes {
	reghdfe `x' m_i_edp ,  absorb(yob school##c.yob) vce(cluster school)
	outreg2 using Tab2A.xls, append excel nocon auto(2) bdec(3) tdec(3) ctitle(`x')
}

*PANEL B
*school by family FE sample
keep if s_by_f_sample
*Cohort and school-by-family fixed effects, and school specific cohort trends
*educp - these add controls for exclusion bias
reghdfe educp m_i_edp  bedp_ca, absorb(s_by_f yob school#c.yob) vce(cluster school family)
outreg2 using Tab2B.xls, replace excel nocon auto(2) bdec(3) tdec(3) ctitle(educp)

*others
foreach x of local t2_outcomes {
	reghdfe `x' m_i_edp , absorb(s_by_f yob school#c.yob) vce(cluster school family)
	outreg2 using Tab2B.xls, append excel nocon auto(2) bdec(3) tdec(3) ctitle(`x')
}


* outreg2 leaves a .txt companion next to each .xls; remove it if present.
capture erase Tab2A.txt
capture erase Tab2B.txt
restore

****************************
****************************

*TABLE 3
* Builds a dataset of (younger sibling, older sibling) pairs within family and
* regresses an indicator for matching school identifiers on the older
* sibling's m_i_edp. Everything happens inside preserve/restore, so the main
* dataset is untouched afterwards.

preserve
* Last and first birth cohort (yob) observed for each schoolid.
bys schoolid: egen last_schoolcohort = max(yob)
bys schoolid: egen first_schoolcohort = min(yob)

* Observations flagged same_yob==1 are removed before siblings are ranked.
drop if same_yob==1

* Rank siblings within family by yob: old marks the first row, size is the
* number of rows in the family, ord is the position.
sort family yob
by family: gen old = _n==1
by family: gen size = _N
by family: gen ord = _n
* ex2 marks every row that is not first in its family.
gen ex2 = old==0
* Add extra copies of the 3rd to 6th rows of each family (one extra copy for
* ord 3, two for ord 4, and so on). The gen() variables equal 1 on the added
* copies and 0 on original rows. Rows with ord above 6 are not expanded.
expand 2 if ord==3 , gen(ex3)
expand 3 if ord==4 , gen(ex4)
expand 4 if ord==5 , gen(ex5)
expand 5 if ord==6 , gen(ex6)

* Added copies must not also count as ex2 rows.
replace ex2 = 0 if  ex3==1

* Relabel the added copies so that each row of a sibling carries exactly one
* of the flags ex2 ... exK. Each pair of replace statements moves the first
* row of the still-unassigned copies to the next lower flag; the data are
* re-sorted between pairs because the previous pair changed the sort key.
sort family yob ex4
replace ex2 = 0 if ex4==1
by family yob ex4: replace ex3 = 1 if _n==1 & ex4==1
by family yob ex4: replace ex4 = 0 if _n==1 & ex4==1

sort family yob ex5
replace ex2 = 0 if ex5==1
by family yob ex5: replace ex3 = 1 if _n==1 & ex5==1
by family yob ex5: replace ex5 = 0 if _n==1 & ex5==1
sort family yob ex5
by family yob ex5: replace ex4 = 1 if _n==1 & ex5==1
by family yob ex5: replace ex5 = 0 if _n==1 & ex5==1

sort family yob ex6
replace ex2 = 0 if ex6==1
by family yob ex6: replace ex3 = 1 if _n==1 & ex6==1
by family yob ex6: replace ex6 = 0 if _n==1 & ex6==1
sort family yob ex6
by family yob ex6: replace ex4 = 1 if _n==1 & ex6==1
by family yob ex6: replace ex6 = 0 if _n==1 & ex6==1
sort family yob ex6
by family yob ex6: replace ex5 = 1 if _n==1 & ex6==1
by family yob ex6: replace ex6 = 0 if _n==1 & ex6==1

* Final order within family: by yob, and within a sibling the ex2 row first,
* then ex3, ex4, ex5, ex6. The fixed offsets in the loop below rely on this.
gsort family yob -ex2 -ex3 -ex4 -ex5 -ex6
* Discard every existing old_ variable; the loop below recreates the ones
* used in this section.
drop old_*
* For each listed variable, copy the value from an earlier row of the same
* family. With the ordering above, an ex2 row reads the sibling one place
* older, an ex3 row the sibling two places older, and so on up to ex6; the
* offsets count the rows in between, including the added copies.
foreach var in schoolid parish wom yob m_i_mg m_i_edp sd_i_edp enr_s sg_s m_i_agemb m_i_agef m_i_sibsize m_i_durb3 m_i_ibo1 m_i_ibo2 m_i_ibo3 m_i_ibo4  durb3 last_schoolcohort order{

	by family: gen old_`var' = `var'[_n-1] if ex2==1

	by family: replace old_`var' = `var'[_n-3] if ex3==1 & ord==3

	by family: replace old_`var' = `var'[_n-4] if ex3==1 & ord==4
	by family: replace old_`var' = `var'[_n-6] if ex4==1 & ord==4

	by family: replace old_`var' = `var'[_n-5] if ex3==1 & ord==5
	by family: replace old_`var' = `var'[_n-8] if ex4==1 & ord==5
	by family: replace old_`var' = `var'[_n-10] if ex5==1 & ord==5

	by family: replace old_`var' = `var'[_n-6] if ex3==1 & ord==6
	by family: replace old_`var' = `var'[_n-10] if ex4==1 & ord==6
	by family: replace old_`var' = `var'[_n-13] if ex5==1 & ord==6
	by family: replace old_`var' = `var'[_n-15] if ex6==1 & ord==6
}

* Pair-level indicators.
* diff_wom equals 1 when wom and old_wom are equal, despite its name.
* NOTE(review): old_school here, and old_durb / old_m_i_durb in the
* regressions below, are not created under those exact names by the loop
* above, which creates old_schoolid, old_durb3 and old_m_i_durb3. Presumably
* they resolve through variable-name abbreviation - confirm that varabbrev is
* on and that school and schoolid use the same coding.
gen diff_wom = wom==old_wom
gen same_school = school==old_school
gen same_parish = parish==old_parish
ge diff_wom_wom = diff_wom*wom
* old_closed: the older sibling's school has no cohort as late as this yob.
* new_open: this row's school has no cohort as early as the older sibling's yob.
ge old_closed = old_last_schoolcohort<yob
ge new_open = first_schoolcohort>old_yob
ge delta_yob = yob-old_yob

* Descriptives: all pairs, then split by the sign of educp.
su same_school same_parish if old==0 & same_yob==0
su same_school same_parish if old==0 & same_yob==0 & educp>0
su same_school same_parish if old==0 & same_yob==0 & educp<0

* Start from a clean output file; the first outreg2 call also uses replace.
capture erase Tab3.txt
capture erase Tab3.xls

* Four nested specifications, all absorbing old_yob, old_schoolid and
* old_schoolid-specific linear trends in old_yob, clustered on old_schoolid.
* Column 1: old_m_i_edp only.
reghdfe same_school old_m_i_edp ///
					if old==0 & same_yob==0, absorb(old_yob old_schoolid##c.old_yob) vce(cluster old_schoolid)
qui outreg2 using Tab3.xls, excel replace dec(3) nocons nor2 ctitle(same_school)

* Column 2: adds old_closed, new_open and same_parish.
reghdfe same_school old_m_i_edp ///
					old_closed new_open same_parish ///
					if old==0 & same_yob==0, absorb(old_yob old_schoolid##c.old_yob) vce(cluster old_schoolid)
qui outreg2 using Tab3.xls, excel append dec(3) nocons nor2 ctitle(same_school)
* Column 3: adds sibling-pair and older-sibling cohort covariates.
reghdfe same_school old_m_i_edp ///
					old_closed new_open same_parish ///
		            wom diff_wom diff_wom_wom delta_yob old_durb agem_birth agef_birth ///
					old_sg_s old_enr_s old_m_i_sibsize old_m_i_durb old_m_i_mg ///
					if old==0 & same_yob==0, absorb(old_yob old_schoolid##c.old_yob) vce(cluster old_schoolid)
qui outreg2 using Tab3.xls, excel append dec(3) nocons nor2 ctitle(same_school)
* Column 4: adds sibsize, mg and educp.
reghdfe same_school old_m_i_edp ///
					old_closed new_open same_parish ///
		            wom diff_wom diff_wom_wom delta_yob old_durb agem_birth agef_birth ///
					old_sg_s old_enr_s old_m_i_sibsize old_m_i_durb old_m_i_mg ///
					sibsize mg educp ///
					if old==0 & same_yob==0, absorb(old_yob old_schoolid##c.old_yob) vce(cluster old_schoolid)
qui outreg2 using Tab3.xls, excel append dec(3) nocons nor2 ctitle(same_school)
* Remove the .txt companion written by outreg2.
capture erase Tab3.txt
restore

****************************
****************************

*TABLES 4 AND 5
* Both tables run inside one preserve/restore on s_by_f_sample, share the
* control-set globals and the same five specifications, and differ only in
* the outcome list and the output workbook (Tab4.xls / Tab5.xls).
*
* NOTE(review): the "mean outcome" statistic is computed once per outcome on
* the estimation sample of the first specification and reused for all five
* columns, including the two restricted-sample columns - confirm intended.
preserve


keep if s_by_f_sample
global controls "wom i.order"
global add_controls "durb3 sg_s enr_s m_i_agemb m_i_agef m_i_sibsize m_i_durb3 m_i_ibo1 m_i_ibo2 m_i_ibo3 m_i_ibo4 m_i_mg"

global vars "m_i_edp"

local outcomes4 "ed_tert hipaytert STEMtert"
local outcomes5 "logPI yemp"

foreach t in 4 5 {

	* Start each workbook from scratch; all outreg2 calls append.
	capture erase Tab`t'.txt
	capture erase Tab`t'.xls

	foreach y of local outcomes`t' {

		* Options shared by every outreg2 call for this outcome.
		local fmt "excel append dec(3) nocons nor2 ctitle(`y')"

		* Column 1: no controls. Estimated quietly so the outcome mean on
		* the estimation sample can be taken before the results are shown.
		quietly reghdfe `y' $vars , absorb(s_by_f yob) vce(cluster school family)
		quietly summarize `y' if e(sample)
		local mean = r(mean)

		noisily reghdfe
		quietly outreg2 $vars using Tab`t'.xls, `fmt' addtext(base X, yes, add X, no, trends, homog, Max spacing, 17, sibs, all) addstat(mean outcome, `mean')

		* Column 2: adds both control sets.
		noisily reghdfe `y' $vars $controls  $add_controls, absorb(s_by_f yob) vce(cluster school family)
		outreg2 $vars using Tab`t'.xls, `fmt' addtext(base X, yes, add X, yes, trends, homog, Max spacing, 17, sibs, all) addstat(mean outcome, `mean')

		* Column 3: adds school-specific linear trends in yob.
		noisily reghdfe `y' $vars $controls $add_controls, absorb(s_by_f school#c.yob yob) vce(cluster school family)
		outreg2 $vars using Tab`t'.xls, `fmt' addtext(base X, yes, add X, yes, trends, by school, Max spacing, 17, sibs, all) addstat(mean outcome, `mean')

		* Column 4: restricted to max_spacing of at most 5.
		noisily reghdfe `y' $vars $controls $add_controls if max_spacing<=5, absorb(s_by_f school#c.yob yob) vce(cluster school family)
		outreg2 $vars using Tab`t'.xls, `fmt' addtext(base X, yes, add X, yes, trends, by school, Max spacing, 5, sibs, all) addstat(mean outcome, `mean')

		* Column 5: restricted to sibsize+1 equal to sibs_by_school.
		noisily reghdfe `y' $vars $controls $add_controls if sibsize+1==sibs_by_school, absorb(s_by_f school#c.yob yob) vce(cluster school family)
		outreg2 $vars using Tab`t'.xls, `fmt' addtext(base X, yes, add X, yes, trends, by school, Max spacing, 17, sibs, same school) addstat(mean outcome, `mean')

	}

	* Remove the .txt companion written by outreg2.
	capture erase Tab`t'.txt
}

restore

****************************
****************************

*TABLE 6
* Same five specifications as Tables 4 and 5 for logPI and yemp, with the
* reported regressors extended to m_i_edp and educp_m_i_edp.

preserve
keep if s_by_f_sample
global controls "wom i.order"
global add_controls "durb3 sg_s enr_s m_i_agemb m_i_agef m_i_sibsize m_i_durb3 m_i_ibo1 m_i_ibo2 m_i_ibo3 m_i_ibo4 m_i_mg"
global vars "m_i_edp educp_m_i_edp"

* Fixed-effect and clustering options of the two families of specifications.
local opt_homog "absorb(s_by_f yob) vce(cluster school family)"
local opt_trend "absorb(s_by_f school#c.yob yob) vce(cluster school family)"

capture erase Tab6.txt
capture erase Tab6.xls
foreach y in logPI yemp {

	* Options shared by every outreg2 call for this outcome.
	local fmt "excel append dec(3) nocons nor2 ctitle(`y')"

	* Column 1: no controls; the outcome mean on this estimation sample is
	* reported under every column of the outcome.
	quietly reghdfe `y' $vars , `opt_homog'
	quietly summarize `y' if e(sample)
	local mean = r(mean)

	noisily reghdfe
	quietly outreg2 $vars using Tab6.xls, `fmt' addtext(base X, yes, add X, no, trends, homog, Max spacing, 17, sibs, all) addstat(mean outcome, `mean')

	* Column 2: adds both control sets.
	noisily reghdfe `y' $vars $controls    $add_controls, `opt_homog'
	quietly outreg2 $vars using Tab6.xls, `fmt' addtext(base X, yes, add X, yes, trends, homog, Max spacing, 17, sibs, all) addstat(mean outcome, `mean')

	* Column 3: adds school-specific linear trends in yob.
	noisily reghdfe `y' $vars $controls $add_controls, `opt_trend'
	quietly outreg2 $vars using Tab6.xls, `fmt' addtext(base X, yes, add X, yes, trends, by school, Max spacing, 17, sibs, all) addstat(mean outcome, `mean')

	* Column 4: restricted to max_spacing of at most 5.
	noisily reghdfe `y' $vars $controls $add_controls if max_spacing<=5, `opt_trend'
	quietly outreg2 $vars using Tab6.xls, `fmt' addtext(base X, yes, add X, yes, trends, by school, Max spacing, 5, sibs, all) addstat(mean outcome, `mean')

	* Column 5: restricted to sibsize+1 equal to sibs_by_school.
	noisily reghdfe `y' $vars $controls $add_controls if sibsize+1==sibs_by_school, `opt_trend'
	quietly outreg2 $vars using Tab6.xls, `fmt' addtext(base X, yes, add X, yes, trends, by school, Max spacing, 17, sibs, same school) addstat(mean outcome, `mean')

}

* Remove the .txt companion written by outreg2.
capture erase Tab6.txt
restore

****************************
****************************

*TABLES 7 AND 8
* Effect of m_i_edp by quartile group of educp. hi_pe marks educp above the
* 75th percentile and lo_pe marks educp at or below the 25th percentile,
* both computed on s_by_f_sample. Table 7 uses ed_tert, hipaytert and
* STEMtert; Table 8 uses logPI and yemp. Both run inside one
* preserve/restore.
*
* Changes relative to the earlier version of this section:
*  - hi_pe and lo_pe are left missing when educp is missing. A missing value
*    compares as larger than any number, so it was previously coded hi_pe = 1.
*  - every column now carries the Max spacing / sibs labels used in
*    Tables 4 to 6, so the three by-school-trend columns can be told apart.
*  - the five specifications and their lincom blocks are one loop.

preserve

keep if s_by_f_sample

* One detailed summarize supplies both cut-offs.
su educp, d
scalar high = r(p75)
scalar lo = r(p25)
gen hi_pe = educp>scalar(high) if !missing(educp)
gen lo_pe = educp<=scalar(lo) if !missing(educp)
ge m_i_edp_hi_pe = m_i_edp*hi_pe
ge m_i_edp_lo_pe = m_i_edp*lo_pe

global controls "wom i.order"
global add_controls "durb3 sg_s enr_s m_i_agemb m_i_agef m_i_sibsize m_i_durb3 m_i_ibo1 m_i_ibo2 m_i_ibo3 m_i_ibo4 m_i_mg"
global vars "m_i_edp m_i_edp_lo_pe m_i_edp_hi_pe"

local outcomes7 "ed_tert hipaytert STEMtert"
local outcomes8 "logPI yemp"

foreach t in 7 8 {

	* Start each workbook from scratch; all outreg2 calls append.
	capture erase Tab`t'.txt
	capture erase Tab`t'.xls

	foreach y of local outcomes`t' {

		forvalues spec = 1/5 {

			* Defaults describe specification 3: both control sets, school
			* trends, full sample. The other specifications override them.
			local rhs    "$vars $controls $add_controls"
			local cond   ""
			local fe     "s_by_f school#c.yob yob"
			local addx   "yes"
			local trends "by school"
			local maxsp  "17"
			local sibs   "all"

			* 1: no controls
			if `spec' == 1 {
				local rhs  "$vars"
				local addx "no"
			}
			* 1 and 2: no school-specific trends
			if `spec' <= 2 {
				local fe     "s_by_f yob"
				local trends "homog"
			}
			* 4: max_spacing of at most 5
			if `spec' == 4 {
				local cond  "if max_spacing<=5"
				local maxsp "5"
			}
			* 5: sibsize+1 equal to sibs_by_school
			if `spec' == 5 {
				local cond "if sibsize+1==sibs_by_school"
				local sibs "same school"
			}

			if `spec' == 1 {
				* Estimated quietly so the outcome mean on this estimation
				* sample can be stored first; it is reported under every
				* column of the outcome.
				qui reghdfe `y' `rhs' `cond', absorb(`fe') vce(cluster school family)
				qui summ `y' if e(sample)
				local mean = r(mean)
				noi reghdfe
			}
			else {
				noi reghdfe `y' `rhs' `cond', absorb(`fe') vce(cluster school family)
			}

			qui outreg2 $vars using Tab`t'.xls, excel append dec(3) nocons nor2 ctitle(`y') addtext(base X, yes, add X, `addx', trends, `trends', Max spacing, `maxsp', sibs, `sibs') addstat(mean outcome, `mean')

			* Effect of m_i_edp in the middle, low and high groups, and the
			* high-minus-low difference.
			noi dis "mfx - mi_pe"
			noi lincom m_i_edp
			noi dis "mfx - lo_pe"
			noi lincom m_i_edp+m_i_edp_lo_pe
			noi dis "mfx - hi_pe"
			noi lincom m_i_edp+m_i_edp_hi_pe
			noi dis "hi-lo"
			noi lincom m_i_edp_hi_pe-m_i_edp_lo_pe

		}

	}

	* Remove the .txt companion written by outreg2.
	capture erase Tab`t'.txt
}

restore

****************************
****************************

*TABLE 9
* For each of four outcomes, two regressions with both control sets, s_by_f
* and yob fixed effects and school-specific trends: first on m_i_edp alone,
* then adding educp_m_i_edp. The outcome mean on the first estimation sample
* is reported under both columns.

preserve

keep if s_by_f_sample

* The merges below are disabled; the outcomes they would bring in are used
* in the loop, so they are presumably already in the dataset - verify.
/*
merge 1:1 pnr using X:\Data\Workdata\702092\marco\BO\zeroinc\coworkers, keepusing(peers_in_f)
drop if _m==2
drop _m
merge 1:1 pnr using X:\Data\Workdata\702092\marco\BO\zeroinc\parents_coworkers, keepusing(peersp_in_f)
drop if _m==2
drop _m
merge 1:1 pnr using X:\Data\Workdata\702092\marco\BO\zeroinc\restart2017\baseline\150719\zip1531, keepusing(sameregion1531)
drop if _m==2
drop _m
*/

global controls "wom i.order"
global add_controls "durb3 sg_s enr_s m_i_agemb m_i_agef m_i_sibsize m_i_durb3 m_i_ibo1 m_i_ibo2 m_i_ibo3 m_i_ibo4 m_i_mg"

* Fixed effects and clustering common to both columns.
local est_opts "absorb(s_by_f school#c.yob yob) vce(cluster school family)"

capture erase Tab9.txt
capture erase Tab9.xls

foreach y in self31 peers_in_f peersp_in_f sameregion1531 {

	* Export options and labels common to both columns of this outcome.
	local fmt "excel append dec(3) nocons nor2 ctitle(`y')"

	* Column 1: m_i_edp only.
	global vars "m_i_edp"

	quietly reghdfe `y' $vars $controls $add_controls, `est_opts'
	quietly summarize `y' if e(sample)
	local mean = r(mean)

	noisily reghdfe
	outreg2 $vars using Tab9.xls, `fmt' addtext(base X, yes, add X, yes, trends, by school, Max spacing, 17, sibs, all) addstat(mean outcome, `mean')

	* Column 2: adds educp_m_i_edp.
	global vars "m_i_edp educp_m_i_edp"

	noisily reghdfe `y' $vars $controls $add_controls, `est_opts'
	outreg2 $vars using Tab9.xls, `fmt' addtext(base X, yes, add X, yes, trends, by school, Max spacing, 17, sibs, all) addstat(mean outcome, `mean')

}

* Remove the .txt companion written by outreg2.
capture erase Tab9.txt

restore

****************************
****************************

*TABLE 10
* For logPI and yemp, two regressions with both control sets, s_by_f and yob
* fixed effects and school-specific trends: first on m_i_edp and sd_i_edp,
* then adding educp_m_i_edp and educp_sd_i_edp. The outcome mean on the first
* estimation sample is reported under both columns.

preserve

keep if s_by_f_sample

global controls "wom i.order"
global add_controls "durb3 sg_s enr_s m_i_agemb m_i_agef m_i_sibsize m_i_durb3 m_i_ibo1 m_i_ibo2 m_i_ibo3 m_i_ibo4 m_i_mg"

* Fixed effects and clustering common to both columns.
local est_opts "absorb(s_by_f school#c.yob yob) vce(cluster school family)"

capture erase Tab10.xls
capture erase Tab10.txt

foreach y in logPI yemp {

	* Export options common to both columns of this outcome.
	local fmt "excel append dec(3) nocons nor2 ctitle(`y')"

	* Column 1: m_i_edp and sd_i_edp.
	global vars "m_i_edp sd_i_edp"

	quietly reghdfe `y' $vars $controls $add_controls, `est_opts'
	quietly summarize `y' if e(sample)
	local mean = r(mean)
	noisily reghdfe
	quietly outreg2 $vars using Tab10.xls, `fmt' addtext(base X, yes, add X, yes, trends, by school, Max spacing, 17, sibs, all) addstat(mean outcome, `mean')

	* Column 2: adds educp_m_i_edp and educp_sd_i_edp.
	global vars "m_i_edp educp_m_i_edp sd_i_edp educp_sd_i_edp"
	noisily reghdfe `y' $vars $controls $add_controls, `est_opts'
	quietly outreg2 $vars using Tab10.xls, `fmt' addtext(base X, yes, add X, yes, trends, by school, Max spacing, 17, sibs, all) addstat(mean outcome, `mean')

}

* Remove the .txt companion written by outreg2.
capture erase Tab10.txt

restore

****************************
****************************

*FIGURE 1
* Side-by-side kernel densities, on s_by_f_sample, of (left) m_i_edp centred
* on its sample mean and (right) the residual of m_i_edp after regressing it
* on both control sets with s_by_f and yob fixed effects and school-specific
* trends. The combined graph is saved as Fig1; the two intermediate .gph
* files are deleted afterwards.

preserve
*check Rsquared and graph
capture drop resid*

global controls "wom i.order"
global add_controls "durb3 sg_s enr_s m_i_agemb m_i_agef m_i_sibsize m_i_durb3 m_i_ibo1 m_i_ibo2 m_i_ibo3 m_i_ibo4 m_i_mg"

foreach i in  m_i_edp {

	*sbyf
	reghdfe `i' $controls $add_controls if s_by_f_sample, absorb(s_by_f yob school#c.yob) res(resid_`i')

	su resid_`i' if s_by_f_sample

	cap drop temp

	* Centre the raw variable on its s_by_f_sample mean so both panels can
	* share a common x axis.
	su `i' if s_by_f_sample
	gen temp = `i' - r(mean)

	kdensity temp if s_by_f_sample , saving(`i'.gph, replace) graphregion(fcolor(white)) lcolor(black) xtitle(E(PE))

	kdensity resid_`i' if s_by_f_sample , saving(resid_`i'.gph, replace) graphregion(fcolor(white)) lcolor(black) xtitle(SbyF resid E(PE))

	graph combine `i'.gph resid_`i'.gph , xcommon ycommon saving(Fig1, replace) graphregion(fcolor(white)) rows(1)

	* Delete the intermediate graphs. The second file name was previously
	* misspelled, so the residual graph was left on disk and the failure
	* was hidden by capture.
	capture erase `i'.gph
	capture erase resid_`i'.gph

}
restore

****************************
****************************
*FIGURE 2
*THE FIGURE IS AN MS EXCEL PLOT OF THIS TABLE
* Frequency table of max_spacing, with values above 11 pooled into 11, over
* rows flagged one_s_by_f within s_by_f_sample.

preserve
ge max_spacing_pool = max_spacing
* Missing values compare as larger than any number, so they are excluded
* explicitly; otherwise a missing max_spacing would be counted in the pooled
* top category instead of being left out of the table.
replace max_spacing_pool=11 if max_spacing>11 & !missing(max_spacing)

ta max_spacing_pool if one_s_by_f & s_by_f_sample

restore

****************************
****************************
*FIGURE 3
* Within-s_by_f variation in m_i_edp, raw and residualised, on s_by_f_sample.
* Densities are drawn for the full sample, for educp above 0 (labelled
* High PE) and for educp at or below 0 (labelled Low PE), and combined into
* Fig3.gph.
*
* The logged summaries of the low group now use educp<=0, the same
* definition as the plotted group and the later descriptives; they
* previously used educp<0. The unused one-row-per-s_by_f flag that was
* generated here has been removed.
* NOTE(review): the descriptives at the end select rows with one_s_by_f, a
* variable taken from the dataset rather than built in this section -
* confirm it flags exactly one row per s_by_f within s_by_f_sample.

global controls "wom i.order"
global add_controls "durb3 sg_s enr_s m_i_agemb m_i_agef m_i_sibsize m_i_durb3 m_i_ibo1 m_i_ibo2 m_i_ibo3 m_i_ibo4 m_i_mg"

preserve

keep if s_by_f_sample
su educp
scalar m = r(mean)

* Deviation of m_i_edp from its s_by_f mean.
bys s_by_f: egen avg_epe = mean(m_i_edp)
gen dev_epe = m_i_edp - avg_epe

su dev_epe,d
su dev_epe if educp>0,d
su dev_epe if educp<=0,d
twoway (kdensity dev_epe, lcolor(black) lpattern(solid) ) (kdensity dev_epe if educp>0, lcolor(black) lpattern(shortdash)) (kdensity dev_epe if educp<=0, lcolor(black) lpattern(longdash)), ///
        graphregion(fcolor(white)) xtitle("Deviation in E(PE) from school-by-family average") ytitle(Density) legend(order(1 "Full sample" 2 "High PE" 3 "Low PE")) saving(dev_epe_yPE.gph, replace)

* Same exercise on the residual of m_i_edp after both control sets, s_by_f
* and yob fixed effects and school-specific trends.
reghdfe m_i_edp $controls $add_controls if s_by_f_sample, absorb(s_by_f yob school#c.yob) res(resid)
kdensity resid

bys s_by_f: egen avg_resid = mean(resid)
gen dev_resid = resid - avg_resid

su dev_resid,d
su dev_resid if educp>0,d
su dev_resid if educp<=0,d
twoway (kdensity dev_resid, lcolor(black) lpattern(solid) ) (kdensity dev_resid if educp>0, lcolor(black) lpattern(shortdash)) (kdensity dev_resid if educp<=0, lcolor(black) lpattern(longdash)), ///
        graphregion(fcolor(white)) xtitle("Deviation in E(PE) residuals from school-by-family average") ytitle(Density) legend(order(1 "Full sample" 2 "High PE" 3 "Low PE")) saving(dev_res_yPE.gph, replace)

graph combine dev_epe_yPE.gph  dev_res_yPE.gph, xcommon ycommon graphregion(fcolor(white)) saving(Fig3.gph, replace)

* Delete the two intermediate graphs.
capture erase dev_epe_yPE.gph
capture erase dev_res_yPE.gph

*Related descriptives mentioned in text
* Within-s_by_f standard deviation and range (max minus min) of m_i_edp and
* of its residual, summarised over rows flagged one_s_by_f.

bys s_by_f: egen sdepe = sd(dev_epe)
bys s_by_f: egen sdresid = sd(dev_resid)
su sd*
bys s_by_f: egen min = min(m_i_edp)
bys s_by_f: egen max = max(m_i_edp)
gen diff = max-min
su diff if one_s_by_f, d
bys s_by_f: egen maxr = max(resid)
bys s_by_f: egen minr = min(resid)
gen diffr = maxr-minr
su diffr if one_s_by_f, d
su diff if one_s_by_f & educp>0, d
su diff if one_s_by_f & educp<=0, d
su diffr if one_s_by_f & educp>0, d
su diffr if one_s_by_f & educp<=0, d

restore


****************************
****************************
*FIGURE 4
* For logPI and yemp: estimate the specification with m_i_edp and
* educp_m_i_edp, evaluate the linear combination
* _b[m_i_edp] + v*_b[educp_m_i_edp] on the grid v = -2(0.2)2, and plot the
* point estimates with bands of 1.96 standard errors. One graph per outcome
* is saved as Fig4_<outcome>.gph. The estimates are kept in scalars because
* the data in memory are replaced by the plotting dataset.

foreach y in logPI yemp {

	use data_replication_JAE, clear
	keep if s_by_f_sample
	global controls "wom i.order"
	global add_controls "durb3 sg_s enr_s m_i_agemb m_i_agef m_i_sibsize m_i_durb3 m_i_ibo1 m_i_ibo2 m_i_ibo3 m_i_ibo4 m_i_mg"
	global vars "m_i_edp educp_m_i_edp"

	noisily reghdfe `y' $vars $controls $add_controls, absorb(s_by_f school#c.yob yob) vce(cluster school family)

	* Regressor whose effect is traced out.
	local x "m_i_edp"
	display "graph `x' and educp"

	* Pass 1: store estimate and standard error at each grid point.
	local k = 0
	quietly forvalues i = -2(0.2)2 {
		local ++k
		lincom (_b[`x']+(`i')*(_b[educp_`x']))
		scalar `y'_`x'_mb`k' = r(estimate)
		scalar `y'_`x'_mse`k' = r(se)
	}

	* Pass 2: build a 21-row plotting dataset from the stored scalars. The
	* variable m_i_edp holds the grid value v, which the x axis labels PE.
	clear
	set obs 21
	foreach v in mb mse m_i_edp {
		generate `v' = .
	}
	generate nobs = _n

	local k = 0
	quietly forvalues i = -2(0.2)2 {
		local ++k
		replace m_i_edp = `i' if nobs == `k'
		replace mb = `y'_`x'_mb`k' if nobs == `k'
		replace mse = `y'_`x'_mse`k' if nobs == `k'
	}

	generate lo_mb = mb-1.96*mse
	generate hi_mb = mb+1.96*mse
	sort m_i_edp
	twoway (scatter mb m_i_edp, mcolor(black) connect(l) lpattern(solid) lwidth(thin) lcolor(black) msymbol(D) msize(small)) ///
		   (scatter lo_mb m_i_edp, mcolor(black) connect(l) lpattern(dash) lwidth(thin) lcolor(black) msymbol(none) msize(small)) ///
		   (scatter hi_mb m_i_edp, mcolor(black) connect(l) lpattern(dash) lwidth(thin) lcolor(black) msymbol(none) msize(small)) , yline(0, lcolor(black)) ///
		   graphregion(fcolor(white)) legend(off) ytitle("Marginal effect of E(PE)") xtitle("PE") saving(Fig4_`y'.gph, replace)

}

* Reload the replication data, which the loop replaced in memory.
use data_replication_JAE, clear

****************************
****************************
*FIGURE 5
* Scatter of sdev_edp against mean_edp using one row per yob-school cell.
* mean_edp is standardised to mean 0 and sd 1; sdev_edp is divided by the
* same sd and then centred on its own mean. The graph is saved as Fig5.gph.
preserve
* The merge below is disabled; mean_edp and sdev_edp are used further down,
* so they are presumably already in the dataset - verify.
/*
merge 1:1 pnr using X:\Data\WorkData\702092\marco\BO\zeroinc\restart2017\baseline\150719\peers_background_230819, keepusing(mean_edp sdev_edp)
keep if _m==3
drop _m
*/
* No command in this section needs a large matsize, and the requested value
* exceeds the limit of some Stata flavours, where the bare command would
* abort the do-file. capture keeps the setting wherever it is accepted.
capture set matsize 10000

* Keep one row per yob-school cell.
bys yob school: keep if _n==1

count

* r(mean) and r(sd) from this summarize stay available for both replace
* statements, because replace does not overwrite r() results.
sum mean_edp
replace mean_edp = (mean_edp-r(mean))/r(sd)
replace sdev_edp = (sdev_edp)/r(sd)
su sdev_edp
replace sdev_edp = sdev_edp-r(mean)
sum mean_edp sdev_edp
scatter sdev_edp mean_edp , graphregion(fcolor(white)) msize(small) mcolor(gs7) ///
		ytitle("School-by-cohort SD of parental education") xtitle("School-by-cohort mean of parental education") ///
		saving("Fig5.gph", replace)

restore

* End of analysis: close the log and clear the data in memory.
cap log close

clear
