* ---------------------------------------------------------------------------
* Session setup.
* The globals x, y and subsample are read here but are not defined in this
* file; they have to be set by the caller before this do-file is run.
* ---------------------------------------------------------------------------
clear all
set matsize 10000

* Local copies of the caller-supplied settings (used further down to name the log)
local x "$x"
local y "$y"
local s "$subsample"

* Control variables, assembled in three steps into the global controlvar
global controlvar opl_moe1 opl_moe2 no_ned_thuis toelage_SO male repeated
global controlvar $controlvar stad_2 stad_3 stad_4 prov_2 prov_3 prov_4 prov_5
global controlvar $controlvar mediaaninkomen_nis bev_nis totaalbelgrel_nis hogeronderwijsrel_nis sechogerrel_nis

// Load the analysis data; the treatment and outcome named by the globals x and y
// are renamed to the generic names x and y used in the rest of the script
use "Output/database_model_final", clear
rename $x x
rename $y y
label var x "Elite HS"

// Keep only observations where both outcome and treatment are observed
drop if missing(y, x)

// Random subsample: an observation is kept when its uniform draw does not
// exceed the global sample.
// NOTE(review): no seed is set in this file, so the draw is only reproducible
// if the caller sets one - confirm.
gen random = runiform()
keep if random <= $sample
drop random

// Start a fresh log, first closing any log that is still open
capture log close
log using main_`x'_`y'_`s'_control3, replace

// Inputs for the instrument: attach both distance files by niscode and sector.
// Rows that appear only in the using file are discarded after each merge.
foreach distfile in minimumdist_adnr minimumdist_adnr_count {
    merge m:1 niscode sector using "Output/`distfile'"
    drop if _merge == 2
    drop _merge
}

// Instrument z: the smaller of d_scho_noASO and d_scho_mixed, minus d_scho_ASO
gen mindist_treat   = d_scho_ASO
gen mindist_notreat = min(d_scho_noASO, d_scho_mixed)   // min() ignores missing arguments

gen z = mindist_notreat - mindist_treat
label var z "Dist nonElite - dist Elite"
global propvars z

// Number of schools within 5 km distance: treated plus non-treated counts
gen totsch5 = tot_5km_scho_treat + tot_5km_scho_notreat

// Listwise deletion on every variable in the global controls2. That list is
// not defined in this file; per the original note it holds the control
// variables of the main analysis, so that the number of observations stays
// the same across analyses.
foreach ctrl of varlist $controls2 {
    drop if missing(`ctrl')
}

// Selection of the sample, driven by the global subsample.
// Values 2 to 6 each apply one restriction; any other value leaves the data as is.
if $subsample == 2 {
    // drop observations with fewer than four schools counted in totsch5
    drop if inlist(totsch5, 0, 1, 2, 3)
}
else if $subsample == 3 {
    drop if d4_scho_any < d_scho_ASO
}
else if $subsample == 4 {
    keep if ASO2 == 1
}
else if $subsample == 5 {
    keep if scho_someASO == 1
}
else if $subsample == 6 {
    drop if dropout_uncertain == 1
}

* Domain for MTE graphs (subsample 1 only).
* A probit of x on z and the controls gives a propensity score, which is
* rounded to two decimals and tabulated against x. The original note states
* the criterion as at least 10 students in both elite and non-elite schools;
* the resulting bound 0.75 is hard-coded in the global dom.
if $subsample == 1 {
    probit x z $controlvar
    predict propscore, pr
    gen prop_score = round(propscore, 0.01)
    tab prop_score x
    global dom = 0.75
}

* Snapshot of the estimation sample, reloaded later before the bootstrap
tempfile temp_1
save `temp_1', replace

********************
*** OLS and 2SLS ***
********************

* Descriptive tables and linear models.
* Only run for subsample 1 with the outcome nodropout_strict.
if $subsample==1 & "$y"=="nodropout_strict" {

    * Variable lists shared by the paired summaries below
    local tab1vars male repeated opl_moe3 opl_moe2 opl_moe1 no_ned_thuis toelage_SO mediaaninkomen_nis mediaaninkomen totaalbelgrel_nis totaalbelgrel hogeronderwijsrel_nis hogeronderwijsrel sechogerrel_nis sechogerrel bev_nis bev mindist_treat mindist_notreat
    local tab2vars ASO2 y degreeontime downgrade

    * Table 1: Enrollment in elite and non-elite schools (x==1 first, then x==0)
    foreach elite in 1 0 {
        sum `tab1vars' if x==`elite'
    }

    * Extra and footnotes
    sum x ASO2 y degreeontime downgrade mindist_treat mindist_notreat dropout_uncertain
    tabstat mindist_treat mindist_notreat, stats(p1 p5 p10 p25 p50 p75 p90 p95 p99)

    * Table 2: Track choice and study outcomes by initial school choice
    foreach elite in 1 0 {
        sum `tab2vars' if x==`elite'
    }

    * Table 3: First stage: choosing an elite school
    * (the F-statistic of the exclusion restriction is reported by the 2SLS
    *  regression of Table 13 below)
    reg x z $controlvar, cluster(location)

    * Table 4: Testing the instrument: effect of student characteristics on relative distance
    reg z $controlvar, cluster(location)
    test $controls1 repeated

    * Table 13: Obtaining a high school degree: OLS and 2SLS
    reg y x $controlvar, cluster(location)
    ivregress 2sls y (x=z) $controlvar, cluster(location)
    estat firststage
}


***************************
*** PARAMETRIC APPROACH ***
***************************

* Estimation
* Outcome equation: y on the controls; selection equation: x on z and the
* controls; standard errors clustered on location.
* NOTE(review): switch_probit appears to be a user-written command - confirm
* that it is installed before running.
switch_probit y $controlvar, select(x z $controlvar) cluster(location)

* Store the estimates under the name baseline
est store baseline
* First pass: predict with the options tt, te and tu, print the summary of
* each prediction and save its mean in a scalar.
* NOTE(review): presumably tt, te and tu are the effect on the treated, the
* average effect and the effect on the untreated - confirm against the
* switch_probit documentation.
* NOTE(review): no local macro i is defined in the visible code before this
* loop, so the scalar names would end in a bare underscore (att_, ate_,
* atnt_). These scalars are not used again in this file - confirm whether
* the bootstrap do-file relies on them.
predict att,tt
predict ate,te
predict atnt,tu
foreach var of varlist att ate atnt {
sum `var'
scalar `var'_`i'=r(mean)
}
* The three predictions are dropped here and regenerated right below with
* the same predict calls
drop att ate atnt

* Predictions for treatment effects and to calculate other effects
predict att,tt
predict ate,te
predict atnt,tu

* Print the mean of each treatment parameter
di "TREATMENT PARAMETERS"
tabstat att ate atnt ,stats(mean) col(stats)


* Collect the means in the scalars scal_att, scal_ate and scal_atnt; they are
* read back as point estimates when the bootstrap draws are summarized
foreach var of varlist att ate atnt  {	
	qui sum `var'
	scalar scal_`var'=r(mean)
}
	
// Bootstrap
// Reload the estimation sample saved before the tables and models were run
// and store it on disk under a fixed name.
// NOTE(review): presumably twostage_draws_control_small.do reads this file - confirm.
// (The tempfile temp_boot declared here previously was never used and has
// been removed.)
use `temp_1', clear
save "Output/startpred_clean_twostage", replace

// Run the bootstrap do-file, either in parallel or in the current session,
// depending on the global parallel_allow.
if $parallel_allow==1 {
    // Number of parallel clusters, stated once so that the cluster setup and
    // the per-cluster loop count cannot drift apart
    local nclusters 5
    parallel setclusters `nclusters', force
    // NOTE(review): this is not an integer when bootloop is not a multiple of
    // the cluster count; confirm how the bootstrap do-file handles that, since
    // the summary step afterwards expects draw files 1 to bootloop.
    global loopspercluster = $bootloop/`nclusters'
    parallel do "twostage_draws_control_small.do", nodata
}
else {
    // Single-session run: set the same globals to describe one cluster that
    // performs all loops
    global PLL_CLUSTERS = 1
    global pll_instance = 1
    global loopspercluster = $bootloop
    do "twostage_draws_control_small.do"
}

	
* Summarize the draws.
* The draw files 1 to bootloop are merged 1:1 on var (matched rows only); the
* bootstrap standard deviation is then taken across all draw* variables.
use "Output/twostage_draw_1", clear
forvalues i = 2/$bootloop {
    merge 1:1 var using "Output/twostage_draw_`i'", keep(3) nogen
}

egen sd = rowsd(draw*)
levelsof var, local(vars)

* Point estimates come from the scalars scal_<name> saved after the main
* estimation. scalar() forces the name to be read as a scalar, so a dataset
* variable with a matching or abbreviating name cannot shadow it.
gen est = .
foreach var in `vars' {
    replace est = scalar(scal_`var') if var == "`var'"
}
order est, before(sd)

* Star when |est| exceeds 1.96 bootstrap standard deviations.
* Stata treats missing as larger than any number, so without the missing()
* guard a row with a missing sd or est would satisfy the inequality and be
* starred even though nothing can be said about its significance.
gen star = ""
replace star = "*" if abs(est) - 1.96*sd > 0 & !missing(est, sd)

keep var est sd star

list

log close
