/*
Purpose: compare estimating the factor model directly (via gsem) with using the
switch_probit command. The script shows the estimates and the average and
distributional treatment effects (without standard errors).

The caller is expected to have set the globals x, y and subsample, which are read below.
*/
clear all
set matsize 10000

* Copy the caller's globals into locals; they are used to build the log file name
local x ${x}
local y ${y}
local s ${subsample}

* Close any log that is still open, then start a fresh one for this run
capture log close
log using "gsem_aakvik_`x'_`y'_`s'", replace


* Load the estimation data set
use "Output/database_model_final", clear

* Standardise names: the treatment variable becomes x, the outcome variable becomes y
rename $x x
label variable x "Elite HS"
rename $y y

* Both the outcome and the treatment have to be observed
drop if missing(y, x)

* Random subsample: an observation is kept when its uniform draw does not exceed
* the share given by the global sample.
* NOTE(review): no seed is set in this file, so the draw depends on the RNG state
* inherited from the caller. Confirm the caller sets one if reproducibility matters.
generate random = runiform()
keep if random <= $sample
drop random

* Build the instrument.
* Attach the two distance files by niscode and sector. Rows that exist only in the
* using file are not kept, and no _merge variable is left behind.
foreach distfile in minimumdist_adnr minimumdist_adnr_count {
    merge m:1 niscode sector using "Output/`distfile'", keep(master match) nogenerate
}

* Distance on the treatment side
generate mindist_treat = d_scho_ASO

* Distance on the non-treatment side: the smaller of the two distances
* (min() skips missing arguments, so the result is missing only if both are missing)
generate mindist_notreat = min(d_scho_noASO, d_scho_mixed)

* Instrument: non-treatment distance minus treatment distance
generate z = mindist_notreat - mindist_treat
label variable z "Dist nonElite - dist Elite"
global propvars z

* Total number of schools within 5 km (treatment plus non-treatment counts)
generate totsch5 = tot_5km_scho_treat + tot_5km_scho_notreat

* Every control variable has to be observed
foreach ctrl of varlist $controls2 {
    drop if missing(`ctrl')
}

* Optional sample restriction, selected by the global subsample.
* Values other than 2 to 6 leave the sample unchanged.
if $subsample == 2 {
    * remove observations with 0, 1, 2 or 3 schools within 5 km
    drop if inlist(totsch5, 0, 1, 2, 3)
}
else if $subsample == 3 {
    drop if d4_scho_any < d_scho_ASO
}
else if $subsample == 4 {
    keep if ASO2 == 1
}
else if $subsample == 5 {
    keep if scho_someASO == 1
}
else if $subsample == 6 {
    drop if dropout_uncertain == 1
}


* Regressor lists: xvars (instrument z plus controls) enter the equation for x,
* yvars (controls only) enter the outcome equations
global xvars z $controls2
global yvars $controls2

* Reduce the data to the variables the model uses
keep x y location $xvars

* Complete cases only; each variable name is echoed as it is checked
foreach v of varlist _all {
    display "`v'"
    drop if missing(`v')
}

* Potential outcomes: y0 holds y for x==0 only, y1 holds y for x==1 only;
* each is missing in the other group
generate y0 = y if x == 0
generate y1 = y if x == 1

* Three probit equations that share the latent variable ability.
* Its loading is fixed at 1 in the equation for x and is free in y1 and y0.
gsem                                                      ///
    ($xvars ability@1 -> x,  probit)                      /// elite school
    ($yvars ability   -> y1, probit)                      /// outcome, x==1 group
    ($yvars ability   -> y0, probit)                      /// outcome, x==0 group
    , covstruct(_lexogenous, diagonal) latent(ability)    ///
      means(ability@0) cov(ability@1)                     ///
      nocapslatent vce(cluster location)
estimates store aakvik

* The split outcome variables were needed for estimation only
drop y0* y1*

* Treatment effects implied by the estimated coefficients.
* NOTE(review): the indices below leave out the ability term, so te is evaluated
* with ability at 0 (the mean fixed in the gsem call). Confirm that this is the
* intended comparison.

* Start each linear index from its constant (and the instrument for the x equation)
generate zb  = _b[x:_cons] + _b[x:z]*z
generate xb0 = _b[y0:_cons]
generate xb1 = _b[y1:_cons]

* Add the contribution of every control variable, one at a time
quietly foreach ctrl of varlist $controls2 {
    replace zb  = zb  + _b[x:`ctrl']*`ctrl'
    replace xb0 = xb0 + _b[y0:`ctrl']*`ctrl'
    replace xb1 = xb1 + _b[y1:`ctrl']*`ctrl'
}

* Probit probabilities for each outcome equation and their difference
generate prob1 = normal(xb1)
generate prob0 = normal(xb0)
generate te = prob1 - prob0
summarize te


* Treatment effects and distributional effects using simulated ability.
* Create the per-draw indicators and the running totals, all starting at zero.
local simvars psel_sim treat_sim prob0_sim prob1_sim ///
    gain_sim loose_sim gain_tt_sim loose_tt_sim gain_ut_sim loose_ut_sim
foreach acc of local simvars {
    generate `acc' = 0
}

* Keep a copy of each linear index under the suffix _base
foreach idx of varlist zb xb0 xb1 {
    clonevar `idx'_base = `idx'
}
	

	
* Simulation over maxsim draws of the latent factor and the three error terms.
* The two expressions below recur in the updates and are substituted as text:
*   gain = outcome 1 under treatment and outcome 0 without it
*   loss = outcome 0 under treatment and outcome 1 without it
local gain "prob1_sim*(1-prob0_sim)"
local loss "(1-prob1_sim)*prob0_sim"

quietly forvalues rep = 1/$maxsim {
    * fresh draws for this replication (drawnorm defaults: mean 0, variance 1)
    drawnorm ability_sim
    drawnorm e0_sim
    drawnorm e1_sim
    drawnorm ed_sim

    * stage 1: simulated elite-school indicator
    replace treat_sim = (zb_base + [x]_b[ability]*ability_sim + ed_sim > 0)

    * outcome stage: simulated binary outcomes without and with treatment
    replace prob0_sim = xb0_base + [y0]_b[ability]*ability_sim + e0_sim > 0
    replace prob1_sim = xb1_base + [y1]_b[ability]*ability_sim + e1_sim > 0

    * running totals of individual gains and losses: everyone, simulated
    * treated (tt) and simulated untreated (ut)
    replace gain_sim     = (gain_sim     + `gain')
    replace loose_sim    = (loose_sim    + `loss')
    replace gain_tt_sim  = (gain_tt_sim  + `gain'*treat_sim)
    replace loose_tt_sim = (loose_tt_sim + `loss'*treat_sim)
    replace gain_ut_sim  = (gain_ut_sim  + `gain'*(1-treat_sim))
    replace loose_ut_sim = (loose_ut_sim + `loss'*(1-treat_sim))

    * number of times treated (denominator used after the loop)
    replace psel_sim = psel_sim + treat_sim

    * the draws are regenerated in the next replication
    drop ability_sim e1_sim e0_sim ed_sim
}

* Average treatment effects: turn the running totals into averages over the draws
replace psel_sim = psel_sim/$maxsim

foreach k in gain loose {
    * unconditional share
    replace `k'_sim    = (`k'_sim/$maxsim)
    * shares conditional on being simulated as treated / untreated
    replace `k'_tt_sim = (`k'_tt_sim/$maxsim)/psel_sim
    replace `k'_ut_sim = (`k'_ut_sim/$maxsim)/(1-psel_sim)

    * blank out the other group such that the expectation over X is okay
    replace `k'_tt_sim = . if x == 0
    replace `k'_ut_sim = . if x == 1
}

* Net effects: share gaining minus share losing
generate ate  = gain_sim    - loose_sim
generate att  = gain_tt_sim - loose_tt_sim
generate atnt = gain_ut_sim - loose_ut_sim

* Report the means
tabstat ate  gain_sim    loose_sim,    statistics(mean) columns(statistics) varwidth(32)
tabstat att  gain_tt_sim loose_tt_sim, statistics(mean) columns(statistics) varwidth(32)
tabstat atnt gain_ut_sim loose_ut_sim, statistics(mean) columns(statistics) varwidth(32)

log close
