* ---------------------------------------------------------------------------
* Stage 1: reduce the model database to one record per adnr, holding the
* mean coordinates and the mean of each track indicator for that adnr.
* ---------------------------------------------------------------------------
set more off

capture log close
log using "Output/distance", replace

use "Output/database_model_allyears", clear

* g_lon and g_lat are the coordinates of schools, not of students
local school_vars g_lon g_lat scho_ASO scho_someTSO scho_someKSO scho_someBSO scho_noASO scho_someASO scho_mixed
collapse (mean) `school_vars', by(adnr)

save "Output/schoolchar_adnr", replace

* ---------------------------------------------------------------------------
* Stage 2: reload the per-adnr school file, define the list of track
* indicators to loop over, and give every school usable coordinates.
* ---------------------------------------------------------------------------
set more off
use "Output/schoolchar_adnr", clear

sum

*no scho_TSO_KSO -> drop
* scho_any is the sum of three indicators; presumably they are mutually
* exclusive so that scho_any flags every school - TODO confirm
gen scho_any = scho_ASO + scho_noASO + scho_mixed

* list of indicators for which nearest-school distances are computed later
global schools scho_any scho_ASO scho_noASO scho_someASO scho_someTSO scho_someKSO scho_someBSO scho_mixed

* Schools with unknown coordinates are placed extremely far away (near
* lon -179, lat -89) with a small random jitter so that no two of them
* share a location. The seed is fixed so the jitter, and therefore which
* of these placeholder schools is picked up downstream, is the same on
* every run.
set seed 1984
gen random = runiform()
replace g_lon = -179 + 0.01*random if missing(g_lon)
replace g_lat = -89 - 0.01*random if missing(g_lat)

* keep a snapshot of the school-only data (n is the row number after sorting)
tempfile temp_adnr
sort adnr
gen n = _n
local N = _N
keep adnr n g_lon g_lat $schools
save `temp_adnr', replace
 
* ---------------------------------------------------------------------------
* Stage 3: stack the statistical-sector records under the school records.
* mindonor is 0 for the school rows already in memory and 1 for the appended
* sector rows; donor is its complement, so schools are the donors.
* The path uses forward slashes only: Stata accepts them on every platform,
* whereas backslashes only work on Windows.
* ---------------------------------------------------------------------------
append using "Source/shape file/Belgium/statsector_data_be_1984", gen(mindonor)
gen donor = 1 - mindonor

* Rows that still lack coordinates take x_centr / y_centr from the sector file
* (assumes these are in the same coordinate system as g_lon / g_lat - TODO confirm)
replace g_lat = y_centr if missing(g_lat)
replace g_lon = x_centr if missing(g_lon)

* full stacked dataset, reloaded at the start of every track iteration
tempfile restorethis
save `restorethis', replace

* accumulator for the results: starts as the list of distinct non-missing ID_be
tempfile temp
keep ID_be
drop if missing(ID_be)
duplicates drop
save `temp', replace

 * ---------------------------------------------------------------------------
 * Stage 4: for every indicator in $schools, match each sector row to the
 * five nearest schools having that indicator equal to 1, and merge the
 * resulting distances and school ids into the accumulator tempfile.
 * Output variables per track: d_TRACK, id_TRACK for the nearest school and
 * dK_TRACK, idK_TRACK for K = 2..5.
 * NOTE(review): the indicators are means by adnr, so the ==1 test keeps only
 * schools whose indicator is exactly 1 in every record - confirm intended.
 * ---------------------------------------------------------------------------
 foreach track of global schools {
     use `restorethis', clear
     di "`track'"

     * all sector rows plus the school rows flagged for this track
     keep if mindonor==1 | (mindonor==0 & `track'==1)
     * sector rows have no adnr; give them ids that start at 10000000
     replace adnr = 10000000 + _n if missing(adnr)

     * number of schools flagged for this track (shown in the log)
     sum `track' if `track'==1
     scalar scal_N = r(N)

     * distmatch is a user-written command; presumably it returns for each
     * recipient the distance in km and the id of its 5 nearest donors as
     * _dist1.._dist5 and _adnr1.._adnr5 - TODO confirm against its help file
     distmatch ,id(adnr) lat(g_lat) lon(g_lon) donor(donor)   recipient(mindonor) km nearest(5)
     keep ID_be CODE1001 _dist* _adnr* mindonor

     forvalues i = 1/5 {
         * the nearest match carries no rank in its name, ranks 2..5 do
         local rank
         if `i' > 1 local rank `i'

         * A distance above 1000 km can only come from a school that was
         * given placeholder coordinates, so it is not a real match: blank
         * the school id together with the distance, otherwise the output
         * would name an unlocated school as the neighbour.
         replace _adnr`i' = . if _dist`i' > 1000
         replace _dist`i' = . if _dist`i' > 1000

         rename _dist`i' d`rank'_`track'
         rename _adnr`i' id`rank'_`track'
     }

     * keep the sector rows only and add this track to the accumulated results
     keep if mindonor==1
     merge 1:1 ID_be using `temp'
     drop _merge

     save `temp', replace
 }
 
 * ---------------------------------------------------------------------------
 * Stage 5: reduce the matched results to one record per CODE1001, split the
 * code into niscode and sector, and save the final file.
 * ---------------------------------------------------------------------------
 use `temp', clear

 drop mindonor
 drop ID_be
 di _N

 * A CODE1001 can appear on several rows (several coordinates for a sector).
 * Distances are averaged by the collapse below. A school id is only kept
 * when every row of the sector agrees on it; otherwise it is set to missing,
 * because averaging different ids would produce a meaningless number.
 * Agreement is tested with min == max in double precision: comparing each id
 * with the group mean lets the middle id survive when three or more rows
 * disagree, and a float mean is inexact for large ids.
 * All five ranks are checked, not only the nearest match.
 foreach track of global schools {
     forvalues i = 1/5 {
         local rank
         if `i' > 1 local rank `i'

         * skip quietly when the id variable does not exist for this track
         capture confirm numeric variable id`rank'_`track'
         if _rc continue

         tempvar id_lo id_hi
         bysort CODE1001: egen double `id_lo' = min(id`rank'_`track')
         bysort CODE1001: egen double `id_hi' = max(id`rank'_`track')
         replace id`rank'_`track' = . if `id_lo' != `id_hi'
         drop `id_lo' `id_hi'
     }
 }

 collapse d*_* id*_*, by(CODE1001)
 di _N

 * first five characters of CODE1001 form the numeric niscode, the rest is the sector
 gen niscode = substr(CODE1001,1,5)
 destring niscode, replace
 gen sector = substr(CODE1001,6,.)
 order niscode sector
 drop CODE1001

 save "Output/minimumdist_adnr", replace


 log close
