* Session setup: do not pause output, close any log that is still open
* (errors from having no open log are ignored), then start a fresh log.
set more off

capture log close
log using "Output/data_management", replace

* Load the model database. The sector variable is renamed to sectorcode,
* which is the key name used by the sector-level merges in this script.
use "Output/database_model", clear
rename sector sectorcode

// Merge data at statistical sector level (key: niscode + sectorcode)
merge m:1 niscode sectorcode using "Output/census2001"
// sectors present in the master data but absent from the census file
tabulate sectorcode if _merge == 1, missing
// keep master-only and matched rows; rows that exist only in the using file go
keep if _merge != 2
drop _merge

// Add income data of 2004 (only the median and mean income variables are loaded)
merge m:1 niscode sectorcode using "Output/SS_inkomen2004", keepusing(mediaaninkomen gemiddeldinkomen)
// sectors present in the master data but absent from the income file
tabulate sectorcode if _merge == 1, missing
keep if _merge != 2
drop _merge
	

// Merge data at municipality level (key: niscode only)
merge m:1 niscode using "Output/NIS_census2001"
// list municipalities in the master data that have no match in the census file
tab niscode if _merge==1,missing
// rows that exist only in the using file are not part of the sample
drop if _merge==2
drop _merge

// add municipality-level income data of 2004
merge m:1 niscode using "Output/NIS_inkomen2004",keepusing(mediaaninkomen_nis gemiddeldinkomen_nis)
// this merge is keyed on niscode, so the unmatched-rows diagnostic must be
// tabulated by niscode (not sectorcode) to show which municipalities failed
tab niscode if _merge==1,missing
drop if _merge==2
drop _merge
	

* Shorter names for the two population-density variables
rename bevolkingsdichtheid100     bev
rename bevolkingsdichtheid100_nis bev_nis


* City-type classification by municipality code (niscode):
*   1 = big city, 2 = regional city, 3 = medium-sized city, 4 = rural.
* Every observation starts as rural; listed municipalities are then recoded.
gen stad = 4

* big cities
replace stad = 1 if inlist(niscode, ///
	11002, /// Antwerpen
	35013, /// Oostende
	44021, /// Gent
	71022, /// Hasselt
	24062, /// Leuven
	31005) //  Brugge

* regional cities
replace stad = 2 if inlist(niscode, ///
	46021, /// Sint-Niklaas
	36015, /// Roeselare
	13040, /// Turnhout
	12025, /// Mechelen
	23088, /// Vilvoorde
	34022, /// Kortrijk
	71016, /// Genk
	41002) //  Aalst

* medium-sized cities
replace stad = 3 if inlist(niscode, ///
	42025, /// Wetteren
	73083, /// Tongeren
	33011, /// Ieper
	13011, /// Herentals
	13025, /// Mol
	45035, /// Oudenaarde
	71053, /// Sint-Truiden
	41018, /// Geraardsbergen
	32003, /// Diksmuide
	23027, /// Halle
	41081, /// Zottegem
	38025, /// Veurne
	31033, /// Torhout
	71034, /// Leopoldsburg
	13008, /// Geel
	43005, /// Eeklo
	42006, /// Dendermonde
	41048, /// Ninove
	37015, /// Tielt
	24107, /// Tienen
	33021, /// Poperinge
	24001, /// Aarschot
	12021, /// Lier
	24020) //  Diest

* Province identifier: first character of niscode written out as a string,
* converted back to a number
tostring niscode, generate(prov)
replace prov = substr(prov, 1, 1)
destring prov, replace

* Restrict the sample to provinces 1, 2, 3, 4 and 7
keep if inlist(prov, 1, 2, 3, 4, 7)

* One indicator per province; the first one is dropped
* (presumably to serve as the reference category)
tabulate prov, generate(prov_)
drop prov_1

* Same for the city-type classification
tabulate stad, generate(stad_)
drop stad_1

* Restore the original name of the sector variable
rename sectorcode sector

* Scale income variables: divide both median-income variables by 1000
foreach inc of varlist mediaaninkomen mediaaninkomen_nis {
	replace `inc' = `inc' / 1000
}

* Location variable: one group number per niscode-sector combination
egen location = group(niscode sector)

* Define cohort (first was based on last year elementary education)
gen cohort = first + 1

* Dummy for uncertain drop-out status. It is only defined where
* nodropout_strict is observed: 1 if nodropout_wounknown is missing, else 0.
gen dropout_uncertain = missing(nodropout_wounknown) if !missing(nodropout_strict)

* Keep and simplify: the global macro keep lists every variable retained in
* the final dataset, grouped by role.
global keep  scho_ASO nodropout_strict degreeontime downgrade nodropout_soft ASO2 ASO3 scho_someASO dropout_uncertain /// treatment, outcomes and sample restrictions
		cohort opl_moe1 opl_moe2 opl_moe3 no_ned_thuis toelage_SO male repeated /// indiv char
		stad_2 stad_3 stad_4 prov_2 prov_3 prov_4 prov_5  /// dummy variables
		mediaaninkomen bev totaalbelgrel hogeronderwijsrel sechogerrel /// statistical sector variables
		mediaaninkomen_nis bev_nis totaalbelgrel_nis hogeronderwijsrel_nis sechogerrel_nis /// municipality variables
		location niscode sector // location

keep $keep

* Row identifier based on the current observation order. Stored as long:
* the default float type cannot represent every integer above 16,777,216,
* which would produce duplicate ids on large datasets.
gen long id = _n

* Put id first, followed by the kept variables in the order listed above
order id $keep

* Descriptive statistics before any sample restrictions are applied
sum

* Set intermediate outcomes to 0 where they are missing
foreach outcome of varlist downgrade ASO2 ASO3 {
	replace `outcome' = 0 if missing(`outcome')
}

* Describe id within each cohort before any observations are dropped
foreach yr of numlist 2003 2004 {
	codebook id if cohort == `yr'
}

* Drop observations with missing individual information
foreach indiv of varlist cohort opl_moe1 opl_moe2 opl_moe3 no_ned_thuis toelage_SO male repeated {
	drop if missing(`indiv')
}

* Describe id within each cohort after this restriction
foreach yr of numlist 2003 2004 {
	codebook id if cohort == `yr'
}

* Drop observations with missing location information
foreach loc of varlist location niscode sector {
	drop if missing(`loc')
}

* Describe id within each cohort after this restriction
foreach yr of numlist 2003 2004 {
	codebook id if cohort == `yr'
}

* Drop observations with any missing neighborhood characteristic
foreach nb of varlist ///
		stad_2 stad_3 stad_4 prov_2 prov_3 prov_4 prov_5 /// dummy variables
		mediaaninkomen bev totaalbelgrel hogeronderwijsrel sechogerrel /// statistical sector variables
		mediaaninkomen_nis bev_nis totaalbelgrel_nis hogeronderwijsrel_nis sechogerrel_nis { // municipality variables
	drop if missing(`nb')
}

* Describe id within each cohort after this restriction
foreach yr of numlist 2003 2004 {
	codebook id if cohort == `yr'
}

* Descriptive statistics of the final sample
summarize

* Write the final dataset (overwriting any previous version) and close the log
save "Output/database_model_final", replace

log close

