************************************************
* Creates the data set used in the main        *
* estimation; Cohort K is used                 *
************************************************


* ---- session setup ---------------------------------------------------------
set more off  // do not pause long output
clear all     // start with nothing in memory
cd "C:\Users\Bernhard\Desktop\LSAC_Data" //set working directory here


** The following are only determined in Wave 1
use "lsacgrk4.dta", clear

* main parent: 1 if zf02m2 == 2
* NOTE(review): presumably zf02m2 is the sex of parent 1 with 2 = female - confirm
* against the LSAC data dictionary. The flag is only needed by the disabled
* restriction on the next line.
gen mainpar_mother = (zf02m2==2)
*drop if mainpar_mother == 0

* background characteristics of the study child
gen female = (zf02m1==2)
gen young_cohort = (cf03m1==4) // those are the young ones in each wave

* birth and initial health of the study child
gen birthweight = (zhs03a / 1000) // birthweight in kilos
replace birthweight = . if birthweight<0 // negative codes become missing
gen prob_birth = (zhs05a==1) // if birth was intensive one
gen gestation = zhs04a // gestation time
gen prem_birth = 0
replace prem_birth = 1 if gestation>-1 & gestation<=37 // premature birth
* Stata treats missing as larger than any number, so the >= comparison must be
* guarded; otherwise a child with missing zhs06 would be flagged as a multiple birth.
gen multi_birth = (zhs06>=2 & !missing(zhs06)) // twin birth or others


* handedness
gen left_hand = (cid24c==2)
gen both_hands = (cid24c==3 | cid24c==-2)
* missing() also catches extended missing values (.a-.z), not only "."
gen handedmissing = (cid24c<=-3 | missing(cid24c))
* variable names are written out in full so this line does not depend on varabbrev
gen right_hands = (left_hand==0 & both_hands==0 & handedmissing==0)
drop if handedmissing==1
drop handedmissing

* teacher assessed development (we only have this for wave 1)
rename ccnfsad advantageindex
replace advantageindex = advantageindex/100

* Give the six teacher ratings readable names and set negative codes to missing.
* The two lists are matched by position.
local rawitems  cgd05a   cgd05b    cgd05c    cgd05d     cgd05e   cgd05f
local ratenames tsocemot tlearning tgrsmotor tfinemotor texplang treclang
forvalues i = 1/6 {
	local src : word `i' of `rawitems'
	local dst : word `i' of `ratenames'
	rename `src' `dst'
	replace `dst' = . if `dst'<0
}

* Indicator per domain: 1 if the rating is 3 or 4, 0 for any other non-missing
* rating, missing if the rating is missing. Lists are matched by position and
* keep the original creation order of the indicators.
local ratings tgrsmotor  treclang  tfinemotor tlearning texplang  tsocemot
local flags   poorgmotor poorrlang poorfmotor poorlearn poorelang pooremot
forvalues i = 1/6 {
	local rating : word `i' of `ratings'
	local flag   : word `i' of `flags'
	gen `flag' = (`rating'>=3 & `rating'<=4) if `rating'!=.
}

* number of domains flagged as poor
egen cdevelopment = rowtotal(poorgmotor poorrlang poorfmotor poorlearn pooremot poorelang)

* keep only children with all six teacher ratings observed
foreach rating of local ratings {
	drop if `rating' == .
}

* pre-school: three mutually exclusive indicators built from cpc06a1
* (code 11 -> pre_school, code 12 -> pre_year, everything else -> pre_others)
gen pre_school = (cpc06a1 == 11)
gen pre_year = (cpc06a1 == 12)
gen pre_others = (pre_year == 0 & pre_school == 0)

* Keep the child-level variables only. All names are spelled out in full
* (right_hands, not right_hand) so the list does not depend on varabbrev.
keep hicid female young_cohort birthweight prob_birth gestation prem_birth multi_birth ///
	 left_hand right_hands both_hands ///
	 poorgmotor poorrlang poorfmotor poorlearn poorelang pooremot cdevelopment ///
	 tsocemot tlearning tfinemotor tgrsmotor treclang texplang ///
	 pre_school pre_year pre_others 
compress
save "main_data.dta",replace


********************************************************************************
** The following characteristics can change wave by wave, so the wave-specific **
** variable prefix is stripped and the waves are stacked into labor.dta       **
********************************************************************************

** Now get outcome and lf variables for each wave
** set wave identifiers
local waveno 4 6 8 10 12 // age 4-5, 6-7, 8-9, 10-11, 12-13
local prefixx c d e f g h // variable prefix of wave 1, 2, ..., 6 (matched by position)
local firstwave : word 1 of `waveno'

foreach kk of local waveno {

	use "lsacgrk`kk'.dta",clear

	* Look up the variable prefix from the wave number stored in the file.
	* A bare variable name in the if command is evaluated on the first
	* observation only, so that is made explicit here. Stop with an error when
	* the wave is not recognised instead of silently reusing the prefix of the
	* previous iteration.
	local thiswave = wave[1]
	local pp
	if inrange(`thiswave', 1, 6) {
		local pp : word `thiswave' of `prefixx'
	}
	if "`pp'" == "" {
		display as error "lsacgrk`kk'.dta: no variable prefix known for wave `thiswave'"
		exit 459
	}

	* ---- mother ----
	gen biomoth = `pp'bmoth
	gen mfull = (`pp'mwork == 1)
	gen mpart = (`pp'mwork == 2)
	gen mincom = `pp'fn09mi // imputed and not imputed largely similar
	replace mincom = . if mincom < 0
	gen mhours = `pp'pw09m // usual hours worked
	replace mhours = . if mhours < 0
	* education: degree, certificate, year 12 only, everything else
	gen mdegree = `pp'fd08m3a==1 | `pp'fd08m3a==2 | `pp'fd08m3a==3
	gen mcertif = `pp'fd08m3a==4 | `pp'fd08m3a==5
	gen myear12 = `pp'fd08m1==1 & (mdegree==0 & mcertif==0)
	gen mothers = mdegree == 0 & mcertif == 0 & myear12 == 0
	gen mpoorhe = (`pp'hs13m==4 | `pp'hs13m==5)
	gen mbiolog = (`pp'bmoth == 1)
	* NOTE(review): any value other than 1101, including missing or negative
	* codes, is counted as non-Australian here - confirm this is intended
	gen mnonaus = (zf09`pp'm != 1101)
	gen mage = `pp'f03`pp'm
	replace mage = . if mage < 0
	drop if mage ==  .

	* ---- father ----
	gen ffull = (`pp'fwork == 1)
	gen fpart = (`pp'fwork == 2)
	gen fincom = `pp'fn09fi // imputed and not imputed largely similar
	replace fincom = . if fincom < 0
	gen fhours = `pp'pw09f // usual hours worked
	replace fhours = . if fhours < 0
	gen fdegree = `pp'fd08f3a==1 | `pp'fd08f3a==2 | `pp'fd08f3a==3
	gen fcertif = `pp'fd08f3a==4 | `pp'fd08f3a==5
	gen fyear12 = `pp'fd08f1==1 & (fdegree==0 & fcertif==0)
	gen fothers = fdegree == 0 & fcertif == 0 & fyear12 == 0
	gen fpoorhe = (`pp'hs13f==4 | `pp'hs13f==5)
	gen fbiolog = (`pp'bfath == 1)
	* NOTE(review): same caveat as for mnonaus above
	gen fnonaus = (zf09`pp'f != 1101)
	gen fage = `pp'f03`pp'f
	replace fage = . if fage < 0

	* ---- household ----
	gen no_youngsib = `pp'nyngsib
	replace no_youngsib = . if no_youngsib < 0
	gen no_oldsib = `pp'noldsib
	replace no_oldsib = . if no_oldsib < 0
	gen par_onparent = (`pp'parpart == 1)

	keep hicid wave par_onparent *degree *certif *year12 *others *biolog *nonaus *full *part *incom* *hours no_* mage fage biomoth

	* The first wave starts labor.dta; every later wave is stacked onto it.
	if `kk' != `firstwave' {
		append using "labor.dta"
	}
	compress
	save "labor.dta",replace
}

* Attach the wave-level parent variables to the child-level file
* (one child row matches many wave rows); labor rows without a child record are dropped.
use "main_data.dta",clear
merge 1:m hicid using "labor.dta"
drop if _merge == 2
drop _merge

** some more adjusting and condensing
*local varsf fage fful fpart fincom fhours fnonaus fhigh fiddl fothe
*foreach kk of local varsf {
*	replace `kk' = 0 if par_onpar == 1
*}

* hours are only meaningful for parents coded as working full or part time
replace mhours = . if mhours != . & mfull == 0 & mpart == 0
replace fhours = . if fhours != . & ffull == 0 & fpart == 0

* collapse education into high (degree or certificate) and middle (year 12)
gen mhigh = (mdegree == 1 | mcertif == 1)
gen middl = (myear12 == 1)
gen fhigh = (fdegree == 1 | fcertif == 1)
gen fiddl = (fyear12 == 1)


* For each characteristic store the value from the first observed wave of
* the child (rows are ordered by wave within hicid).
local mothvars mage mnonaus mhigh middl par_onparent no_youngsib no_oldsib
local fathvars fage fnonaus fhigh fiddl 

foreach kk in `mothvars' `fathvars' {
	bysort hicid (wave): gen `kk'_first = `kk'[1]
}

** restrict estimation sample
** to biological mothers; the age-range and high-hours restrictions below are
** currently disabled
keep if biomoth == 1
*keep if mage_first >= 25 & mage_first <= 45
*drop if mhours > 60 & mhours != .
* Drop those who say that they are working but don't receive income.
* Missing hours compare as larger than zero in Stata, so they must be excluded
* explicitly; otherwise non-working mothers with zero income (whose hours were
* set to missing above) would be dropped as well.
drop if mincom == 0 & mhours > 0 & !missing(mhours)


** create the data file
sort hicid wave
local outvars hicid	wave mage_first mnonaus_first mhigh_first middl_first no_youngsib_first no_oldsib_first par_onparent_first	///
			  fage_first fnonaus_first fhigh_first fiddl_first ///
			  female young_cohort prob_birth prem_birth	multi_birth pre_school pre_year pre_others ///
			  left_hand	both_hands ///
			  mfull	mpart mhours mincom	///
			  ffull fpart fhours fincom	///
			  cdevelopment
			  
* NOTE(review): outsheet without the comma option writes a tab-delimited file
* even though the extension is .csv - confirm the downstream reader expects tabs
outsheet  `outvars' using labor_sample_unrest.csv, nolabel  replace












