* This do-file builds and saves a household-level data set from the
* individual-level panel BHPSpanelAdjusted.
clear all

* -use- documents the options -clear- and -nolabel-; -replace- is an option of -save-.
use ${output_data}\BHPSpanelAdjusted, clear

* Individual-level variables carried into the household-level (wide) data set.
* After the reshape further down, each of them exists once per spouse (suffix 1 and 2).
global vars "WorkHours SaveForChildren NChildren BYBaby* noBaby BioBaby NatChildren NnChildren Divorce DivorcewaveM ExpFirst WaveFromFirst Wealth save savey mAge* qAge* doiy4 BirthYear* Age* ExpChild ExpNChild TargetChild age nkids nchild lprnt NumKids Childless nch02  nch34 nch511 nch1215 nch1618 lchmor lchmorn Income Food saved AnnualPay fiyr IMP2* Married OwnHouse Highskilled White NoGirls doby"
* Reference tabulation before any sample restriction
tab noBaby if sex==2 & noBaby>0 & age>20 & age<62
********************************************************************************************
* Build the couple sample: two persons of different sex per household and wave             *
********************************************************************************************
* Only keep reference person and spouse (hgr2r equal to 1, 2 or 3)
keep if inlist(hgr2r, 1, 2, 3)
tab noBaby if sex==2 & noBaby>0 & age>20 & age<62

* Number of remaining persons in each household-wave
gen nn=1
bysort hid wave: egen nnhh=total(nn)

tab nnhh mlstat

* A household-wave with a single remaining person is flagged as divorced.
* (Alternative definition, not used here: mlstat==2|mlstat==3.)
gen Divorce= 1 if nnhh==1
* Latest wave in which each person carries the flag
gen Divorcewave=wave if Divorce==1
bys pid: egen DivorcewaveM=max(Divorcewave)
drop Divorcewave
tab noBaby nnhh if sex==2 & noBaby>0 & age>20 & age<62
* Only keep observations where there are two spouses in the household
keep if nnhh==2

* Persons with missing sex in some years get the sex recorded in their other years
bysort pid: egen sex_temp = max(sex)
replace sex = sex_temp
drop sex_temp

sort hid wave sex

* The two sex codes must add up to 3 within a household-wave; this deletes same-sex couples
by hid wave: egen nsex=total(sex)
tab nsex
keep if nsex==3


**********************************************************************************
* Construct household identifier                                             *
**********************************************************************************
* One row per household-wave; the reshape suffix is the sex code
* (the labels further down treat suffix 1 as the husband and suffix 2 as the wife).
keep $vars hid wave sex pid
reshape wide $vars pid, i(hid wave) j(sex)  

sort pid2 
save temp1, replace

* Flag wives (pid2) who are observed with more than one husband (pid1 varies)
collapse (sd) sd=pid1 , by(pid2)
gen unstable = sd>0 & !missing(sd)
drop sd

merge 1:m pid2 using temp1

sort pid2 pid1 wave

by pid2 pid1: gen PwS =_n /*running from 1 within each couple*/

by pid2: g temp = 1 if PwS==1
g HID = sum(temp) /*construct the new household identifier based on these "new" couples*/

* Flag couples that are not the wife's first observed partnership.
* The previous version differenced pid1 across consecutive rows, but the data are
* sorted by pid2 pid1 wave, so it flagged the couple with the larger husband id
* instead of the couple that was formed later. Compare first-observed waves instead.
tempvar coupleStart wifeStart
by pid2 pid1: egen `coupleStart' = min(wave)
by pid2: egen `wifeStart' = min(wave)
gen dpartner = (`coupleStart' > `wifeStart')
drop `coupleStart' `wifeStart'

**********
* Let the new HID be the household identifier
drop hid
ren HID hid
order hid, first

sort hid wave

* SecMar==1 marks every wave of a couple that is not the wife's first partnership
bys hid: egen SecMar=max(dpartner) if !missing(dpartner)

replace SecMar=(SecMar>0) if !missing(SecMar)


**********************************************************************************
* Assign household variables as the wife's value first and then the husband's    *
**********************************************************************************
* Check if expectations are the same 
tab ExpChild1 ExpChild2  if inlist(wave, 2,8,12,13,17)  , missing col chi
tab TargetChild1 TargetChild2  if inlist(wave, 2,8,12,13,17)  , missing col chi

*NEW:ALTERNATIVE
* Largest ExpChild reported in waves with WaveFromFirst<0, per spouse.
* cond() needs all three arguments inside its own parentheses: cond(test, value, .)
cap drop ExpFirstA1
by hid: egen ExpFirstA1 = max(cond(WaveFromFirst1<0, ExpChild1, .))
cap drop ExpFirstA2
by hid: egen ExpFirstA2 = max(cond(WaveFromFirst2<0, ExpChild2, .))
* Household value: the wife's, falling back on the husband's when hers is missing
local var = "ExpFirstA"
g `var' = `var'2
replace `var' = `var'1 if missing(`var'2)

tab ExpFirstA1 if WaveFromFirst1==0 & !missing(WaveFromFirst1)
tab ExpFirstA2 if WaveFromFirst2==0 & !missing(WaveFromFirst2)
***********


* Same rule for the remaining variables: wife's value, husband's if hers is missing
foreach var in ExpChild ExpNChild  Married savey OwnHouse TargetChild /*ExpFirst*/{
	g `var' = `var'2
	replace `var' = `var'1 if missing(`var'2)
	drop `var'1 `var'2
}
* Expecting a child together with zero expected children is treated as unknown
replace ExpNChild=. if ExpChild==1 & ExpNChild==0

* Age of children:
forvalues kid=1/7 {
	gen BirthYear`kid' = BirthYear`kid'2
	replace BirthYear`kid' = BirthYear`kid'1 if missing(BirthYear`kid'2)
	drop BirthYear`kid'1 BirthYear`kid'2
}
foreach wav in m q {
forvalues kid=1/6 {
	gen `wav'Age`kid' = `wav'Age`kid'2
	replace `wav'Age`kid' = `wav'Age`kid'1 if missing(`wav'Age`kid'2)
	drop `wav'Age`kid'1 `wav'Age`kid'2
}
}

**********************************************************************************
* Assign household variables as the wife's only                                  *
**********************************************************************************
*tab ExpFirst2 if WaveFromFirst2==0 & !missing(WaveFromFirst2)
*tab ExpFirst1 if WaveFromFirst1==0 & !missing(WaveFromFirst1)

*br pid2 pid1 hid wave ExpFirst* WaveFromFirst* NumKids* ExpChil* if pid1==70751498

*SOMETHING GOES WRONG:
* + not using the males reduces the number of non-expecters to 30..
* + there cannot be a jump in observations when going from NumKids=0 to NumKids=1. We could, however, infer when the child arrived using the age of the child..
* + right now ExpFirst is potentially taken from BEFORE the couple was formed: for pid1==70751498 this makes it look, as above, as if we do not know the expectation before the child arrived. But we do know it for the man!
* ALTERNATIVE: use both spouses' information, but only from the point where they have formed a couple. That would perhaps be the most natural choice!

* Keep the husband's age under its own name before age1 is dropped below
gen AgeMan=age1

* For these variables the household value is the wife's (suffix 2), with no fallback
local wifeVars WaveFromFirst nch02 nch34 nch511 nch1215 nch1618 NumKids nkids nchild Childless Age1 Age2 Age3 Age4 age ExpFirst NChildren NnChildren noBaby NatChildren BioBaby  Divorce DivorcewaveM NoGirls
foreach stub of local wifeVars {
	generate `stub' = `stub'2
	drop `stub'1 `stub'2
}
* Target number of children: expected additional children plus NnChildren
replace TargetChild=ExpNChild+NnChildren
**********************************************************************************
* Assign household variables as the husband's only                               *
**********************************************************************************
local husbandVars White  lprnt
foreach stub of local husbandVars {
	generate `stub' = `stub'1
	drop `stub'1 `stub'2
}
generate Highskilled = Highskilled1
* Make Highskilled constant within household: running sum of the husband's dummy,
* then 1 for every wave when the household total exceeds 1.
* NOTE(review): the threshold is ">1", so a household with exactly one high-skilled
* observation is set to 0 - confirm that this is intended rather than ">0".
by hid: generate tHighskilled = sum(Highskilled)
by hid: replace Highskilled = tHighskilled[_N]>1

* Declare the panel structure; the lag operators used further down rely on it
xtset hid wave


**********************************************************************************
* Assign row-total to generate household variables                               *
**********************************************************************************
* Sum over both spouses; missing only when both spouses' values are missing
foreach var in Food saved Wealth IMP2savings IMP2debty IMP2nvestk {
	egen `var' = rowtotal(`var'1 `var'2)
	replace `var' = . if missing(`var'1) & missing(`var'2)
	drop `var'1 `var'2
}

* Household saves if either spouse reports save==1; missing if both are missing
gen save = save1==1 & !missing(save1) | save2==1 & !missing(save2)
replace save = . if missing(save1) & missing(save2)

* Plain sums: missing as soon as one spouse's value is missing
gen Income=Income1+Income2 /*THOMAS: what about missings here?*/


gen AnnualPay=AnnualPay1+AnnualPay2

gen SavingRate = saved/Income
* Smallest ExpChild observed while the household has NumKids==0.
* cond() needs all three arguments inside its own parentheses: cond(test, value, .)
by hid: egen ChildlessExpChild=min(cond(!missing(ExpChild) & NumKids==0, ExpChild, .))

* Dummy: at least one spouse has a positive SaveForChildren; missing if both are missing
egen SaveForChildren 	= rowtotal(SaveForChildren1 SaveForChildren2)
replace SaveForChildren = SaveForChildren>0
replace SaveForChildren = . if missing(SaveForChildren1) &  missing(SaveForChildren2)


*****************************************************
* Construct arrival age using the restricted sample:
* ArrivalAge`k' is the lowest value of age in a wave where NChildren moves from k-1 to k.
* The earlier version wrote min(cond(test),x,.), which closes cond() after one
* argument. The minimum over the non-missing values is what egen min() returns,
* so cond() is not needed at all.
forvalues k = 1/3 {
	local prev = `k' - 1
	cap drop ArrivalAge_new
	cap drop ArrivalAge`k'
	g ArrivalAge_new = age if NChildren==`k' & L.NChildren==`prev'
	by hid: egen ArrivalAge`k' = min(ArrivalAge_new)
}

********************************************************************************
* Fill in expectations
********************************************************************************
sort hid wave

/* take care of pregnant women */

* Missing targets are filled from the most recent of the previous six waves that has a value
gen TargetChildUpdate=TargetChild
forvalues w = 2/18 {
	forvalues lag = 1/6 {
		replace TargetChildUpdate=L`lag'.TargetChild if wave==`w' & missing(TargetChildUpdate)
	}
}

* Same fill for the expectation dummy
gen ExpChildUpdate=ExpChild
forvalues w = 2/18 {
	forvalues lag = 1/6 {
		replace ExpChildUpdate=L`lag'.ExpChild if wave==`w' & missing(ExpChildUpdate)
	}

	* Where the expectation was not reported and NnChildren has reached the (filled) target,
	* set the expectation to 0; this overrides the lag fill above
	replace ExpChildUpdate=0 if wave==`w' & missing(ExpChild) & NnChildren>=TargetChildUpdate & !missing(NnChildren)& !missing(TargetChildUpdate)
}



********************************************************************************
* Restrict sample
***** "extreme observations" ************
set more off
* Within each wave, set Income and Wealth to missing at or below the 1st and at or
* above the 99th percentile. The cut-offs live in temporary scalars: a bare scalar
* name in an expression loses to any variable whose name or abbreviation matches.
tempname cutLow cutHigh
forvalues wave=1/18{
	foreach var in Income Wealth {
		qui sum `var' if wave==`wave' ,detail
		scalar `cutLow' = r(p1)
		scalar `cutHigh' = r(p99)
		replace `var' = . if `var'>=`cutHigh' & wave==`wave'
		replace `var' = . if `var'<=`cutLow' & wave==`wave'
	}
}

* Extreme saving rate: drop rates above 1 and rates whose Income was trimmed above
sum SavingRate, detail
replace SavingRate = . if SavingRate>1 | missing(Income)

/* deflating to 2000 prices (index 72.7 in 2000)*/

* Year used for the price index: the husband's doiy4.
* NOTE(review): a missing year is set to 1991 whatever the wave - confirm this fallback.
gen year=doiy41
replace year=1991 if missing(year)
sort year
drop _m
* keep(master match): years that appear only in the cpi file would otherwise be
* added as rows with no household information
merge m:1 year using ${bhps_data}\cpi, keep(master match)
destring cpi, gen(cpi1)
drop _m

gen IncomeU=Income*(72.7/cpi1)
gen WealthU=Wealth*(72.7/cpi1)


* Negative deflated wealth is treated as missing
replace WealthU=. if WealthU<0

* Park the full household panel in a temporary file
sort hid wave
tempfile HHPanel
save `HHPanel'

/* initial distribution */
* Household means of deflated income and wealth over the waves with age above 19 and below 25
keep if age>19 & age<25
sort hid
collapse (mean) IncomeI=IncomeU WealthI=WealthU, by(hid)

* Attach the initial values to every wave of the household
merge 1:m hid using `HHPanel'




/* sample selection */
* Age window 20-62; a missing age counts as larger than 62 and is dropped as well
drop if age>62 | age<20
tab wave
bys Highskilled: xtsum age

* Panel counts by skill group before any further restriction
foreach skill in 1 0 {
	xtsum Highskilled if Highskilled==`skill'
}
tab wave

* Apply the two remaining restrictions one at a time and report the counts after each:
*   SecMar ==1           only keep the first marriage for each wife
*   missing(NnChildren)  NnChildren must be known
foreach restriction in "SecMar ==1" "missing(NnChildren)" {
	drop if `restriction'
	foreach skill in 1 0 {
		xtsum Highskilled if Highskilled==`skill'
	}
	tab wave
}

**** Restrict the sample and construct arrival age of children for the restricted sample
* resources
g resources = Wealth + Income

* Sort by wave within household: with -sort hid- alone the order of the waves inside
* a household is arbitrary, so "first" would not mean the earliest wave.
sort hid wave

*drop if missing(firstresources) | firstresources<0
*drop if missing(SavingRate)

* Position, within the household, of the first wave with non-missing resources.
* cond() needs all three arguments inside its own parentheses: cond(test, value, .)
cap drop time firstresources firstresources_w
by hid: g time = _n
by hid: egen firstresources_w 	= min(cond(!missing(resources), time, .))

* Values observed in that wave (subscripts are relative to the household under -by-)
by hid: gen firstresources 		= resources[firstresources_w]
by hid: gen firstNumKids 		= NumKids[firstresources_w]
by hid: gen firstAge     		= age[firstresources_w]





cap drop _merge

* From here on Income and Wealth are the deflated series
drop Income Wealth
rename IncomeU Income
rename WealthU Wealth

sort hid wave

* Marks the age-24 observation when wealth, income and NnChildren are all observed
gen Initial24=1 if age==24 & !missing(Wealth) & !missing(NnChildren) & !missing(Income)

* Variable labels. The pound sign was stored as a mis-encoded two-character sequence,
* and two labels contained typos ("chilren", "Weakly").
label var Income "Income (in 2000 £)"
label var IncomeI "Initial Income (in 2000 £)"
label var Wealth "Wealth (in 2000 £)"
label var WealthI "Initial Wealth (in 2000 £)"
label var NnChildren "No Own Children"
label var nchild "No of own children in HH"
label var ExpChild "Expecting future children"
label var ExpChildUpdate "Imputed expecting future children"
label var SavingRate "Saving rate"
label var noBaby "No new born babies"
label var TargetChildUpdate "Imputed Target Children"
label var Initial24 "Information at 24"
label var OwnHouse "Own their house"
label var SaveForChildren "Saves for children"
label var WorkHours1 "Weekly work hours, husband"
label var WorkHours2 "Weekly work hours, wife"

* Redefine Divorce at the household level: 1 when the wife's latest flagged wave
* (DivorcewaveM) lies after the first wave the household has in the final sample
drop Divorce
bys hid: egen Firstwave=min(wave)
gen Divorce=((Firstwave<DivorcewaveM) & !missing(DivorcewaveM))
 
keep hid pid2 doby* Income IncomeI Wealth WealthI NnChildren ExpChild ExpChildUpdate Divorce Highskilled age SavingRate noBaby nkids wave TargetChildUpdate ExpNChild nchild Initial24 OwnHouse SaveForChildren WorkHours* Married NoGirls
save ${output_data}\BHPSpanelAdjustedHouseholds, replace


