* This do file constructs imputed values of wealth and applies sample-restrictions
* Start from an empty session, load the BHPSpanel dataset and declare the panel structure
clear all

* -use- accepts the option -clear-, not -replace- (which is an option of -save-)
use ${output_data}\BHPSpanel, clear
xtset pid wave 

********************************************************************************************
* Construct variables to be used in the subsequent sample selection and/or analysis
*-------------------------------------------------------------------------------------------
* Negative values (inapplicable codes) are set to system missing for every variable listed here
unab negcoded : bwtag* race age nkids pay* jbstat isced sex nch* savey* lchmor* lchnmor xpfood ncars carown fiyr* jbhrs jbot hsownd doby
foreach v of local negcoded {
	replace `v' = . if `v' < 0
}

***
* AGE, such that it always increases with one year across one wave.
* Starting from the reported age, each later observation is set to the previous observation's
* Age plus the number of waves elapsed, whenever the previous Age is non-missing.
cap drop Age
g Age = age
*bysort pid: g Age = age[_n-1] + 1 if !missing(age[_n-1]) /*Corrected 5/2-2015, Thomas*/
* Sort by wave within pid explicitly: the [_n-1] recursion is only meaningful in wave order,
* and -bysort pid:- alone does not fix the order of observations within pid
bysort pid (wave): replace Age = Age[_n-1] + (wave[_n] - wave[_n-1]) if !missing(Age[_n-1]) /*Corrected 5/2-2015, Thomas*/
*replace Age = age if missing(Age)

* Replace the raw age variable with the smoothed one
drop age
ren Age age



***
* NUMBER OF CHILDREN
cap drop NumKids
g NumKids = nchild 

* Childless: 1 when nkids and nchild are both zero, 0 otherwise, missing when nkids is missing
cap drop Childless
g Childless = (nkids==0 & nchild==0) if !missing(nkids)

* ME: only available for wave 9
* For each suffix 1-4: Age<k> is the per-person minimum of the non-missing bwtag<k> values,
* shifted by the number of waves elapsed since the earliest wave in which bwtag<k> is non-missing
forvalues k = 1/4 {
	cap drop Age`k' wave`k'
	* earliest wave with a non-missing bwtag<k> (helper, dropped again below)
	by pid: egen wave`k' = min(cond(!missing(bwtag`k'), wave, .))
	by pid: egen Age`k' = min(cond(!missing(bwtag`k'), bwtag`k', .))
	replace Age`k' = Age`k' + (wave - wave`k')
	*replace Age`k' = . if Age`k'<0
	cap drop wave`k'
}
* All variables whose name starts with bw are removed once the Age<k> variables exist
drop bw*


***
* EXPECTATIONS ABOUT CHILDREN
* Builds ExpChild (expects more children), ExpNChild (number expected) and TargetChild from lchmor, lchmorn and lchnmor.
* A few respondents (one) report that they expect to have children but report the expected number to be zero;
* for those, lchmor is set to 0, which is neither of the values (1, 3) mapped below
replace lchmor = 0 if  lchmor==1 & (( lchnmor ==0 & wave==8) | (lchmorn ==0) )
cap drop ExpChild
g ExpChild = . /* Original note: "Don't know (-1,8) and currently pregnant (2) is included as NO". NOTE(review): as coded, every lchmor value other than 1 and 3 leaves ExpChild missing rather than 0 - confirm intent */


replace ExpChild = 1 if inlist(lchmor,1)  /* Original note: "Include pregnant women". NOTE(review): only lchmor==1 is matched here - confirm whether code 2 should be included */
replace ExpChild = 0 if inlist(lchmor,3) 

* Number expected. In wave 8 the number is held in lchnmor, in all other waves in lchmorn; merge into lchmorn
replace lchmorn = lchnmor if wave==8
drop lchnmor
cap drop ExpNChild
g ExpNChild = . 
* Zero when no children are expected; otherwise the reported number (values 1, 2, 3, or any non-missing value >= 4)
replace ExpNChild = 0 if ExpChild == 0 
replace ExpNChild = 1 if inlist(lchmorn,1) 
replace ExpNChild = 2 if inlist(lchmorn,2) 
replace ExpNChild = 3 if inlist(lchmorn,3) 
replace ExpNChild = lchmorn if lchmorn>=4 & !missing(lchmorn)
* Wave 11: a large proportion of individuals are classified as "inapplicable", so that wave is not used -> set to missing.
* The tabulation below displays the distribution of lchmor by wave (including missing values)
tab lchmor wave, col m
replace ExpChild  = . if wave==11
replace ExpNChild = . if wave==11

/* Remove inconsistencies: expecting children but an expected number of zero */
replace ExpNChild=. if ExpChild==1 & ExpNChild==0

* Target number of children (own): expected additional children plus NnChildren.
* NOTE(review): NnChildren is not created in this file, while the first-child section uses NChildren - confirm this is the intended variable and not a typo
cap drop TargetChild
g TargetChild = ExpNChild + NnChildren



***
* REASONS FOR SAVING: use the first reason from waves where two could be given.
* Where savey1 is missing, the single-reason variable savey is used instead
replace savey1 = savey if missing(savey1)
drop savey savey2
ren savey1 savey
* SaveForChildren: 1 when the reason equals 4, 0 for any other reported reason, missing when no reason is recorded
cap drop SaveForChildren
g SaveForChildren = (savey==4) if !missing(savey)


* No. of natural children the respondent is parent to (not necessarily in the household). NOT USED. There is some inconsistency: many have no natural children but still have a number of natural children in the household
* This variable corresponds quite well with BioKids 
*replace lnprnt = 0 if lnprnt ==-8
* Negative values of lnprnt are set to missing
replace lnprnt = . if lnprnt<0

***
* WAVE FIRST CHILD ARRIVES
sort pid wave
* FirstChild flags observations where NChildren increased since the previous wave and now equals one
cap drop FirstChild
g FirstChild = (D.NChildren>0 & !missing(D.NChildren) & NChildren==1)
* Construct missing if never children in sample (running sum of the flag is still zero at the last observation)
cap drop temp
by pid: gen temp = sum(FirstChild)
by pid: replace FirstChild = . if temp[_N]<1
* Generate wave related to first child: temp holds the (latest) flagged wave, WaveFromFirst the distance to it
cap drop temp
by pid: egen temp = max(FirstChild*wave)
cap drop WaveFromFirst
g WaveFromFirst = wave - temp
drop temp FirstChild

* Construct a variable that is constant about expectations of children BEFORE having children.
* cond() needs all three arguments inside its own parentheses; the previous form closed cond() after the
* condition, which is a syntax error. Observations at or after the first child (or with missing
* WaveFromFirst) contribute missing and are ignored by max().
cap drop ExpFirst
by pid: egen ExpFirst = max(cond(WaveFromFirst<0, ExpChild, .))

*br pid hid wave mlstat ExpFirst* WaveFromFirst* NumKids* ExpChil* if pid==70751498


***
* GENDER: Male is 1 for sex==1, 0 for sex==2 and missing for any other value
cap drop Male
g Male = cond(sex==1, 1, cond(sex==2, 0, .))

***
* MARITAL STATUS. Use mlstat or hgr2r=3-> live-in partner. mlsata=6-> in a civil partnership (later waves and few)
* khhtype: more broad
* Married is 1 when mlstat equals 1, 0 for any other reported status, missing when mlstat is missing
cap drop Married
g Married = (mlstat==1) if !missing(mlstat)

***
* RACE: White (race) The race is determined in the first wave and then only updated for first-time interviews
* White is 1 when race==1, 0 for any other reported race; gaps are filled from neighbouring waves of the same person
cap drop temp
cap drop White
g White = (race==1) if !missing(race)

* Forward fill: -replace- runs in observation order, so a value carries across consecutive missing waves
sort pid wave
by pid: replace White = White[_n-1] if missing(White)

* Backward fill: looking at [_n+1] in ascending order only fills a single wave, because the next
* observation has not been updated yet. Reversing the wave order lets the fill cascade.
cap drop negwave
g negwave = -wave
sort pid negwave
by pid: replace White = White[_n-1] if missing(White)

* Restore the pid/wave order that later sections rely on
sort pid wave
drop negwave race*

***
* HOMEOWNER (hsownd, hscost: price when bought)
* 1 for hsownd in {1,2} (owned, mortgaged and shared owned), 0 for hsownd in {3,4,5} (rented, rent-free and other), missing otherwise
cap drop OwnHouse
g OwnHouse = cond(inlist(hsownd,1,2), 1, cond(inlist(hsownd,3,4,5), 0, .))

***
* SAVINGS 
* save and saved are not covered by the negative-to-missing recode at the top of the file
* (savey* does not match them), so negative codes are cleared here before any arithmetic.
* This is a no-op if they were already cleaned upstream.
replace save  = . if save<0
replace saved = . if saved<0
replace save = 2 - save /* recode from 1: yes, 2: no to 0: no, 1:yes*/
replace saved = saved*12 						/* Annual*/
replace saved = 0 if missing(saved) & save==0 	/* Include zero saving*/
cap drop IMP2savings 
egen IMP2savings 	= rowtotal(IMP2bankk IMP2savek IMP2svack) /*Savings are based on several (mutually exclusive) variables*/
* rowtotal() returns 0 when every component is missing; restore missing in that case
replace IMP2savings = . if missing(IMP2bankk) & missing(IMP2savek) & missing(IMP2svack)

* impute liquid wealth
* time is the within-person observation counter in wave order; firstwealth_w is the counter value of the
* first observation with a non-missing IMP2savings and firstwealth is the savings stock at that observation
*by pid: egen firstwealth = min(cond(!missing(IMP2savings), IMP2savings, .)) 
cap drop time
cap drop firstwealth_w
cap drop firstwealth
sort pid wave
by pid: g time = _n
* cond() needs all three arguments inside its own parentheses; the previous form closed cond() after the
* condition, which is a syntax error
by pid: egen firstwealth_w = min(cond(!missing(IMP2savings), time, .)) 
by pid: gen firstwealth = IMP2savings[firstwealth_w]

* Next wave's annual saving (requires the xtset declaration)
cap drop Fsaved 
g Fsaved = F.saved

* Sort waves in descending order within pid (1/wave falls as wave rises) so that the running sum
* below accumulates backwards in time, starting just before the first wealth observation
cap drop temp
g temp = 1/wave
sort pid temp

cap drop Accum
by pid: gen Accum = sum(Fsaved) if time<firstwealth_w & !missing(firstwealth_w)

sort pid wave

* At the anchor observation nothing is accumulated; afterwards savings are added on top of the anchor
* stock (negative sign because Wealth subtracts Accum)
replace Accum = 0 if time==firstwealth_w
by pid: replace Accum = -sum(saved) if time>firstwealth_w & !missing(firstwealth_w)


cap drop Wealth 
g Wealth = firstwealth - Accum

* Compare imputed wealth with the observed stock away from the anchor observation
corr IMP2savings Wealth if time != firstwealth_w
corr IMP2savings Wealth if time != firstwealth_w & age<=35



***
* CONSUMPTION (food and cars)
* food is bracketed, but the first wave it was not bracketed. use midpoint/average of bins
cap drop Food
g Food =   5*(xpfood==1)  +  15*(xpfood==2)  +  25*(xpfood==3)  +  35*(xpfood==4) ///
        + 45*(xpfood==5)  +  55*(xpfood==6)  +  70*(xpfood==7)  +  90*(xpfood==8) ///
        + 110*(xpfood==9) + 130*(xpfood==10) + 150*(xpfood==11) + 180*(xpfood==12)
* Every indicator above is 0 when xpfood is missing, which would record non-response as zero food
* spending; keep such observations missing instead
replace Food = . if missing(xpfood)
* Wave 1 reports the amount directly rather than a bracket
replace Food = xpfood if wave==1
replace Food = Food*52 /*on an annual basis*/

cap drop Ncars
g Ncars = ncars
replace Ncars = 0 if carown == 2 /*Company car*/
* Ncars>0 is also true for missing Ncars (missing sorts above every number), hence the reset below
cap drop OwnCar 
g OwnCar = Ncars>0
* Full variable name: the previous "Ncar" only resolved through variable abbreviation
replace OwnCar=. if missing(Ncars)


***
* EDUCATION (isced)
/*
Tabulation of isced (label, code, frequency, percent, valid percent):
Not defined	0	234	1.2	1.3
Primary	1	5110	27.1	28.3
low secondary	2	195	1.0	1.1
3c:low sec-voc	3	5366	28.4	29.7
3a:hisec-mivoc	4	2099	11.1	11.6
5b:higher voc	5	3003	15.9	16.6
5a:first degree	6	1637	8.7	9.1
6:higher degree	7	422	2.2	2.3
Proxy and or phone	-7	801	4.2	Missing
*/
* High skilled is "first degree" or above, i.e. isced codes 6 and 7: first or second stage of tertiary education
* (ISCED 5 and 6: http://www.uis.unesco.org/Library/Documents/isced97-en.pdf). Missing when isced is missing.
cap drop Highskilled
g Highskilled = (isced>=6) if !missing(isced)

* EMPLOYMENT (JBSTAT)
* Questionnaire codes:
*   01 Self employed
*   02 In paid employment (full or part-time) ... ASK D14
*   03 Unemployed
*   04 Retired from paid work altogether
*   05 On maternity leave
*   06 Looking after family or home
*   07 Full-time student / at school ... GO TO D16
*   08 Long term sick or disabled
*   09 On a government training scheme ... ASK D14
*   10 Something else (PLEASE GIVE DETAILS)
* Working = 1 for codes 1-2 (wage work and self-employment; self-employed may be excluded from the study),
*         = 0 for codes 3, 5, 6, 7 (temporarily out of the labor market),
*         = . otherwise (permanently out of the labor market or the last two categories)
cap drop Working
g Working = cond(inlist(jbstat,1,2), 1, cond(inlist(jbstat,3,5,6,7), 0, .))


***
* INCOME. Use avg/usual pay (payu) for the number of weeks (payuw). The documentation suggests that the coding is different than a histogram suggests. Alternatively, there are income supplements containing imputed income
* Weekly pay is taken from the first available source, in this order:
*   1. "usual" gross pay
*   2. last gross pay
*   3. last net pay, grossed up by 40 percent for tax
cap drop wpay 
g wpay = payu/payuw
replace wpay = paygl/paygw if missing(wpay)
replace wpay = 1.4*(paynl/paynw) if missing(wpay)
cap drop AnnualPay 
g AnnualPay = 52*wpay
drop wpay pay*

* Income: Annual. Zero annual income is in the raw variable
* THOMAS: DO NOT USE fiyrl, USE fiyr: TOTAL INCOME (NOT ONLY LABOR MARKET!)
cap drop Income
g Income = fiyr
*g Income = fiyrl

* Construct labor hours: jbhrs plus jbot, missing only when both components are missing
cap drop WorkHours
egen WorkHours = rowtotal(jbhrs jbot), missing


*hist Income if Income <10000 & Income>0
*sum Income if Income < .01
* Impute savings for wave 6 7 and 8 using stock in wave 5 and avg. monthly savings in subsequent waves
*bysort jbstat: sum AnnualPay if Income==0

********************************************************************************************

********************************************************************************************
* Apply sample selection criteria and store the number of observations "lost"
*-------------------------------------------------------------------------------------------
* Permanently outside the labor market
* (this restriction is currently switched off: the -keep- below is commented out, so no observations are dropped here)
*keep if !missing(Working)

********************************************************************************************

* Write the adjusted panel to the output folder, overwriting any existing file of the same name
save ${output_data}\BHPSpanelAdjusted, replace
