*********************************************************
************      2_Compile_Data_BHPS      **************
*********************************************************


*this .do file draws on code from Blundell and Etheridge (2010),  Compile_Data.do file.


* Start from an empty session, then load the imputed BHPS panel from the
* project folder held in the local macro `location'.
clear all

local location "C:\Users\spyro\OneDrive - Lancaster University\Submitted_papers_Replication_files\Risk_UK_JAE_Feb_2021"
use "`location'\\data\paneldataBHPSimp.dta", clear

******************************************************************************
***          1.Drop the proxy respondents/non-full interviews              ***
******************************************************************************
* Retain every record whose interview-outcome code ivfio lies outside (1, 99];
* records with a missing ivfio are retained as well.
keep if !(ivfio>1 & ivfio<=99)

******************************************************************************
***2.Create an index for the head of the household as it is defined by BHPS***
******************************************************************************
* Recode the BHPS head-of-household flag so that code 2 becomes 0, leaving 1
* as the "is head" value.
replace hoh=0 if hoh==2
* index is 1 for BHPS heads and missing for everybody else.
gen index=1 if hoh==1
* index2 = number of BHPS-designated heads in the household-year (0 when none,
* because total() treats missing as zero). The count is taken within year AND
* hid, like every other household aggregate in this file, so that it stays
* wave-specific should an hid value occur in more than one year.
egen index2=total(index), by(year hid)


******************************************************************************
***                 3.Process the sampling weights                         ***
******************************************************************************
* Household cross-sectional weight: xhwght from 1992 onwards, hhwght before.
* (A missing year satisfies year >= 1992 and therefore also takes xhwght.)
gen hh_samp_weights_Cross_UK = cond(year >= 1992, xhwght, hhwght)
* Individual cross-sectional weight is taken straight from xrwght.
gen ind_samp_weights_Cross_UK = xrwght

******************************************************************************
***             4.Create a consistent age variable                         ***
******************************************************************************
* Interviews take place at different points of each year, so the reported age
* can repeat in consecutive years or skip a year. Rebuild age so that it
* advances by exactly one per calendar year, anchored on each person's
* smallest reported age and earliest observed year.
generate age_old = age // age as originally reported
sort pid, stable
egen age_min = min(age), by(pid)
egen year_min = min(year), by(pid)
replace age = age_min + year - year_min


******************************************************************************
***                       5.Educational Bands                              ***
******************************************************************************
* Collapse qfedhi into six bands in one pass. Codes that match none of the
* ranges (including a missing qfedhi) are left as missing; negative codes are
* collected in band 6.
gen educ_bands_2 = cond(inrange(qfedhi, 1, 2),   1, ///
                   cond(inrange(qfedhi, 3, 5),   2, ///
                   cond(qfedhi == 6,             3, ///
                   cond(inrange(qfedhi, 7, 9),   4, ///
                   cond(inrange(qfedhi, 10, 12), 5, ///
                   cond(qfedhi < 0,              6, .))))))

label define educ_2 1 "Degree," 2 "other higher/diploma/teaching/nursing" 3 "A-levels/ AS level/ Highers (scot)/" 4 "GCSE/O level" 5 "Other qual/No qual"  6 "missing"
label values educ_bands_2 educ_2

******************************************************************************
***                       6.Marital status                                 ***
******************************************************************************
* Binary indicators built from mastat: code 1 = married, code 2 = living
* together as a couple.
gen marbin  = (mastat == 1)
gen coupbin = inlist(mastat, 1, 2)

label define marlab  0 " " 1 "Married"
label values marbin marlab
label define couplab 0 " " 1 "Coupled"
label values coupbin couplab

* Number of couples in the household-year: each couple contributes two
* coupled members, hence the division by two.
bysort year hid: egen ncouple_h = total(coupbin)
replace ncouple_h = ncouple_h / 2

******************************************************************************
***                          7.Regions(12 classes)                         ***
******************************************************************************

* Map the 19 original region codes onto 12 classes; any other code (or a
* missing region) stays missing.
gen region_aux = .
* Codes that are merged into a shared class
replace region_aux = 1 if inlist(region, 1, 2)
replace region_aux = 6 if inlist(region, 7, 8)
replace region_aux = 7 if inlist(region, 9, 10, 11)
replace region_aux = 8 if inlist(region, 12, 13, 14)
replace region_aux = 9 if inlist(region, 15, 16)
* Codes that map one-to-one: 3..6 -> 2..5 and 17..19 -> 10..12
replace region_aux = region - 1 if inlist(region, 3, 4, 5, 6)
replace region_aux = region - 7 if inlist(region, 17, 18, 19)

* The 12-class version takes over the name region.
drop region
rename region_aux region

******************************************************************************
***                          8.number of kids                              ***
******************************************************************************
* Adopt the name numhhkid for nkids; the equivalence-scale step and the final
* order/keep list in this file refer to the variable by this name.
rename nkids numhhkid

******************************************************************************
***      9.Create income variables without imputed values                  ***
******************************************************************************

* Keep copies that still contain the imputed values (suffix _i) before the
* originals are cleaned.
gen annual_earn_i=fiyrl
gen paygu_i=paygu
* NOTE(review): the two lines above copy X into X_i, but this one reads paygti
* rather than paygty - confirm that paygti is the intended source variable.
gen paygty_i=paygti

* Blank annual labour income wherever its imputation flag is set.
replace fiyrl  = . if fiyrli == 1

* Clean paygty BEFORE paygu is overwritten. Once paygu has been set to missing
* for negative codes, the test "paygu < 0" can no longer be true (missing is
* not < 0 in Stata), so running it afterwards would leave paygty untouched
* for exactly the rows it is meant to blank.
replace paygty  = . if paygui == 1
replace paygty  = . if paygu <0

* Blank usual gross pay where it is imputed or carries a negative code.
replace paygu  = . if paygui == 1
replace paygu  = . if paygu <0

******************************************************************************
***                 10.Create log-income variables                         ***
******************************************************************************
* Natural logs of the income measures; each new variable is the source name
* prefixed with "l". Non-positive and missing inputs yield missing.
foreach src in fiyrl paygu paygu_i paygty paygty_i {
    gen l`src' = ln(`src')
}
* Log of jbhrs (hours) gets the short name lhrs.
gen lhrs = ln(jbhrs)

******************************************************************************
***                 11.Create weekly variables                             ***
******************************************************************************
* The level copy paygu_i is rebuilt further down from its (rescaled) log.
drop paygu_i

* Where lhrs is still missing, fall back on the log of jshrs.
replace lhrs = ln(jshrs) if lhrs == . & jshrs > 0

* Log weekly earnings: the pay variables are scaled by 12/52, i.e. log(52/12)
* is subtracted from their logs.
gen     lab_earnings = lpaygu - ln(52/12)
gen     learnings    = lpaygty - ln(52/12)
replace lpaygu_i     = lpaygu_i - ln(52/12)

* Log hours per week, per month and per year.
gen lhrs_week = lhrs
gen lhrs_mon  = lhrs + ln(52/12)
gen lhrs_an   = lhrs + ln(52)

* Log hourly wage = log weekly earnings minus log weekly hours.
gen lhourly_wage   = lab_earnings - lhrs_week
gen lhourly_wage_i = lpaygu_i - lhrs_week

* Levels, created directly under their final names.
gen W       = exp(lhourly_wage)
gen W_i     = exp(lhourly_wage_i)
gen L       = exp(lhrs_week)
gen hrs_an  = exp(lhrs_an)
gen YE_main = exp(lab_earnings)
gen Y_ind_L = exp(learnings)
gen paygu_i = exp(lpaygu_i)

* Annual earnings (fiyrl as it stands at this point of the script).
gen annual_earn = fiyrl
******************************************************************************
***                 12.Create equivalence scales                           ***
******************************************************************************

* Adults = household size minus children.
gen numads = hhsize - numhhkid

// The OECD scale, assigned in a single expression:
//   numads >= 1 : 1 for the first adult, 0.5 per further adult, 0.3 per child
//   numads == 0 : 1 for the first child, 0.3 per further child
//   otherwise   : 1
generate hh_weights = cond(numads >= 1, 1 + (numads-1)*0.5 + numhhkid*0.3, ///
                      cond(numads == 0, 1 + (numhhkid-1)*0.3, 1))

******************************************************************************
***                 13.Deflate income variables by RPI                     ***
******************************************************************************

* Attach the annual price index by year and keep only rows matched on both
* sides.
merge m:1 year using "`location'\\data\rpi_annual_2012.dta"
keep if _merge == 3

gen RPI = rpi_all

* Divide every monetary variable by RPI.
foreach money in W W_i YE_main Y_ind_L paygu_i paygu              ///
                 annual_earn annual_earn_i grpay                  ///
                 hhyrln yrni yrcontr yrtaxnt yrtaxgr yrtaxcr      ///
                 yrdeduc fihhyr fihhyl hhyrlg hhyri hhyrb hhyrp   ///
                 hhyrt hhyneti {
    replace `money' = `money'/RPI
}

* Hourly minimum-wage figures for 1991 through 2008, listed in year order and
* assigned by walking the list alongside a year counter. Other years stay
* missing.
gen minimum_wage = .
local wage_year = 1991
foreach hourly_rate in 2.07 2.27 2.48 2.68 2.88 3.09 3.30 3.49 3.60 ///
                       3.70 4.10 4.20 4.50 4.85 5.05 5.35 5.52 5.73 {
    replace minimum_wage = `hourly_rate' if year == `wage_year'
    local ++wage_year
}

* Deflate by RPI, then derive the earnings threshold: half of 13 * 40 times
* the deflated hourly minimum wage.
replace minimum_wage = minimum_wage/RPI
gen threshold = 13*40*minimum_wage/2


******************************************************************************
***      14.Within household allocation of the Head of household           ***
******************************************************************************
//	
* Build per-person indicators and their household-year totals twice: first
* from hgsex (unsuffixed names), then from sex (names ending in _h).
forvalues pass = 1/2 {
    if `pass' == 1 {
        local sexvar hgsex
        local sfx
    }
    else {
        local sexvar sex
        local sfx _h
    }

    * Men (code 1) older than 16
    gen sex_aux`sfx' = (`sexvar' == 1 & age > 16)
    bysort year hid: egen num_mat_men`sfx' = total(sex_aux`sfx')

    * Men with jbhas == 1
    gen work_men`sfx' = (`sexvar' == 1 & jbhas == 1)
    bysort year hid: egen num_work_men`sfx' = total(work_men`sfx')

    * Women (code 2) with jbhas == 1
    gen work_women`sfx' = (`sexvar' == 2 & jbhas == 1)
    bysort year hid: egen num_work_women`sfx' = total(work_women`sfx')
}

* Derived head / spouse markers start at zero for everyone.
gen hoh_ind = 0
gen spouse= 0

* Move the negative jbhas codes ("missings" and "don't knows") to 10.
replace jbhas = 10 if jbhas < 0

* Derived head-of-household cascade. Every step sorts the data so that the
* preferred candidate is the first row of its household, tests that row with
* hid[_n-1] != hid (true only on the first row of a run of equal hid), and
* then recomputes done_hoh_ben = household-year maximum of hoh_ind, so that
* later steps skip households that already have a derived head.
* hoh_ind codes assigned here: 1, 3, 4, 5, 6.

* First do the married men, living as a couple
* Sort: men (sex 1) first, coupled before non-coupled, oldest first, pno as tie-break.

gsort year hid sex - coupbin - age pno
replace hoh_ind = 1 if hid[_n-1] != hid & sex == 1 & coupbin == 1 & ncouple_h >0
* Same idea with sex descending, so the oldest coupled woman (sex 2) leads the household.
gsort year hid - sex - coupbin - age pno
replace spouse = 1 if hid[_n-1] != hid & sex == 2 & coupbin == 1 & ncouple_h >0   // Need to check that the oldest female in a couple is actually married to the oldest male
cap drop done_hoh_ben
bys year hid: egen done_hoh_ben = max(hoh_ind)

* Then do working men
* Sort: men first, then ascending jbhas (so jbhas == 1 leads), oldest first.
gsort year hid sex jbhas - age pno
replace hoh_ind = 3 if done_hoh_ben == 0 & num_work_men_h >= 1 & hid[_n-1] != hid[_n] & sex == 1 & jbhas == 1 & ncouple_h >= 0
cap drop done_hoh_ben
bys year hid: egen done_hoh_ben = max(hoh_ind)

* Then do working women, if there are no working men
* Sort: women first, then ascending jbhas, oldest first.
gsort year hid - sex jbhas - age pno
replace hoh_ind = 4 if done_hoh_ben == 0 & num_work_men_h == 0 & hid[_n-1] != hid & sex == 2 & jbhas == 1 & ncouple_h >= 0
cap drop done_hoh_ben
bys year hid: egen done_hoh_ben = max(hoh_ind)

* Then do oldest mature men, if there are no working people
* NOTE(review): jbhas precedes age in this sort, so the selected man is the
* oldest within the lowest jbhas code present, which is not necessarily the
* oldest man in the household - confirm this is intended.
gsort year hid sex jbhas - age pno
replace hoh_ind = 5 if done_hoh_ben == 0 & num_mat_men_h > 0 & num_work_men_h == 0 & num_work_women_h == 0 & hid[_n-1] != hid & sex == 1 & ncouple_h >= 0
cap drop done_hoh_ben
bys year hid: egen done_hoh_ben = max(hoh_ind)

* Then do oldest person, if there are no adult men and no working people
* Sort: oldest first regardless of sex.
gsort year hid - age pno
replace hoh_ind = 6 if done_hoh_ben == 0 & num_mat_men_h == 0 & num_work_men_h == 0 & num_work_women_h == 0 & hid[_n-1] != hid & ncouple_h >= 0
cap drop done_hoh_ben
bys year hid: egen done_hoh_ben = max(hoh_ind)

* Households with no BHPS-designated head (index2 == 0) take the derived
* indicator instead.
replace hoh=hoh_ind if index2==0
* Collapse the derived codes (3, 4, 5, 6) to a plain 1 flag. Stata treats
* missing as larger than any number, so a bare "hoh>0" would also turn a
* missing hoh into 1; the explicit guard leaves missing values untouched.
replace hoh=1 if hoh>0 & !missing(hoh)

*** In the 3_moments_generation_rpi.do and for the household risk, we check 
*** further whether the spouses are specified correctly according to the head

******************************************************************************
***      15.Head of household's demographics and earnings                  ***
******************************************************************************

* For each characteristic, copy the value held by the derived head
* (hoh_ind > 0) and by the spouse (spouse > 0) into a helper variable, then
* spread it to every member of the household-year with max().
local sources sex age educ_bands_2 mastat
local stubs   sex age educ         marstat

forvalues k = 1/4 {
    local src  : word `k' of `sources'
    local stub : word `k' of `stubs'

    gen `stub'_h_aux = `src' if hoh_ind > 0
    gen `stub'_s_aux = `src' if spouse > 0

    egen `stub'_h = max(`stub'_h_aux), by(hid year)
    egen `stub'_s = max(`stub'_s_aux), by(hid year)
}

******************************************************************************
***                  16.Rename some variables                              ***
******************************************************************************
* Shorter names for the employment-status and full/part-time variables.
rename jbft ftpt
rename jbstat emppos

* Educational attainment: bands 1-5 are carried over as they are; band 6 and
* missing bands become missing.
gen educ_att = educ_bands_2 if inlist(educ_bands_2, 1, 2, 3, 4, 5)

label define educ_label 1 "Degree," 2 "other higher/diploma/teaching/nursing" 3 "A-levels/ AS level/ Highers (scot)/" 4 "GCSE/O level" 5 "Other qual/No qual"  6 "missing"
label values educ_att educ_label

******************************************************************************
*** 17.Keep only the variables we need to make calculations faster         ***
******************************************************************************


* Move the variables to be retained to the front of the dataset, in this
* exact order, then keep the contiguous range from year to annual_earn.
order year hid pno pid hoh hoh_ind                                          /// identifiers and weights
      hh_samp_weights_Cross_UK ind_samp_weights_Cross_UK psu strata sampst  ///
      educ_s age_s fiyrli hgspn threshold scend feend jbsemp marbin coupbin /// demographic information
      mastat qfedhi ivfio ivfho memorig njbwks njuwks njiwks njbsp njusp    ///
      njisp njbs jbrgsc emppos ftpt age_h age sex_h sex numads numhhkid     ///
      region region2 spouse educ_h educ_att marstat_h jbhas hh_weights      ///
      educ_bands_2 jssize jbsize jlsize mrjsize jbsect jbsoc_cc jlsoc_cc    ///
      mrjsoc_cc cjsbgy4                                                     ///
      W W_i L Y_ind_L YE_main paygu_i minimum_wage annual_earn_i hhyrln     /// variables of interest
      yrni yrcontr yrtaxnt yrtaxgr yrtaxcr yrdeduc fihhyr fihhyl hhyrlg     ///
      hhyri hhyrb hhyrp hhyrt hhyneti fihhyri fihhyli fihhypi hhyrbi        ///
      hhyrti hhyrii hhyrlni grpay annual_earn
keep year-annual_earn

	


******************************************************************************
***                     18.Label some variables                            ***
******************************************************************************

* Variable labels
label var hoh_ind       "Head-of-household indicator derived"
label var hoh           "Head-of-household indicator BHPS"
label var spouse        "is a spouse"
label var numads        "Number of HH adults"
label var numhhkid      "Number of HH children"
label var educ_h        "Head's education"
label var age_h         "Head's age"
label var educ_s        "Spouse's education"
label var age_s         "Spouse's age"
label var educ_att      "Educational attainment"
label var W             "Hourly Wage"
label var W_i           "Hourly Wage (including imputed values)"
label var L             "Weekly Hours worked"
label var YE_main       "Adult weekly labour earnings"
label var annual_earn   "Individual Annual Earnings"
label var annual_earn_i "Individual Annual Earnings (including imputed values)"
label var threshold     "minimum annual earnings threshold"
label var hh_weights    "Equivalence Scale"

* Value labels for the derived head type (code 0 = not a derived head, unlabelled)
label define htype_l 1 "couple" 3 "oldest_work_male" 4 "oldest_working_female" 5 "oldest_male" 6 "oldest_female"
label values hoh_ind htype_l

* Value labels for the 12-class region variable
label define region_l 1 "London" 2 "South East" 3 "South West" 4 "East"      ///
                      5 "East Midlands" 6 "West Midlands" 7 "North West"     ///
                      8 "Yorkshire & Humber" 9 "North East" 10 "Wales"       ///
                      11 "Scotland" 12 "northern ireland"
label values region region_l


* Discard observations dated after 2013 (a missing year also satisfies
* year > 2013) and those with region2 code 13.
drop if year > 2013 | region2 == 13
* Where the 12-class region is missing, fall back on region2.
replace region = region2 if missing(region)


******************************************************************************
***           19.Save and Export for further processing                    ***
******************************************************************************

* Order the panel by person and year, then write it to the project data
* folder, overwriting any existing file of the same name (replace).
sort pid year		
save  "`location'\\data\paneldataBHPS_UK.dta", replace


















