/* Append over years, 1999-2008 */

clear
cd C:\PTM_code\Preparation
set more off
/* 1998 is not appended: household size for 1997 is not available, so the
   per-capita information cannot be constructed for that year
   (NOTE(review): the original remark was cut off mid-sentence - confirm). */

/* Start from the 2008 file and stack the earlier yearly files below it,
   going backwards from 2007 to 1999. */
use Income_cons2008.dta, clear
forvalues yr = 2007(-1)1999 {
    append using Income_cons`yr'.dta
}


/* Year dummies: d07, d06, ..., d00, d99 equal 1 in the matching year
   (2007 down to 1999) and 0 otherwise.  No dummy is created for 2008.
   The variables are generated in descending-year order so that their
   position in the data set is the same as before. */
forvalues yr = 2007(-1)1999 {
    * two-digit suffix: 2007 -> 07, 2000 -> 00, 1999 -> 99
    local yy = substr("`yr'", 3, 2)
    generate d`yy' = (year==`yr')
}

/* Declare the household (hhid) by year panel; the lag operators used
   below require it. */
sort hhid year
tsset hhid year

/* Log consumption.  at_cons and l_at_cons come from the input files
   (presumably l_at_cons is the one-year lag of at_cons - verify against the
   Income_cons files).  Deeper lags are built by lagging the log of
   l_at_cons one, two and three more years. */
generate ln_at_cons    = ln(at_cons)
generate ln_l_at_cons  = ln(l_at_cons)
generate ln_l2_at_cons = L1.ln_l_at_cons
generate ln_l3_at_cons = L2.ln_l_at_cons
generate ln_l4_at_cons = L3.ln_l_at_cons

/* External IVs: one- and two-year lags of l_is_head. */
generate l2_is_head = L1.l_is_head
generate l3_is_head = L2.l_is_head

/* Sample control.
   Each flag below is 0 for a usable household-year and 1 otherwise. */

/* missing0 = 1 when household size (this year or the previous year) or any
   of the log-consumption terms up to ln_l3_at_cons equals "." */
generate missing0 = (hsize==. | l.hsize==. | ln_at_cons==. | ln_l_at_cons==. | ln_l2_at_cons==. | ln_l3_at_cons==.)

/* exIV = 1 when any of the three is_head instruments equals "." */
generate exIV = (l_is_head==. | l2_is_head==. | l3_is_head==.)

/* outlier_c marks household 1519 as "o" (out); every other household is
   "i" (in). */
generate outlier_c = cond(hhid==1519, "o", "i")

/* missing = 0 only when all flags above are clear and the year is not 2001;
   it is 1 in every other case. */
generate missing = (missing0!=0 | exIV!=0 | outlier_c!="i" | year==2001)

*keep if year>2001
/* Remove the 2008 observations; the exported file is named for 1999-2007. */
drop if year==2008

sort year hhid
compress

/* Keep only the variables that go into the estimation file.
   The variable list is continued across lines with the line-join marker
   instead of switching the command delimiter: the documented delimiter
   commands take an explicit argument (";" or "cr"), and the previous code
   issued the command with no argument.  Written this way the statement is
   a single keep command regardless of how an argument-less delimiter
   command is interpreted. */
keep sample98 missing year hhid hsize d07 d06 d05 d04 d03 d02 d01 d00 d99 ///
     ln_at_cons ln_l_at_cons ln_l2_at_cons ln_l3_at_cons ln_l4_at_cons ///
     l_is_head l2_is_head l3_is_head m65 f55 hsex hedu seoul nonspouse hage

/* Write the panel as a text file into the Estimation folder.  The working
   directory stays there for the commands that follow. */
cd C:\PTM_code\Estimation
outsheet using Data_1999_2007.txt, replace
*matwrite using Data_1999_2007.mat, replace

/* Household list for Data_sfddm.txt.
   The data are reduced to one row per household holding the minimum of the
   "missing" flag over that household's rows, so a household is retained
   when at least one of its rows has missing==0. */
order missing

collapse (min) missing, by(hhid)
drop if missing!=0

/* Only hhid and missing exist after the collapse; keep the identifier and
   write a copy of it under the name missing_sfddm. */
keep hhid
generate missing_sfddm=hhid
outsheet using Data_sfddm.txt, replace
*matwrite missing_sfddm[missing_sfddm] using Data_sfddm.mat, replace


* This part makes an additional indicator file for the homoskedasticity assumption.
/* Re-read the exported panel (the file name is relative to the current
   working directory) and declare the household-by-year panel again. */
insheet using Data_1999_2007.txt, clear
xtset hhid year

/* The flag stored in the file is set aside as missingA; a new "missing"
   flag is rebuilt below. */
rename missing missingA

/* Sample control: each flag is 0 for a usable household-year, 1 otherwise. */

/* missing0 = 1 when household size (this year or the previous year) or any
   of the log-consumption terms up to ln_l3_at_cons equals "." */
generate missing0 = (hsize==. | l.hsize==. | ln_at_cons==. | ln_l_at_cons==. | ln_l2_at_cons==. | ln_l3_at_cons==.)

/* exIV = 1 when any of the three is_head instruments equals "." */
generate exIV = (l_is_head==. | l2_is_head==. | l3_is_head==.)

/* household 1519 is marked "o" (out), all others "i" (in) */
generate outlier_c = cond(hhid==1519, "o", "i")

/* missing = 0 only when all flags are clear and the year is not 2001 */
generate missing = (missing0!=0 | exIV!=0 | outlier_c!="i" | year==2001)

/* Additional restriction for this file: flag every row whose household size
   differs from the previous year.  A missing difference (no previous-year
   row) also satisfies h0!=0 and is therefore flagged as well. */
generate h0 = D.hsize
replace missing = 1 if h0!=0

keep if year>2001
order missing

/* One row per household with the minimum of the flag over the remaining
   years; households whose minimum is 0 are retained. */
collapse (min) missing, by(hhid)
drop if missing!=0

/* Only hhid and missing exist after the collapse; keep the identifier and
   write a copy of it under the name missing_sfddm. */
keep hhid

generate missing_sfddm=hhid
outsheet using DataH_sfddm.txt, replace
*matwrite missing_sfddm[hhid] using DataH_sfddm, replace

