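/*
This program takes the GSF/SSB data, builds a count of years with
	positive earnings prior to 1978, keeps the variables of interest,
	and reshapes the data into the long (person-year) format needed
	for analysis. It also converts the state-level and CPI CSV files
	to Stata (.dta) format.
*/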

* Open the log for this step. A log left open by an earlier run that
* stopped part-way is closed first; otherwise -log using- would fail
* because a log file is already open. The path is quoted so that
* directories containing spaces work.
capture log close
log using "${dopath}/p1_DataPrep.log", replace


* Lists describing the year-suffixed administrative series.
*   longvars_admin      - wildcard patterns for the year-suffixed series
*                         (not referenced anywhere else in this script)
*   longvars_admin_STUB - stub names handed to -reshape long- further down
local longvars_admin defer_der_fica_* defer_der_nonfica_* total_der_fica_* total_der_nonfica_* totearn_ser_* wqc_yrtot_*
local longvars_admin_STUB defer_der_fica_ defer_der_nonfica_ total_der_fica_ total_der_nonfica_


* Load the wide input file (path quoted so directories with spaces work).
use "${datapath}/${dataname}.dta", clear

* Create a variable measuring years with positive earnings prior to 1978:
* one 0/1 indicator per year 1951-1977, then their row sum.
forvalues yr = 1951/1977 {
	* Stata treats missing as larger than any number, so "> 0" alone would
	* be true for missing earnings; the !missing() guard makes those years 0.
	gen byte ser_posearn_`yr' = (totearn_ser_`yr' > 0 & !missing(totearn_ser_`yr'))
	tab ser_posearn_`yr'
}

* exp_ser1977 = number of years flagged by the ser_posearn_ indicators.
egen exp_ser1977 = rowtotal(ser_posearn_*)
sum exp_ser1977

* Keep variables of interest and reshape.
* The -keep- list alone decides what survives: the 1951-1977 totearn_ser and
* wqc_yrtot series, and the yearly ser_posearn_ indicators, are not listed and
* are therefore removed here. A separate -drop- with hyphenated ranges is not
* used because such ranges depend on the column order of the input file.
keep defer_der_fica_* defer_der_nonfica_* ///
	total_der_fica_* total_der_nonfica_* ///
	personid panel sipp_panel_beg_date sipp_panel_end_date ///
	male race hispanic foreign_born educ_5cat ///
	current_enroll_coll current_enroll_hs ///
	birthdate deathdate first_admin_birthdate ///
	mh1 mh2 mh3 mh4 mh5 mh6 mh7 mh8 ///
	mh_date1 mh_date2 mh_date3 mh_date4 mh_date5 mh_date6 mh_date7 mh_date8 ///
	state year_bach year_beg_posths year_end_hs year_end_posths ///
	exp_ser1977

* One row per personid and year; the numeric suffix of each stub becomes
* the new variable "year".
reshape long `longvars_admin_STUB', i(personid) j(year)

compress
save "${mydatapath}/GSFadminlong.dta", replace



* Convert the auxiliary CSV files in ${mydatapath} to Stata format, keeping
* the case of the column names as written in each CSV. Paths are quoted so
* directories containing spaces work.
foreach stem in VZ_state_annual CPI_URS_19782017 {
	import delimited "${mydatapath}/`stem'.csv", case(preserve) clear
	save "${mydatapath}/`stem'.dta", replace
}

log close
