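/*
This program computes the summary statistics and the OLS 
	and 2SLS results reported in the paper.

Running this program will re-create Tables 1, 3, D1, 
	and D4. They will be output to ../output as 
	.csv files. Residuals from the panel regressions are 
	also exported as .xls files to the data folder.
*/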

* Start a fresh log for this run. An unnamed log left open by an earlier
* (possibly aborted) run would make -log using- stop with an error, so any
* open log is closed first; -capture- swallows the error when none is open.
* The path is quoted so that a dopath containing spaces still works.
capture log close
log using "${dopath}/p3_Sumstats_OLS.log", replace



***********************************
*1. TABLE 1
***********************************
* part a - panel sample waterfall
* Columns are cumulative: each step switches waterfall_samp off for the
* observations failing one more restriction and then re-summarizes the
* observations that remain. File paths are quoted so that path globals
* containing spaces do not break the commands.
use "${mydatapath}/GSFadminlongDOA_19782011Baseline.dta", clear
local sumvars total_der_cpi educyears2 age black otherrace hispanic foreign_born married birthyear
eststo clear
* column 1: baseline sample
eststo: estpost summarize `sumvars' if waterfall_samp==1, listwise
* column 2: drop observations with nomisspostBOTH equal to 0
replace waterfall_samp=0 if nomisspostBOTH==0
gen compsamp=(waterfall_samp==1) /*This sample, baseline + no missing earnings post-schooling, will be used as a comparison sample to the main sample in Table 3*/
eststo: estpost summarize `sumvars' if waterfall_samp==1, listwise
* columns 3-5: remaining restrictions, applied in the order of the column titles below
foreach restriction in "logearningsobsBOTH<34" "always_gte_earnings_thresh==0" "schoolchanges2<1" {
	replace waterfall_samp=0 if `restriction'
	eststo: estpost summarize `sumvars' if waterfall_samp==1, listwise
}
esttab using "${outputpath}/Table1.csv", cells("mean sd") mtitles("Panel - Baseline Sample" "Panel - Plus Positive Earnings After Schooling" "Panel - Plus Positive DER Earnings 1978-2011" "Panel - Plus Meets Annual Minimum Earnings" "Panel - Plus At Least One Schooling Change During 1978-2011") nodepvar label replace
*save this dataset, with the compsamp variable, for use below for comp samp regressions in Table 3
save "${mydatapath}/Compsampdata.dta", replace

* part b - cross-section sample waterfall
* Same cumulative restrictions as the panel waterfall, but every summary is
* limited to the 1990 observations. Results are appended to the Table 1 file.
* File paths are quoted so that path globals containing spaces do not break
* the commands.
use "${mydatapath}/GSFadminlongDOA_19782011Baseline.dta", clear
local sumvars total_der_cpi educyears2 age black otherrace hispanic foreign_born married birthyear
local cs1990 waterfall_samp==1 & year==1990
eststo clear
* column 1: baseline sample
eststo: estpost summarize `sumvars' if `cs1990', listwise
* columns 2-5: restrictions applied in the order of the column titles below
foreach restriction in "nomisspostBOTH==0" "logearningsobsBOTH<34" "always_gte_earnings_thresh==0" "schoolchanges2<1" {
	replace waterfall_samp=0 if `restriction'
	eststo: estpost summarize `sumvars' if `cs1990', listwise
}
esttab using "${outputpath}/Table1.csv", cells("mean sd") mtitles("CS - Baseline Sample" "CS - Plus Positive Earnings After Schooling" "CS - Plus Positive DER Earnings 1978-2011" "CS - Plus Meets Annual Minimum Earnings" "CS - Plus At Least One Schooling Change During 1978-2011") nodepvar label append





***********************************
*2. TABLE 3
***********************************
* Table 3: seven models, stored in the order of the column titles in the
* final esttab call: cross-section main sample (OLS, 2SLS), cross-section
* comparison sample (OLS, 2SLS), panel main sample (person FE OLS,
* year-dummy OLS, year-dummy 2SLS). Only the educyears2 coefficient is reported.
* File paths are quoted so that path globals containing spaces do not break
* the commands.
local depvar log_total_der_cpi
local sample 1ch
global spec1 ageq ageq2
* shorthand reused by the models below
local mainsamp insampleBOTH2_`sample'==1
local excl_iv i.birthquarter#i.birthyear


*cross-section, main sample
use "${mydatapath}/GSFadminlongDOA_19782011EarnRestrict.dta", clear

eststo clear
fvset base 1958 birthyear
eststo: reg `depvar' educyears2 $spec1 if `mainsamp' & year==1990, robust

eststo: ivregress  2sls `depvar' $spec1 (educyears2 = `excl_iv') if `mainsamp' & year==1990, vce(robust) first


*cross-section, comparison sample
* uses the dataset carrying the compsamp flag
use "${mydatapath}/Compsampdata.dta", clear

fvset base 1958 birthyear
eststo: reg `depvar' educyears2 $spec1 if compsamp==1 & year==1990, cluster(personid)

eststo: ivregress  2sls `depvar' $spec1 (educyears2 = `excl_iv') if compsamp==1 & year==1990, vce(cluster personid) first


*panel, main sample
use "${mydatapath}/GSFadminlongDOA_19782011EarnRestrict.dta", clear

* After each panel model the residuals are predicted inside preserve and restore,
* reshaped to one row per personid with one column per year, and exported
* without the personid column. The restore brings the full dataset back.

* person fixed effects
xtset personid year
eststo: xtreg `depvar' educyears2 $spec1 if `mainsamp', fe cluster(personid)
preserve
predict ResOLS_saa_panel, e
keep if `mainsamp'
keep ResOLS_saa_panel year personid
reshape wide ResOLS_saa_panel, i(personid) j(year)
drop personid
export excel using "${mydatapath}/ResOLS_saaFEi_panel_insampleBOTH2_`sample'.xls", sheetreplace
restore

* pooled OLS with year dummies
xtset personid year
eststo: reg `depvar' educyears2 $spec1 i.year if `mainsamp', cluster(personid)
preserve
predict ResOLS_saa_panel, res
keep if `mainsamp'
keep ResOLS_saa_panel year personid
reshape wide ResOLS_saa_panel, i(personid) j(year)
drop personid
export excel using "${mydatapath}/ResOLS_saaFEt_panel_insampleBOTH2_`sample'.xls", sheetreplace
restore

* 2SLS with year dummies
eststo: ivregress  2sls `depvar' $spec1 i.year (educyears2 = `excl_iv') if `mainsamp', vce(cluster personid) first
preserve
predict ResIV_saa_panel, res
keep if `mainsamp'
keep ResIV_saa_panel year personid
reshape wide ResIV_saa_panel, i(personid) j(year)
drop personid
export excel using "${mydatapath}/ResIV_saaFEt_panel_insampleBOTH2_`sample'.xls", sheetreplace
restore

esttab using "${outputpath}/Table3.csv", b se mtitles("OLS" "2SLS" "OLS" "2SLS" "OLS" "OLS" "2SLS") keep(educyears2) stats(N) replace






***********************************
*3. TABLE D1
***********************************
*** PANEL A
* Quadratic-in-schooling models with age controls ageq and ageq2. For each
* model the reported statistic is the linear combination
*   _b[educyears2] + 2*_b[educyears_sq2]*(sample mean of educyears2)
* computed by lincom and attached to the stored results with estadd.
* All coefficients are dropped from the table; only marginal_return, se and N
* are written. File paths are quoted so that path globals containing spaces
* do not break the commands.
local depvar log_total_der_cpi
local sample 2ch
global spec1 ageq ageq2
* shorthand reused by the models below
local mainsamp insampleBOTH2_`sample'==1
local excl_iv i.birthquarter#i.birthyear


*cross-section, main sample
use "${mydatapath}/GSFadminlongDOA_19782011EarnRestrict.dta", clear
* mean schooling in the 1990 cross-section of the main sample
sum educyears2 if `mainsamp' & year==1990
local educyears_meancs1990=r(mean)

eststo clear
fvset base 1958 birthyear
eststo: reg `depvar' educyears2 educyears_sq2 $spec1 if `mainsamp' & year==1990, robust
lincom _b[educyears2]+2*_b[educyears_sq2]*`educyears_meancs1990'
estadd scalar marginal_return=`r(estimate)'
estadd scalar se=`r(se)'

eststo: ivregress  2sls `depvar' $spec1 (educyears2 educyears_sq2= `excl_iv') if `mainsamp' & year==1990, vce(robust) first
lincom _b[educyears2]+2*_b[educyears_sq2]*`educyears_meancs1990'
estadd scalar marginal_return=`r(estimate)'
estadd scalar se=`r(se)'


*cross-section, comparison sample
use "${mydatapath}/Compsampdata.dta", clear
* mean schooling is recomputed on the comparison sample
sum educyears2 if compsamp==1 & year==1990
local educyears_meancs1990=r(mean)

fvset base 1958 birthyear
eststo: reg `depvar' educyears2 educyears_sq2 $spec1 if compsamp==1 & year==1990, cluster(personid)
lincom _b[educyears2]+2*_b[educyears_sq2]*`educyears_meancs1990'
estadd scalar marginal_return=`r(estimate)'
estadd scalar se=`r(se)'

eststo: ivregress  2sls `depvar' $spec1 (educyears2 educyears_sq2 = `excl_iv') if compsamp==1 & year==1990, vce(cluster personid) first
lincom _b[educyears2]+2*_b[educyears_sq2]*`educyears_meancs1990'
estadd scalar marginal_return=`r(estimate)'
estadd scalar se=`r(se)'


*panel, main sample
use "${mydatapath}/GSFadminlongDOA_19782011EarnRestrict.dta", clear
* mean schooling over all years of the main sample
sum educyears2 if `mainsamp'
local educyears_mean=r(mean)

* After each panel model the residuals are predicted inside preserve and restore,
* reshaped to one row per personid with one column per year, and exported
* without the personid column. The restore brings the full dataset back.

* person fixed effects
xtset personid year
eststo: xtreg `depvar' educyears2 educyears_sq2 $spec1 if `mainsamp', fe cluster(personid)
lincom _b[educyears2]+2*_b[educyears_sq2]*`educyears_mean'
estadd scalar marginal_return=`r(estimate)'
estadd scalar se=`r(se)'
preserve
predict ResOLS_ssaa_panel, e
keep if `mainsamp'
keep ResOLS_ssaa_panel year personid
reshape wide ResOLS_ssaa_panel, i(personid) j(year)
drop personid
export excel using "${mydatapath}/ResOLS_ssaaFEi_panel_insampleBOTH2_`sample'.xls", sheetreplace
restore

* pooled OLS with year dummies
xtset personid year
eststo: reg `depvar' educyears2 educyears_sq2 $spec1 i.year if `mainsamp', cluster(personid)
lincom _b[educyears2]+2*_b[educyears_sq2]*`educyears_mean'
estadd scalar marginal_return=`r(estimate)'
estadd scalar se=`r(se)'
preserve
predict ResOLS_ssaa_panel, res
keep if `mainsamp'
keep ResOLS_ssaa_panel year personid
reshape wide ResOLS_ssaa_panel, i(personid) j(year)
drop personid
export excel using "${mydatapath}/ResOLS_ssaaFEt_panel_insampleBOTH2_`sample'.xls", sheetreplace
restore

* 2SLS with year dummies
eststo: ivregress  2sls `depvar' $spec1 i.year (educyears2 educyears_sq2 = `excl_iv') if `mainsamp', vce(cluster personid) first
lincom _b[educyears2]+2*_b[educyears_sq2]*`educyears_mean'
estadd scalar marginal_return=`r(estimate)'
estadd scalar se=`r(se)'
preserve
predict ResIV_ssaa_panel, res
keep if `mainsamp'
keep ResIV_ssaa_panel year personid
reshape wide ResIV_ssaa_panel, i(personid) j(year)
drop personid
export excel using "${mydatapath}/ResIV_ssaaFEt_panel_insampleBOTH2_`sample'.xls", sheetreplace
restore


esttab using "${outputpath}/TableD1.csv", mtitles("OLS" "2SLS" "OLS" "2SLS" "OLS" "OLS" "2SLS") drop(*) stats(marginal_return se N) replace




*** PANEL B
* Linear-in-schooling models with age controls ageq, ageq2 and ageq4, run on
* the 1ch sample flag. Seven models are stored in the order of the column
* titles in the final esttab call, and the educyears2 coefficient is appended
* to the Table D1 file. File paths are quoted so that path globals containing
* spaces do not break the commands.
local depvar log_total_der_cpi
local sample 1ch
global spec1 ageq ageq2 ageq4
* shorthand reused by the models below
local mainsamp insampleBOTH2_`sample'==1
local excl_iv i.birthquarter#i.birthyear


*cross-section, main sample
use "${mydatapath}/GSFadminlongDOA_19782011EarnRestrict.dta", clear

eststo clear
fvset base 1958 birthyear
eststo: reg `depvar' educyears2 $spec1 if `mainsamp' & year==1990, robust

eststo: ivregress  2sls `depvar' $spec1 (educyears2 = `excl_iv') if `mainsamp' & year==1990, vce(robust) first


*cross-section, comparison sample
use "${mydatapath}/Compsampdata.dta", clear

fvset base 1958 birthyear
eststo: reg `depvar' educyears2 $spec1 if compsamp==1 & year==1990, cluster(personid)

eststo: ivregress  2sls `depvar' $spec1 (educyears2 = `excl_iv') if compsamp==1 & year==1990, vce(cluster personid) first


*panel, main sample
use "${mydatapath}/GSFadminlongDOA_19782011EarnRestrict.dta", clear

* After each panel model the residuals are predicted inside preserve and restore,
* reshaped to one row per personid with one column per year, and exported
* without the personid column. The restore brings the full dataset back.

* person fixed effects
xtset personid year
eststo: xtreg `depvar' educyears2 $spec1 if `mainsamp', fe cluster(personid)
preserve
predict ResOLS_saa2a4_panel, e
keep if `mainsamp'
keep ResOLS_saa2a4_panel year personid
reshape wide ResOLS_saa2a4_panel, i(personid) j(year)
drop personid
export excel using "${mydatapath}/ResOLS_saa2a4FEi_panel_insampleBOTH2_`sample'.xls", sheetreplace
restore

* pooled OLS with year dummies
xtset personid year
eststo: reg `depvar' educyears2 $spec1 i.year if `mainsamp', cluster(personid)
preserve
predict ResOLS_saa2a4_panel, res
keep if `mainsamp'
keep ResOLS_saa2a4_panel year personid
reshape wide ResOLS_saa2a4_panel, i(personid) j(year)
drop personid
export excel using "${mydatapath}/ResOLS_saa2a4FEt_panel_insampleBOTH2_`sample'.xls", sheetreplace
restore

* 2SLS with year dummies
eststo: ivregress  2sls `depvar' $spec1 i.year (educyears2 = `excl_iv') if `mainsamp', vce(cluster personid) first
preserve
predict ResIV_saa2a4_panel, res
keep if `mainsamp'
keep ResIV_saa2a4_panel year personid
reshape wide ResIV_saa2a4_panel, i(personid) j(year)
drop personid
export excel using "${mydatapath}/ResIV_saa2a4FEt_panel_insampleBOTH2_`sample'.xls", sheetreplace
restore


esttab using "${outputpath}/TableD1.csv", b se mtitles("OLS" "2SLS" "OLS" "2SLS" "OLS" "OLS" "2SLS") keep(educyears2) stats(N) append




*** PANEL C
* Quadratic-in-schooling models with age controls ageq, ageq2 and ageq4, run
* on the 2ch sample flag. For each model the reported statistic is the linear
* combination
*   _b[educyears2] + 2*_b[educyears_sq2]*(sample mean of educyears2)
* computed by lincom and attached to the stored results with estadd.
* All coefficients are dropped from the table; only marginal_return, se and N
* are appended to the Table D1 file. File paths are quoted so that path
* globals containing spaces do not break the commands.
local depvar log_total_der_cpi
local sample 2ch
global spec1 ageq ageq2 ageq4
* shorthand reused by the models below
local mainsamp insampleBOTH2_`sample'==1
local excl_iv i.birthquarter#i.birthyear


*cross-section, main sample
use "${mydatapath}/GSFadminlongDOA_19782011EarnRestrict.dta", clear
* mean schooling in the 1990 cross-section of the main sample
sum educyears2 if `mainsamp' & year==1990
local educyears_meancs1990=r(mean)

eststo clear
fvset base 1958 birthyear
eststo: reg `depvar' educyears2 educyears_sq2 $spec1 if `mainsamp' & year==1990, robust
lincom _b[educyears2]+2*_b[educyears_sq2]*`educyears_meancs1990'
estadd scalar marginal_return=`r(estimate)'
estadd scalar se=`r(se)'

eststo: ivregress  2sls `depvar' $spec1 (educyears2 educyears_sq2= `excl_iv') if `mainsamp' & year==1990, vce(robust) first
lincom _b[educyears2]+2*_b[educyears_sq2]*`educyears_meancs1990'
estadd scalar marginal_return=`r(estimate)'
estadd scalar se=`r(se)'


*cross-section, comparison sample
use "${mydatapath}/Compsampdata.dta", clear
* mean schooling is recomputed on the comparison sample
sum educyears2 if compsamp==1 & year==1990
local educyears_meancs1990=r(mean)

fvset base 1958 birthyear
eststo: reg `depvar' educyears2 educyears_sq2 $spec1 if compsamp==1 & year==1990, cluster(personid)
lincom _b[educyears2]+2*_b[educyears_sq2]*`educyears_meancs1990'
estadd scalar marginal_return=`r(estimate)'
estadd scalar se=`r(se)'

eststo: ivregress  2sls `depvar' $spec1 (educyears2 educyears_sq2 = `excl_iv') if compsamp==1 & year==1990, vce(cluster personid) first
lincom _b[educyears2]+2*_b[educyears_sq2]*`educyears_meancs1990'
estadd scalar marginal_return=`r(estimate)'
estadd scalar se=`r(se)'


*panel, main sample
use "${mydatapath}/GSFadminlongDOA_19782011EarnRestrict.dta", clear
* mean schooling over all years of the main sample
sum educyears2 if `mainsamp'
local educyears_mean=r(mean)

* After each panel model the residuals are predicted inside preserve and restore,
* reshaped to one row per personid with one column per year, and exported
* without the personid column. The restore brings the full dataset back.

* person fixed effects
xtset personid year
eststo: xtreg `depvar' educyears2 educyears_sq2 $spec1 if `mainsamp', fe cluster(personid)
lincom _b[educyears2]+2*_b[educyears_sq2]*`educyears_mean'
estadd scalar marginal_return=`r(estimate)'
estadd scalar se=`r(se)'
preserve
predict ResOLS_ssaa2a4_panel, e
keep if `mainsamp'
keep ResOLS_ssaa2a4_panel year personid
reshape wide ResOLS_ssaa2a4_panel, i(personid) j(year)
drop personid
export excel using "${mydatapath}/ResOLS_ssaa2a4FEi_panel_insampleBOTH2_`sample'.xls", sheetreplace
restore

* pooled OLS with year dummies
xtset personid year
eststo: reg `depvar' educyears2 educyears_sq2 $spec1 i.year if `mainsamp', cluster(personid)
lincom _b[educyears2]+2*_b[educyears_sq2]*`educyears_mean'
estadd scalar marginal_return=`r(estimate)'
estadd scalar se=`r(se)'
preserve
predict ResOLS_ssaa2a4_panel, res
keep if `mainsamp'
keep ResOLS_ssaa2a4_panel year personid
reshape wide ResOLS_ssaa2a4_panel, i(personid) j(year)
drop personid
export excel using "${mydatapath}/ResOLS_ssaa2a4FEt_panel_insampleBOTH2_`sample'.xls", sheetreplace
restore

* 2SLS with year dummies
eststo: ivregress  2sls `depvar' $spec1 i.year (educyears2 educyears_sq2 = `excl_iv') if `mainsamp', vce(cluster personid) first
lincom _b[educyears2]+2*_b[educyears_sq2]*`educyears_mean'
estadd scalar marginal_return=`r(estimate)'
estadd scalar se=`r(se)'
preserve
predict ResIV_ssaa2a4_panel, res
keep if `mainsamp'
keep ResIV_ssaa2a4_panel year personid
reshape wide ResIV_ssaa2a4_panel, i(personid) j(year)
drop personid
export excel using "${mydatapath}/ResIV_ssaa2a4FEt_panel_insampleBOTH2_`sample'.xls", sheetreplace
restore


esttab using "${outputpath}/TableD1.csv", mtitles("OLS" "2SLS" "OLS" "2SLS" "OLS" "OLS" "2SLS") drop(*) stats(marginal_return se N) append






***********************************
*4. TABLE D4
***********************************
* Table D4: panel OLS on the 1ch main sample with cell-by-year dummies added.
* File paths are quoted so that path globals containing spaces do not break
* the commands.
use "${mydatapath}/GSFadminlongDOA_19782011EarnRestrict.dta", clear
* one group identifier per (characteristic, year) cell
egen RxY=group(race year)
egen HxY=group(hispanic year)
egen FBxY=group(foreign_born year)
egen MxY=group(married year)
egen SxY=group(state year)
egen BYxY=group(birthyear year)
* the six identifiers enter both models as factor variables
local cellbyyear i.RxY i.HxY i.FBxY i.MxY i.SxY i.BYxY

eststo clear

* After each model the residuals are predicted inside preserve and restore,
* reshaped to one row per personid with one column per year, and exported
* without the personid column. The restore brings the full dataset back.

* person fixed effects
xtset personid year
eststo: xtreg log_total_der_cpi educyears2 ageq ageq2 `cellbyyear' if insampleBOTH2_1ch==1, fe cluster(personid)
preserve
predict ResOLS_saa_panel, e
keep if insampleBOTH2_1ch==1
keep ResOLS_saa_panel year personid
reshape wide ResOLS_saa_panel, i(personid) j(year)
drop personid
export excel using "${mydatapath}/ResOLS_saaFEiDemoByYear_panel_insampleBOTH2_1ch.xls", sheetreplace
restore

* pooled OLS with year dummies
eststo: reg log_total_der_cpi educyears2 ageq ageq2 i.year `cellbyyear' if insampleBOTH2_1ch==1, cluster(personid)
preserve
predict ResOLS_saa_panel, res
keep if insampleBOTH2_1ch==1
keep ResOLS_saa_panel year personid
reshape wide ResOLS_saa_panel, i(personid) j(year)
drop personid
export excel using "${mydatapath}/ResOLS_saaFEtDemoByYear_panel_insampleBOTH2_1ch.xls", sheetreplace
restore

esttab using "${outputpath}/TableD4.csv", b se mtitles("OLS" "OLS") replace keep(educyears2) stats(N)


* Close the log opened at the top of this program.
log close

