/*******************************************************************
 This file suspends some sample restrictions and tests the 
 sensitivity of the main IV estimates for wages to alternative samples 
 and model specifications. It generates the estimates for Table 1 and 
 Figure C.1.
********************************************************************/

* ---------------------------------------------------------------------------
* Data preparation.
* Loads the analysis file and builds 0/1 flag variables for observations that
* the main specification excludes. Flagged observations stay in memory, so
* later specifications can re-admit them by omitting the flag from their
* if-condition.
* ---------------------------------------------------------------------------
* Forward slash: accepted by Stata on every platform, whereas a backslash is a
* directory separator on Windows only.
use "$sample/compuls.dta", clear
gen welle = year + 1900            /* year + 1900; compared with 1979 ... 1999 further below */

* sample restrictions

gen dif = length - school          /* difference between length and school */
* diff_yos==1 marks differences outside [-2, 4]. A missing difference is
* flagged as well, because Stata ranks missing above every number and
* "dif > 4" is therefore true for it.
gen diff_yos = dif < -2 | dif > 4  /*don't clean, generate dummy for to be cleaned */
drop dif

*drop if school2 == . | exp == . | lnw == . /*don't drop missings for education*/
* missing() also catches the extended missing codes .a-.z, which "== ." does
* not; with only system missings in the data the result is unchanged.
drop if missing(lnw)
gen edu_mis    = missing(ed)       /*generate dummy for missing education*/
gen school_mis = missing(school2)  /*generate dummy for missing schooling*/

*generate indicators for too large and too small values (1 and 99 percentile)
gen yos_1_99 = length>=6 & length <=21 /*dummy for 6-21 years of schooling*/

keep if yob >= 30 & yob <= 60 /*cohorts 1930-1960*/
keep if age >= 19 & age <= 65 /*age 19-65*/
keep if bula < 11 /*only West German states (excl. Berlin)*/

gen yob_=yob                  /* two-digit copy, used as the continuous trend c.yob_ */
replace yob = yob+1900        /* four-digit year, used as i.yob and in cohort cut-offs */
gen yos=length /*rename years of schooling*/
gen lhwage=lnw /*rename wages*/

gen N_=. /*dummy for estimation sample*/
count

***** Estimations for Table 1: PvW’s main estimates: replication and sensitivity 
estimates drop _all

***** Panel A: Replication *****
* Baseline specification: 2SLS, first stage and reduced form, all on the
* sample that the 2SLS regression actually used (stored in N_).
global instr switch2
global contr age* female year79 year85 year92 year99

* Pieces shared by the three regressions below
local rhs    "i.yob i.bula $contr i.bula##c.yob_"
local keepif "diff_yos==0 & edu_mis==0 & school_mis==0"

* 2SLS
quietly ivregress 2sls lhwage `rhs' (yos = $instr) if `keepif', vce(cluster clust)
estimates store iv_l_0
replace N_ = e(sample)

* First stage, with the F statistic of the instrument attached as e(F1)
quietly regress yos $instr `rhs' if N_==1 & `keepif', vce(cluster clust)
estimates store fs_l_0
quietly test $instr
scalar F1 = r(F)
estimates restore fs_l_0
quietly estadd scalar F1

* Reduced form
quietly regress lhwage $instr `rhs' if N_==1 & `keepif', vce(cluster clust)
estimates store rf_l_0


***** Panel B: Sensitivity tests *****
***** S1: yob>=1931 *****
* Baseline regressions restricted to yob >= 1931.
local rhs    "i.yob i.bula $contr i.bula##c.yob_"
local keepif "diff_yos==0 & edu_mis==0 & school_mis==0 & yob>=1931"

* 2SLS
quietly ivregress 2sls lhwage `rhs' (yos = $instr) if `keepif', vce(cluster clust)
estimates store iv_1931
replace N_ = e(sample)

* First stage, with the instrument F statistic attached as e(F1)
quietly regress yos $instr `rhs' if N_==1 & `keepif', vce(cluster clust)
estimates store fs_1931
quietly test $instr
scalar F1 = r(F)
estimates restore fs_1931
quietly estadd scalar F1

* Reduced form
quietly regress lhwage $instr `rhs' if N_==1 & `keepif', vce(cluster clust)
estimates store rf_1931


***** S2: yob>=1945 *****
* Baseline regressions restricted to yob >= 1945.
local rhs    "i.yob i.bula $contr i.bula##c.yob_"
local keepif "diff_yos==0 & edu_mis==0 & school_mis==0 & yob>=1945"

* 2SLS
quietly ivregress 2sls lhwage `rhs' (yos = $instr) if `keepif', vce(cluster clust)
estimates store iv_1945
replace N_ = e(sample)

* First stage, with the instrument F statistic attached as e(F1)
quietly regress yos $instr `rhs' if N_==1 & `keepif', vce(cluster clust)
estimates store fs_1945
quietly test $instr
scalar F1 = r(F)
estimates restore fs_1945
quietly estadd scalar F1

* Reduced form
quietly regress lhwage $instr `rhs' if N_==1 & `keepif', vce(cluster clust)
estimates store rf_1945


***** S3 & S4: temporary extensions *****
* temp = 1 for the bula / yob combinations listed below, 0 otherwise.
* NOTE(review): the meaning of the bula codes is not visible in this file;
* confirm the state-cohort cells against the data documentation.
gen temp=bula==3 & yob>=1935 & yob<=1940
replace temp=1 if bula==5 & yob>=1935 & yob<=1939
replace temp=1 if bula==6 & ((yob>=1936 & yob<=1941) | yob==1933)
replace temp=1 if bula==7 & yob>=1938 & yob<=1942
replace temp=1 if bula==8 & yob>=1938 & yob<=1941
replace temp=1 if bula==9 & yob>=1938 & yob<=1943

* S4 runs before S3 and sits inside preserve/restore because it overwrites
* switch2; restore brings back the original switch2 (and N_) for everything
* that follows. Stored estimates are not affected by restore.
preserve
* S4: combined effect -- a single instrument equal to switch2 + temp
replace switch2=switch2+temp

global instr switch2 
global contr age* female year79  year85 year92 year99 

* 2SLS; N_ is then set to the estimation sample of this regression
qui ivregress 2sls lhwage i.yob i.bula $contr i.bula##c.yob_ (yos = $instr) ///
		if diff_yos==0 & edu_mis==0 & school_mis==0 ///
		,vce(cluster clust) 
estimates store iv_temp2
replace N_=e(sample)
* First stage; the instrument F statistic is attached to fs_temp2 as e(F1)
qui reg  yos $instr  i.yob i.bula $contr  i.bula##c.yob_   ///
		if N_==1 & diff_yos==0 & edu_mis==0 & school_mis==0 ///
		,vce(cluster clust)  
estimates store fs_temp2
qui test $instr
scalar F1=r(F)
estimates restore fs_temp2
qui estadd scalar F1
* Reduced form
qui reg  lhwage $instr  i.yob i.bula $contr i.bula##c.yob_  ///
	if N_==1 & diff_yos==0 & edu_mis==0 & school_mis==0 ///
	,vce(cluster clust)  
estimates store rf_temp2
restore

* S3: two instruments -- switch2 and temp enter as separate instruments
global instr switch2 temp
global contr age* female year79  year85 year92 year99 

* 2SLS; N_ is then set to the estimation sample of this regression
qui ivregress 2sls lhwage i.yob i.bula $contr i.bula##c.yob_ (yos = $instr) ///
		if diff_yos==0 & edu_mis==0 & school_mis==0 ///
		,vce(cluster clust) 		
estimates store iv_temp1
replace N_=e(sample)
* First stage
qui reg  yos $instr  i.yob i.bula $contr  i.bula##c.yob_   ///
		if N_==1 & diff_yos==0 & edu_mis==0 & school_mis==0 ///
		,vce(cluster clust)  
estimates store fs_temp1
* NOTE(review): only switch2 is tested here, although $instr holds two
* instruments; e(F1) is therefore the F statistic of switch2 alone, not the
* joint F of both excluded instruments. Use "test $instr" if the joint
* statistic is wanted -- confirm which one the table is meant to report.
qui test switch2
scalar F1=r(F)
estimates restore fs_temp1
qui estadd scalar F1
* Reduced form
qui reg  lhwage $instr  i.yob i.bula $contr i.bula##c.yob_  ///
	if N_==1 & diff_yos==0 & edu_mis==0 & school_mis==0 ///
	,vce(cluster clust)  
estimates store rf_temp1


***** S5: age 19-60 *****
* Back to the single instrument, sample restricted to ages 19 through 60.
global instr switch2

local rhs    "i.yob i.bula $contr i.bula##c.yob_"
local keepif "diff_yos==0 & edu_mis==0 & school_mis==0 & inrange(age, 19, 60)"

* 2SLS
quietly ivregress 2sls lhwage `rhs' (yos = $instr) if `keepif', vce(cluster clust)
estimates store iv_a19_60
replace N_ = e(sample)

* First stage, with the instrument F statistic attached as e(F1)
* ($instr is exactly switch2 here, so this is the same test as "test switch2")
quietly regress yos $instr `rhs' if N_==1 & `keepif', vce(cluster clust)
estimates store fs_a19_60
quietly test $instr
scalar F1 = r(F)
estimates restore fs_a19_60
quietly estadd scalar F1

* Reduced form
quietly regress lhwage $instr `rhs' if N_==1 & `keepif', vce(cluster clust)
estimates store rf_a19_60


***** S6: age 19-55 *****
* Baseline regressions restricted to ages 19 through 55.
global instr switch2

local rhs    "i.yob i.bula $contr i.bula##c.yob_"
local keepif "diff_yos==0 & edu_mis==0 & school_mis==0 & inrange(age, 19, 55)"

* 2SLS
quietly ivregress 2sls lhwage `rhs' (yos = $instr) if `keepif', vce(cluster clust)
estimates store iv_a19_55
replace N_ = e(sample)

* First stage, with the instrument F statistic attached as e(F1)
quietly regress yos $instr `rhs' if N_==1 & `keepif', vce(cluster clust)
estimates store fs_a19_55
quietly test $instr
scalar F1 = r(F)
estimates restore fs_a19_55
quietly estadd scalar F1

* Reduced form
quietly regress lhwage $instr `rhs' if N_==1 & `keepif', vce(cluster clust)
estimates store rf_a19_55


***** S7: age 25-65 *****
* Baseline regressions restricted to ages 25 through 65.
local rhs    "i.yob i.bula $contr i.bula##c.yob_"
local keepif "diff_yos==0 & edu_mis==0 & school_mis==0 & inrange(age, 25, 65)"

* 2SLS
quietly ivregress 2sls lhwage `rhs' (yos = $instr) if `keepif', vce(cluster clust)
estimates store iv_a25_65
replace N_ = e(sample)

* First stage, with the instrument F statistic attached as e(F1)
quietly regress yos $instr `rhs' if N_==1 & `keepif', vce(cluster clust)
estimates store fs_a25_65
quietly test $instr
scalar F1 = r(F)
estimates restore fs_a25_65
quietly estadd scalar F1

* Reduced form
quietly regress lhwage $instr `rhs' if N_==1 & `keepif', vce(cluster clust)
estimates store rf_a25_65


***** S8: outliers for wages*****
* Within each welle value, observations of the baseline sample whose w lies
* below the 0.25th or above the 99.75th centile are dropped; the data are put
* back afterwards by restore.
local rhs    "i.yob i.bula $contr i.bula##c.yob_"
local keepif "diff_yos==0 & edu_mis==0 & school_mis==0"

preserve
foreach wave in 1979 1985 1992 1999 {
	* centile leaves the two cut-offs in r(c_1) and r(c_2)
	quietly centile w if welle==`wave' & `keepif', centile(0.25 99.75)
	drop if welle==`wave' & `keepif' & (w<r(c_1) | w>r(c_2))
}

* 2SLS
quietly ivregress 2sls lhwage `rhs' (yos = $instr) if `keepif', vce(cluster clust)
estimates store iv_wage
replace N_ = e(sample)

* First stage, with the instrument F statistic attached as e(F1)
quietly regress yos $instr `rhs' if N_==1 & `keepif', vce(cluster clust)
estimates store fs_wage
quietly test $instr
scalar F1 = r(F)
estimates restore fs_wage
quietly estadd scalar F1

* Reduced form
quietly regress lhwage $instr `rhs' if N_==1 & `keepif', vce(cluster clust)
estimates store rf_wage
restore


***** S9: SSY *****
capture drop SSY*
*** Short school years: Pischke (2007), but age at enrollment 6 (not 7) ****
* SSY1: the two boundary birth years of each group of states
gen SSY1 = (inlist(yob, 1953, 1961) & inlist(bula, 5, 6, 7, 8)) | ///
           (inlist(yob, 1952, 1961) & inlist(bula, 1, 4, 10))
                                                /*1952 due to C9==1*/

* SSY2: the birth years strictly between those boundaries
gen SSY2 = (yob>1953 & yob<1961 & inlist(bula, 5, 6, 7, 8)) | ///
           (yob>1952 & yob<1961 & inlist(bula, 1, 4, 10))

gen SSY = SSY1 + SSY2
label variable SSY "Affected by SSY (basic track)"

* SSY enters as an additional control
global instr switch2
global contr age* female year79  year85 year92 year99 SSY

local rhs    "i.yob i.bula $contr i.bula##c.yob_"
local keepif "diff_yos==0 & edu_mis==0 & school_mis==0"

* 2SLS
quietly ivregress 2sls lhwage `rhs' (yos = $instr) if `keepif', vce(cluster clust)
estimates store iv_ssy
replace N_ = e(sample)

* First stage, with the instrument F statistic attached as e(F1)
quietly regress yos $instr `rhs' if N_==1 & `keepif', vce(cluster clust)
estimates store fs_ssy
quietly test $instr
scalar F1 = r(F)
estimates restore fs_ssy
quietly estadd scalar F1

* Reduced form
quietly regress lhwage $instr `rhs' if N_==1 & `keepif', vce(cluster clust)
estimates store rf_ssy


***** S10: squared trends *****
* The bula-specific trend in yob_ gets a quadratic term; the control list is
* reset so that SSY is no longer included.
global instr switch2
global contr age* female year79  year85 year92 year99

local rhs    "i.yob i.bula $contr i.bula##c.yob_##c.yob_"
local keepif "diff_yos==0 & edu_mis==0 & school_mis==0"

* 2SLS
quietly ivregress 2sls lhwage `rhs' (yos = $instr) if `keepif', vce(cluster clust)
estimates store iv_q
replace N_ = e(sample)

* First stage, with the instrument F statistic attached as e(F1)
quietly regress yos $instr `rhs' if N_==1 & `keepif', vce(cluster clust)
estimates store fs_q
quietly test $instr
scalar F1 = r(F)
estimates restore fs_q
quietly estadd scalar F1

* Reduced form
quietly regress lhwage $instr `rhs' if N_==1 & `keepif', vce(cluster clust)
estimates store rf_q


***** S11: males only *****
* Observations with female==0 only; the full data come back with restore.
local rhs    "i.yob i.bula $contr i.bula##c.yob_"
local keepif "diff_yos==0 & edu_mis==0 & school_mis==0"

preserve
keep if female==0

* 2SLS
quietly ivregress 2sls lhwage `rhs' (yos = $instr) if `keepif', vce(cluster clust)
estimates store iv_male
replace N_ = e(sample)

* First stage, with the instrument F statistic attached as e(F1)
quietly regress yos $instr `rhs' if N_==1 & `keepif', vce(cluster clust)
estimates store fs_male
quietly test $instr
scalar F1 = r(F)
estimates restore fs_male
quietly estadd scalar F1

* Reduced form
quietly regress lhwage $instr `rhs' if N_==1 & `keepif', vce(cluster clust)
estimates store rf_male
restore


***** S12: incl. missings on education *****
* Same as the baseline, except that edu_mis==0 is no longer required.
local rhs    "i.yob i.bula $contr i.bula##c.yob_"
local keepif "diff_yos==0 & school_mis==0"

* 2SLS
quietly ivregress 2sls lhwage `rhs' (yos = $instr) if `keepif', vce(cluster clust)
estimates store iv_mis
replace N_ = e(sample)

* First stage, with the instrument F statistic attached as e(F1)
quietly regress yos $instr `rhs' if N_==1 & `keepif', vce(cluster clust)
estimates store fs_mis
quietly test $instr
scalar F1 = r(F)
estimates restore fs_mis
quietly estadd scalar F1

* Reduced form
quietly regress lhwage $instr `rhs' if N_==1 & `keepif', vce(cluster clust)
estimates store rf_mis


***** S13: years of schooling 6-21 (1-99 percentile) *****
* The three baseline flags are replaced by a range condition on yos.
local rhs    "i.yob i.bula $contr i.bula##c.yob_"
local keepif "yos>=6 & yos<22"

* 2SLS
quietly ivregress 2sls lhwage `rhs' (yos = $instr) if `keepif', vce(cluster clust)
estimates store iv_yos
replace N_ = e(sample)

* First stage, with the instrument F statistic attached as e(F1)
quietly regress yos $instr `rhs' if N_==1 & `keepif', vce(cluster clust)
estimates store fs_yos
quietly test $instr
scalar F1 = r(F)
estimates restore fs_yos
quietly estadd scalar F1

* Reduced form
quietly regress lhwage $instr `rhs' if N_==1 & `keepif', vce(cluster clust)
estimates store rf_yos


* Output for Table 1: PvW’s main estimates: replication and sensitivity tests
* Three estout calls write to the same file: the first one replaces it, the
* other two append to it.
*   iv_*  coefficient(s) matching *yos* from the 2SLS models
*   fs_*  instrument coefficient and the stored scalar F1 from the first stages
*   rf_*  instrument coefficient and N from the reduced forms
* keep($instr) picks up whatever the global instr holds when this section
* runs (switch2, as last set in the squared-trends section).
* Paths use forward slashes, which Stata accepts on every platform; a
* backslash is a directory separator on Windows only.
estout iv_*  ///
using "$sample/tab1_wages.xls" ///
	, cells(b(fmt(3) star) se(par)) starlevels(* 0.1 ** 0.05 *** 0.01)  ///
	keep(*yos*) title ("IV coefficients") collabel(, none) label ///
	stats(, fmt( 0 0 0) label(  )) replace stardetach  modelwidth(8)
	
estout fs_* ///
using "$sample/tab1_wages.xls" ///
	, cells(b(fmt(3) star) se(par)) starlevels(* 0.1 ** 0.05 *** 0.01)  ///
	keep($instr) title ("First stage") collabel(, none)  ///
	stats(F1, fmt( 2 0 0) label(  )) append stardetach  modelwidth(8)
	
estout rf_*  ///
using "$sample/tab1_wages.xls" ///
	, cells(b(fmt(3) star) se(par)) starlevels(* 0.1 ** 0.05 *** 0.01)  ///
	keep($instr) 	title ("Reduced form") collabel(, none) ///
    stats(N, fmt( 0 0 0) label( )) append stardetach modelwidth(8)


***** Estimates for Figure C.1: Effect of years of schooling on log wages across
*                               alternative ranges of included age years 
* Re-estimates the 2SLS model for every combination of a lower age bound
* (19-25) and an upper age bound (55-65) and stores each fit as a_<lo>_<hi>.
* The regressions use the globals contr and instr as they stand when this
* section runs.
estimates drop _all

forvalues a1 = 19/25 {
	forvalues a2 = 55/65 {
		qui ivregress 2sls lhwage i.yob i.bula $contr i.bula##c.yob_ (yos = $instr) ///
			if diff_yos==0 & edu_mis==0 & school_mis==0 ///
			& age>=`a1' & age<=`a2', vce(cluster clust)
		estimates store a_`a1'_`a2'
	}
}

* save the point estimates and confidence intervals in excel file
* (level(90) requests 90% intervals; forward slash in the path works on every
* platform, whereas a backslash is a directory separator on Windows only)
estout a_*  ///
using "$sample/FigC1_age.xls" ///
	, cells(b(fmt(3) star) ci_l ci_u) starlevels(* 0.1 ** 0.05 *** 0.01) ///
	level(90) keep(*yos*) title ("IV coefficients") collabel(, none) label ///
	stats(N, fmt(0 0 0) ) replace   stardrop(*yos*)
	
	
	