/*******************************************************************
 This file restricts the data as in Pischke & von Wachter (2008)
 and replicates their estimates. It generates estimates for 
 the Appendix Tables C.1, C.2, C.3, and Figure B.1.
********************************************************************/

* Load the analysis file. A forward slash is used as the directory separator
* because Stata accepts it on every platform, whereas a backslash only works
* on Windows.
use "${sample}/compuls.dta", clear

* Four-digit survey year (year is stored as an offset from 1900)
gen welle = year + 1900

* sample restrictions

* Clean the length-of-school variable: keep observations whose difference to
* the school variable lies in [-2, 4]. A missing difference is dropped too,
* because Stata treats missing as larger than any number.
gen dif = length - school
drop if dif < -2 | dif > 4
drop dif

keep if yob >= 30 & yob <= 60 /*cohorts 1930-1960*/
keep if age >= 19 & age <= 65 /*age 19-65*/
keep if bula < 11 /*only West German states (excl. Berlin)*/

* Drop missings. missing() also catches extended missing values (.a-.z),
* which a comparison with "." alone would let through.
drop if missing(school2, exp, lnw)

* Four-digit year of birth; must come after the cohort filter above, which
* works on the two-digit values
replace yob = yob + 1900

* Copies of length and lnw under alternative names (not used in this section)
gen yos = length
gen lhwage = lnw

* Report the size of the estimation sample
count
* Global macros shared by all regressions below:
*   instr - the instrumental variable
*   contr - the common control variables (age* expands to every variable
*           whose name starts with "age")
global instr "switch2"
global contr "age* female year79 year85 year92 year99"


*** First-stage estimations for Appendix Table C.1, Panel B
estimates drop _all

* Column 1: length on the instrument, full sample.
* The F statistic of the instrument is attached to the results as e(F1)
* before they are stored, so no store/restore round-trip is needed.
qui reg length $instr $contr i.bula##c.yob i.yob, cluster(clust)
qui test $instr
scalar F1 = r(F)
qui estadd scalar F1
estimates store pw_fs

* Column 2: same specification, restricted to basic==1
qui reg length $instr $contr i.bula##c.yob i.yob if basic==1, cluster(clust)
qui test $instr
scalar F1 = r(F)
qui estadd scalar F1
estimates store pw_fs_b

* Column 3: basic as the dependent variable (no F statistic is computed
* for this model, so its F cell stays empty in the table)
qui reg basic $instr $contr i.bula##c.yob i.yob, cluster(clust)
estimates store pw_fs_bd

* Output for Appendix Table C.1, Panel B
* The F statistic stored in e(F1) is now reported next to the number of
* observations; previously it was computed but never displayed.
estout pw_fs pw_fs_b pw_fs_bd  ///
	, cells(b(fmt(3) star) se(par)) starlevels(* 0.1 ** 0.05 *** 0.01)  ///
	keep($instr) collabel(, none) label ///
	title("Table C.1 Replication: First-stage effect (Panel B)")  ///
	stats(F1 N, fmt(2 0) labels("First-stage F" "Observations"))  ///
	stardetach modelwidth(8)

estimates drop _all
*** OLS and IV estimations for Appendix Table C.2, Panel B

* OLS of lnw on ed2 and on length, with state dummies only
qui reg lnw ed2 $contr i.bula, cluster(clust)
estimates store pw_ols
qui reg lnw length $contr i.bula, cluster(clust)
estimates store pw_ols1

* Full sample: reduced form (lnw on the instrument) and IV (length
* instrumented), both with state-specific linear cohort trends and
* year-of-birth dummies
qui reg lnw $instr $contr i.bula##c.yob i.yob, cluster(clust)
estimates store pw_rf
qui ivreg2 lnw (length=$instr) $contr i.bula##c.yob i.yob, cluster(clust)
estimates store pw_iv

* Same two models restricted to basic==1
qui reg lnw $instr $contr i.bula##c.yob i.yob if basic==1, cluster(clust)
estimates store pw_rf_b
qui ivreg2 lnw (length=$instr) $contr i.bula##c.yob i.yob if basic==1, cluster(clust)
estimates store pw_iv_b

* Output for Appendix Table C.2, Panel B
* The observations row is labelled explicitly, consistent with Table C.1
* (the label was previously left empty).
estout pw_*  ///
	, cells(b(fmt(3) star) se(par)) starlevels(* 0.1 ** 0.05 *** 0.01)  ///
	keep(ed2 length $instr) collabel(, none) label ///
	title("Table C.2: Replication: Wage returns to schooling (Panel B)")  ///
	stats(N, fmt(0) label("Observations")) stardetach modelwidth(8)


*** Descriptive statistics for Table C.3

* Mean, standard deviation and number of observations are computed with
* tabstat and saved as matrices (statistics in rows, variables in columns).

* Set 1: variables reported with one decimal place
global descr welle yob age school2 length train ed2
quietly tabstat $descr, statistics(mean sd n) format(%9.3f) columns(statistics) save
matrix StatTotal1 = r(StatTotal)
matrix rownames StatTotal1 = Mean StDev Obs

* Set 2: variables reported with two decimal places
global descr female school8 school9 school10 school12 school13 self
quietly tabstat $descr, statistics(mean sd n) format(%9.3f) columns(statistics) save
matrix StatTotal2 = r(StatTotal)
matrix rownames StatTotal2 = Mean StDev Obs

* Output for Table C.3: Replication: Sample means
* The matrices are transposed so that each variable is a row; the three
* formats apply to the Mean, StDev and Obs columns respectively.
estout matrix(StatTotal1, fmt("1" "1"  "0") transpose)
estout matrix(StatTotal2, fmt("2" "2" "0") transpose)


* Figure B.1: Shares of students by completed school track over time
preserve

* Harmonise the degree codes in every variable matching degree*:
* codes 3 and 4 are pooled into 3, code 5 is set to missing
recode degree* (3 4=3) (5=.)

* In the 1999 wave, replace degree by the larger of degree and degree_
* whenever degree_ is observed
egen degree_max=rowmax(degree degree_) if welle==1999
replace degree=degree_max if welle==1999 & degree_!=.
drop degree_max

* Value labels (the entry for code 5 never applies to degree, because
* code 5 was recoded to missing above)
lab def degree 0 "0: no degree" 1 "1: basic" 2 "2: intermediate" ///
			   3 "3: high (Abi/Fachabi)" 5 "4: other"
lab val degree degree

* Track indicators are built directly from the degree codes instead of from
* the position-numbered dummies of -tabulate, gen()-, whose numbering shifts
* silently if one of the codes does not occur in the data.
* NOTE(review): as before, observations with a missing degree stay in the
* denominator with both indicators equal to 0, so they end up in the top
* ("Academic track") band of the figure - confirm that this is intended.
gen byte share_basic  = inlist(degree, 0, 1)
gen byte share_middle = degree == 2
lab var share_basic  "Basic track (at most)"
lab var share_middle "Middle track"

* Cohort-level shares
collapse (mean) share_basic share_middle, by(yob)

* generate variables for an area plot: cumulative shares, drawn from the
* tallest area to the smallest so that each band stays visible
gen basic=share_basic
gen middle=share_basic+share_middle
gen academic=1

drop share_*

twoway (area academic yob, color(gs15)) (area middle yob, color(gs12)) ///
	(area basic yob, color(gs8)), ///
	xlabel(1930 (5) 1960) ylabel(0 (0.25) 1, nogrid val) xtitle("") ///
	text(0.3 1940 "{bf:Basic track}") text(0.26 1940 "{bf:(at most)}") ///
	text(0.75 1947 "{bf:Middle track}") text(0.90 1955 "{bf:Academic track}") ///
	legend(off) graphregion(color(white))

* Forward slash keeps the export path valid on every platform
graph export "${sample}/figB1_tracks.pdf", as(pdf) replace
restore
	



