* Session setup: turn off -more- pauses, empty memory, then load the replication data
set more off
clear
* clear runs before set memory
* NOTE(review): Stata 12 and later are believed to accept but ignore set memory - confirm before removing
set memory 100M
use cross_replic.dta

* Revised 01-28-18 for JAE revision

* Rescale distance metrics
* Each of the three distance measures is divided by 10000 in place
foreach distvar of varlist fst_weighted_usa fst_1500_uk dist_usa {
    replace `distvar' = `distvar'/10000
}
label variable fst_weighted_usa "FST genetic distance to the USA, weighted, Cavalli-Sforza et al. (1994)"

* Generate log of per capita income
* ln() and log() are the same natural-log function in Stata
generate lrgdpch_2005 = ln(rgdpch_2005)
label var lrgdpch_2005 "Log per capita income in 2005, PWT6.3"

* Generate latitude variable
* Convert latitude to numeric while dropping comma characters, then divide by 100
* NOTE(review): no abs() is applied here, so the label assumes the stored values are already absolute - confirm
destring latitude, replace ignore(",")
replace latitude = latitude/100
label var latitude "Absolute latitude"

* Relabel
* Attach descriptive variable labels to the geography and region controls
label var ssafrica "Sub-Saharan Africa Dummy "
label var island   "Island dummy"
label var landlock "Landlocked dummy"
label var dist_usa "Geodesic distance to the USA"
label var kgatr    "% land area in the tropics"

#delimit ;

* Genetic Distance and Development, cross-section regressions;
* Specifications are a blend of JEL and QJE specifications;
* 2005 income data throughout;
* Split into three tables: One for geography controls, one for continent effects, one for genetic diversity controls;

* Table 2 - Baseline results;
* Four columns are merged into regressions/table2.doc - univariate OLS, OLS with distance and geography controls, OLS adding the tropics share, and an IV column;
* Output paths use forward slashes because Stata accepts them on every platform, while a backslash separates directories only on Windows;

* Distance and geography controls shared by columns 2 to 4;
local geo latitude landlock island dist_usa difflat_usa difflong_usa common_water_usa contig_usa;
* outreg formatting options shared by every column;
local fmt bdec(3) tdec(2) starlevels(10 5 1) summstat(N \ r2_a) varlabels landscape;

* Column 1 - univariate, keeping only observations where difflat_usa differs from the system missing value;
regress lrgdpch_2005 new_gendist_weighted_to_usa if difflat_usa~=., robust beta;
outreg using regressions/table2.doc, `fmt' nodisplay
    ctitle("" "Univariate")
    title("Table 2: Income Level Regressions, log income per capita 2005")
    replace;

* Column 2 - adds the distance and geography controls;
regress lrgdpch_2005 new_gendist_weighted_to_usa `geo', robust beta;
outreg using regressions/table2.doc, `fmt' nodisplay
    ctitle("" "Dist. & geo. controls")
    merge;

* Column 3 - adds kgatr, the share of land area in the tropics;
regress lrgdpch_2005 new_gendist_weighted_to_usa `geo' kgatr, robust beta;
outreg using regressions/table2.doc, `fmt' nodisplay
    ctitle("" "Add tropics control")
    merge;

* Column 4 - IV, with new_gendist_weighted_to_usa instrumented by new_gendist_1500_to_uk;
* nodisplay is omitted on this final call, unlike the earlier columns;
ivreg lrgdpch_2005 `geo' kgatr (new_gendist_weighted_to_usa=new_gendist_1500_to_uk), robust beta;
outreg using regressions/table2.doc, `fmt'
    ctitle("" "IV using 1500 gen. dist.")
    merge;

* Table 3 - Systematic exploration of continent effects using the Pemberton data;
* Five columns are merged into regressions/table3.doc - continent dummies, macro-region dummies, and three subsamples that drop Sub-Saharan Africa, Europe, or both;
* Output paths use forward slashes because Stata accepts them on every platform, while a backslash separates directories only on Windows;

* Continent dummies are sums of macro-region dummies, so a missing component makes the sum missing;
gen africa=ssafrica+nafrica;
label variable africa "Africa dummy";
gen asia=seasia+soasia+scasia+mideast;
label variable asia "Asia dummy";
gen europe=weurope+eeurope;
label variable europe "Europe dummy";

* Distance and geography controls shared by every column;
local geo latitude landlock island dist_usa difflat_usa difflong_usa common_water_usa contig_usa;
* outreg formatting options shared by every column;
local fmt bdec(3) tdec(2) starlevels(10 5 1) summstat(N \ r2_a) varlabels landscape;

* Column 1 - continent dummies;
regress lrgdpch_2005 new_gendist_weighted_to_usa `geo' europe africa namerica laamcarib asia, robust beta;
outreg using regressions/table3.doc, `fmt' nodisplay
    ctitle("" "All Continent dummies")
    title("Table 3: Exploration of Regional Effects using the Pemberton et al. (2013) Data")
    replace;

* Column 2 - macro-region dummies;
regress lrgdpch_2005 new_gendist_weighted_to_usa `geo'
    weurope eeurope namerica lamerica caribbean ssafrica nafrica seasia soasia scasia mideast, robust beta;
outreg using regressions/table3.doc, `fmt' nodisplay
    ctitle("" "All Macroregion dummies")
    merge;

* Column 3 - sample restricted to ssafrica equal to 0;
regress lrgdpch_2005 new_gendist_weighted_to_usa `geo' if ssafrica==0, robust beta;
outreg using regressions/table3.doc, `fmt' nodisplay
    ctitle("" "Removing Sub-Saharan African countries")
    merge;

* Column 4 - sample restricted to europe equal to 0;
regress lrgdpch_2005 new_gendist_weighted_to_usa `geo' if (europe==0), robust beta;
outreg using regressions/table3.doc, `fmt' nodisplay
    ctitle("" "Removing European countries")
    merge;

* Column 5 - sample restricted to both europe and ssafrica equal to 0;
regress lrgdpch_2005 new_gendist_weighted_to_usa `geo' if (europe==0 & ssafrica==0), robust beta;
outreg using regressions/table3.doc, `fmt' nodisplay
    ctitle("" "Removing European / SS African countries")
    merge;

* Table 4 - Genetic distance and genetic diversity;
* Three columns are merged into regressions/table4.doc - diversity controls only, plus distance and geography controls, plus linguistic and religious distance;
* Output paths use forward slashes because Stata accepts them on every platform, while a backslash separates directories only on Windows;

* Distance and geography controls shared by columns 2 and 3;
local geo latitude landlock island dist_usa difflat_usa difflong_usa common_water_usa contig_usa;
* outreg formatting options shared by every column;
local fmt bdec(3) tdec(2) starlevels(10 5 1) summstat(N \ r2_a) varlabels landscape;

* Column 1 - genetic distance with pdiv and pdiv_sqr only;
regress lrgdpch_2005 new_gendist_weighted_to_usa pdiv pdiv_sqr, robust beta;
outreg using regressions/table4.doc, `fmt' nodisplay
    ctitle("" "Gen. div. controls")
    title("Table 4: Genetic Distance and Genetic Diversity, log income per capita 2005")
    replace;

* Column 2 - adds the distance and geography controls and kgatr;
regress lrgdpch_2005 new_gendist_weighted_to_usa `geo' kgatr pdiv pdiv_sqr, robust beta;
outreg using regressions/table4.doc, `fmt' nodisplay
    ctitle("" "Gen. div. & dist. & geo. controls")
    merge;

* Column 3 - adds lingdist_weighted_formula_usa and reldist_weighted_formula_usa;
* nodisplay is omitted on this final call, unlike the earlier columns;
regress lrgdpch_2005 new_gendist_weighted_to_usa `geo' kgatr pdiv pdiv_sqr
    lingdist_weighted_formula_usa reldist_weighted_formula_usa, robust beta;
outreg using regressions/table4.doc, `fmt'
    ctitle("" "Add linguistic and religious distance")
    merge;
* 
******************************************
* Now regressions for the online appendix;
* Same as above, using Cavalli-Sforza data instead of Pemberton data;

* Table A2 - Analog to Table 2;
* Same four columns as Table 2 with fst_weighted_usa as the genetic distance measure and fst_1500_uk as its instrument, merged into regressions/tableA2.doc;
* Output paths use forward slashes because Stata accepts them on every platform, while a backslash separates directories only on Windows;

* Distance and geography controls shared by columns 2 to 4;
local geo latitude landlock island dist_usa difflat_usa difflong_usa common_water_usa contig_usa;
* outreg formatting options shared by every column;
local fmt bdec(3) tdec(2) starlevels(10 5 1) summstat(N \ r2_a) varlabels landscape;

* Column 1 - univariate, keeping only observations where difflat_usa differs from the system missing value;
regress lrgdpch_2005 fst_weighted_usa if difflat_usa~=., robust beta;
outreg using regressions/tableA2.doc, `fmt' nodisplay
    ctitle("" "Univariate")
    title("Table A2: Income Level Regressions, log income per capita 2005")
    replace;

* Column 2 - adds the distance and geography controls;
regress lrgdpch_2005 fst_weighted_usa `geo', robust beta;
outreg using regressions/tableA2.doc, `fmt' nodisplay
    ctitle("" "Dist. & geo. controls")
    merge;

* Column 3 - adds kgatr, the share of land area in the tropics;
regress lrgdpch_2005 fst_weighted_usa `geo' kgatr, robust beta;
outreg using regressions/tableA2.doc, `fmt' nodisplay
    ctitle("" "Add tropics control")
    merge;

* Column 4 - IV, with fst_weighted_usa instrumented by fst_1500_uk;
* nodisplay is omitted on this final call, unlike the earlier columns;
ivreg lrgdpch_2005 `geo' kgatr (fst_weighted_usa=fst_1500_uk), robust beta;
outreg using regressions/tableA2.doc, `fmt'
    ctitle("" "IV using 1500 gen. dist.")
    merge;

* Table A3 - Analog to Table 3;
* Same five columns as Table 3 with fst_weighted_usa as the genetic distance measure, merged into regressions/tableA3.doc;
* The europe, africa and asia dummies used here are the ones generated earlier in this script for Table 3;
* Output paths use forward slashes because Stata accepts them on every platform, while a backslash separates directories only on Windows;

* Distance and geography controls shared by every column;
local geo latitude landlock island dist_usa difflat_usa difflong_usa common_water_usa contig_usa;
* outreg formatting options shared by every column;
local fmt bdec(3) tdec(2) starlevels(10 5 1) summstat(N \ r2_a) varlabels landscape;

* Column 1 - continent dummies;
regress lrgdpch_2005 fst_weighted_usa `geo' europe africa namerica laamcarib asia, robust beta;
outreg using regressions/tableA3.doc, `fmt' nodisplay
    ctitle("" "All Continent dummies")
    title("Table A3: Exploration of Regional Effects using the Cavalli-Sforza Data")
    replace;

* Column 2 - macro-region dummies;
regress lrgdpch_2005 fst_weighted_usa `geo'
    weurope eeurope namerica lamerica caribbean ssafrica nafrica seasia soasia scasia mideast, robust beta;
outreg using regressions/tableA3.doc, `fmt' nodisplay
    ctitle("" "All Macroregion dummies")
    merge;

* Column 3 - sample restricted to ssafrica equal to 0;
regress lrgdpch_2005 fst_weighted_usa `geo' if ssafrica==0, robust beta;
outreg using regressions/tableA3.doc, `fmt' nodisplay
    ctitle("" "Removing Sub-Saharan African countries")
    merge;

* Column 4 - sample restricted to europe equal to 0;
regress lrgdpch_2005 fst_weighted_usa `geo' if (europe==0), robust beta;
outreg using regressions/tableA3.doc, `fmt' nodisplay
    ctitle("" "Removing European countries")
    merge;

* Column 5 - sample restricted to both europe and ssafrica equal to 0;
regress lrgdpch_2005 fst_weighted_usa `geo' if (europe==0 & ssafrica==0), robust beta;
outreg using regressions/tableA3.doc, `fmt' nodisplay
    ctitle("" "Removing European / SS African countries")
    merge;

* Table A4 - Analog to Table 4;
* Same three columns as Table 4 with fst_weighted_usa as the genetic distance measure, merged into regressions/tableA4.doc;
* Output paths use forward slashes because Stata accepts them on every platform, while a backslash separates directories only on Windows;

* Distance and geography controls shared by columns 2 and 3;
local geo latitude landlock island dist_usa difflat_usa difflong_usa common_water_usa contig_usa;
* outreg formatting options shared by every column;
local fmt bdec(3) tdec(2) starlevels(10 5 1) summstat(N \ r2_a) varlabels landscape;

* Column 1 - genetic distance with pdiv and pdiv_sqr only;
regress lrgdpch_2005 fst_weighted_usa pdiv pdiv_sqr, robust beta;
outreg using regressions/tableA4.doc, `fmt' nodisplay
    ctitle("" "Gen. div. controls")
    title("Table A4: Genetic Distance and Genetic Diversity, log income per capita 2005")
    replace;

* Column 2 - adds the distance and geography controls and kgatr;
regress lrgdpch_2005 fst_weighted_usa `geo' kgatr pdiv pdiv_sqr, robust beta;
outreg using regressions/tableA4.doc, `fmt' nodisplay
    ctitle("" "Gen. div. & dist. & geo. controls")
    merge;

* Column 3 - adds lingdist_weighted_formula_usa and reldist_weighted_formula_usa;
* nodisplay is omitted on this final call, unlike the earlier columns;
regress lrgdpch_2005 fst_weighted_usa `geo' kgatr pdiv pdiv_sqr
    lingdist_weighted_formula_usa reldist_weighted_formula_usa, robust beta;
outreg using regressions/tableA4.doc, `fmt'
    ctitle("" "Add linguistic and religious distance")
    merge;
