
* Session setup: start from an empty dataset and run without pausing for --more--.
clear
set more off
set matsize 10000
* Variable names must be typed in full (no abbreviations).
set varabbrev off

* Positional arguments: argument 1 is the index of the first survey wave to use,
* argument 2 is the index of the last one. Indices refer to year1-year4 below.
if `"`1'"'=="" | `"`2'"'=="" {
	display as error "two arguments required: start wave index and end wave index"
	exit 198
}
local start_year=`1'
local end_year=`2'

* Survey waves, indexed 1 to nwaves
local year1 1997
local year2 2004
local year3 2007
local year4 2010
local nwaves 4

* Reject invalid indices up front. An index outside 1..nwaves would expand to an
* empty year and be skipped without any message, and start > end would leave the
* year list empty so that every observation is dropped further down.
if `start_year'!=int(`start_year') | `end_year'!=int(`end_year') ///
	| `start_year'<1 | `end_year'>`nwaves' | `start_year'>`end_year' {
	display as error "wave indices must be integers with 1 <= start <= end <= `nwaves'"
	exit 198
}

* Space-separated list of the calendar years of the selected waves
local listyear

forvalues i=`start_year'/`end_year' {
	local listyear `listyear' `year`i''
}

* Read the prepared panel. The file is produced by the data preparation code and
* has to be placed in the "Data" folder.
use Data/SuriPanel_extended.dta, clear

* Restrict the sample to the survey years selected for this run:
* flag everything for removal, then unflag the rows whose year is in the list.
capture drop todrop
gen todrop=1
foreach yr of local listyear {
	replace todrop=0 if year==`yr'
}
keep if !todrop

* Exclude the high HIV districts
drop if inlist(dist,"siaya","kisumu")

* Dependent variable: log of harvest (kg) per acre; rows where it is undefined are removed
gen lyield=log(kg_harv/acres)
drop if lyield==.

* Explanatory variable: indicator for a strictly positive quantity of purchased
* hybrid seed (as in Suri). Rows with missing purchases are removed first.
drop if hybrid_purch==.
capture drop hybrid
gen hybrid=(hybrid_purch>0)

* Recode the string geographical identifiers as labelled numeric variables that
* keep their original names. Rows with an empty identifier are removed first.
foreach geo in prov vil dist {
	drop if `geo'==""
	encode `geo', gen(`geo'2)
	drop `geo'
	rename `geo'2 `geo'
}

* Report duplicate household-year observations (none are expected)
duplicates report hhid year

* Keep only households observed in at least two of the selected periods
gen ones=1
egen count=total(ones), by(hhid)
drop if count<2

* Classify households by their hybrid use over the sample:
* temp1/temp2 hold the household maximum/minimum of the hybrid indicator.
capture drop temp1
capture drop temp2
egen temp1=max(hybrid), by(hhid)
egen temp2=min(hybrid), by(hhid)
* switcher: hybrid status varies; always/never: constant at 1/0
gen switcher=(temp1!=temp2)

gen always=(!switcher&temp2==1)
gen never=(!switcher&temp2==0)

* Number of distinct households, overall and among switchers
unique hhid
unique hhid if switcher

sort hhid year

* Time period variable: consecutive integers 1, 2, ... following the order of the
* selected years, so that one period is always an increment of one.
gen per=.
local t=0
foreach yr of local listyear {
	local ++t
	replace per=`t' if year==`yr'
}
* Number of periods in the selected window
local nper=`t'

xtset hhid per

* Build the covariates w1, w2, ...: each control interacted with period indicators.

* Province dummies provd1, provd2, ...
tab prov, gen(provd)

capture drop ones
gen ones=1

* Controls interacted with every period
local covars acres seedkg lpcost totfertexp hiredlabor_S familylabor_S main hhsize boys girls oldermen women
* Province dummies, interacted with every period except the last
local provdums provd1 provd2 provd3 provd4 provd5 provd6
local nperm1=`nper'-1

* count is the index of the next w variable to create
local count=1
foreach x of local covars {
	forvalues t=1/`nper' {
		capture drop w`count'
		gen w`count'=`x'*(per==`t')
		local ++count
	}
}

foreach x of local provdums {
	forvalues t=1/`nperm1' {
		capture drop w`count'
		gen w`count'=`x'*(per==`t')
		local ++count
	}
}


* Step back by one so that count is the number of covariates created; show and store it
local --count
display `count'
scalar nw=`count'

*Chamberlain 1992 regression (with homoscedastic weights instead of semiparametric efficient estimator)

*Make sure two methods are identical (the second one is more convenient for inference)

*First method: absorb one fixed effect per household-by-hybrid-status cell.
*The cell identifier is stored as double. The default float type represents integers
*exactly only up to 16,777,216, so with large household ids hhid*10+hybrid could
*round and merge distinct cells.
capture drop hhid_hybrid
gen double hhid_hybrid=hhid*10+hybrid
areg lyield w1-w`count', absorb(hhid_hybrid)

*Absorbed effect plus residual
capture drop yresid
predict yresid if hhid_hybrid!=., dresiduals

*Absorbed (household-by-hybrid) effect
capture drop temp
predict temp, d

*apb: household mean of the absorbed effect over its hybrid observations
capture drop temp1
capture drop temp2
gen temp1=temp if hybrid
egen temp2=mean(temp1), by(hhid)
gen apb=temp2

*a: household mean of the absorbed effect over its non-hybrid observations
capture drop temp1
capture drop temp2
gen temp1=temp if !hybrid
egen temp2=mean(temp1), by(hhid)
gen a=temp2

*First-method results: the difference apb-a for switchers, and each level
*restricted to the households for which it is defined
gen return1=apb-a if switcher
gen a1=a if never|switcher
gen apb1=apb if always|switcher

*Second method: partial the hhid_hybrid fixed effects out of the outcome and of every
*covariate, then regress residuals on residuals

*Residualize each covariate w1-w`count' on the hhid_hybrid fixed effects (stored as wd1-wd`count')
forvalues var=1/`count' {
	capture drop wd`var'
	capture drop temp
	areg w`var', absorb(hhid_hybrid)
	predict wd`var', resid
}
*Residualize the outcome in the same way
capture drop yd
areg lyield, absorb(hhid_hybrid)
predict yd, resid

*Residualized outcome on residualized covariates, without a constant
reg yd wd1-wd`count', nocons

*Keep the coefficient vector as a Stata matrix and copy it into Mata
matrix gamma_fs=e(b)

mata: gamma_fs=st_matrix("gamma_fs")

*Clear any existing return, a and apb (a and apb were created by the first method;
*its results remain available as return1, a1 and apb1)
capture drop return
capture drop a
capture drop apb

*Temporarily place the raw covariates under the wd* names, so that predict, xb applies
*the coefficients of the regression above to the unresidualized w*. The residualized
*versions are parked as wd*_archive and restored at the end of this section.
forvalues var=1/`count' {
	rename wd`var' wd`var'_archive
	gen wd`var'=w`var'
}
capture drop temp1
predict temp1, xb

*Outcome net of the covariate index, averaged within each hhid_hybrid cell
capture drop temp2
gen temp2=lyield-temp1
capture drop temp
egen temp=mean(temp2), by(hhid_hybrid)

*apb: household mean of that cell average over its hybrid observations
capture drop temp1
capture drop temp2
gen temp1=temp if hybrid
egen temp2=mean(temp1), by(hhid)
gen apb=temp2

*a: household mean of that cell average over its non-hybrid observations
capture drop temp1
capture drop temp2
gen temp1=temp if !hybrid
egen temp2=mean(temp1), by(hhid)
gen a=temp2

*Second-method results, defined on the same subsamples as return1, a1 and apb1
gen return2=apb-a if switcher
gen a2=a if never|switcher
gen apb2=apb if always|switcher

*Check that the two methods agree: summarize the differences between the second- and
*first-method results (expected to be zero), overall and for switchers only
capture drop temp
gen temp=return2-return1
su temp
su temp if switcher
capture drop temp
gen temp=apb2-apb1
su temp
su temp if switcher
capture drop temp
gen temp=a2-a1
su temp
su temp if switcher

*Free the unsuffixed names
capture drop return
capture drop apb
capture drop a

*Keep the second-method results under the unsuffixed names
rename return2 return
rename apb2 apb
rename a2 a

*Restore the residualized covariates under the wd* names
forvalues var=1/`count' {
	drop wd`var'
	rename wd`var'_archive wd`var'
}

* For each period t, create hybrid<t>: the household's hybrid indicator in period t,
* copied to all of that household's observations (used by later code). It is missing
* for households with no observation in period t.
forvalues t=1/`nper' {
	capture drop temp
	* Defined only on the period-t row at first
	gen hybrid`t'=hybrid if per==`t'
	* Spread the period-t value across the household's rows
	egen temp=max(hybrid`t'), by(hhid)
	replace hybrid`t'=temp
}


