* Session setup: close any log left open by an earlier run and turn off
* output paging so the script runs without pausing.
capture log close
set more off

* Covariate lists. The base list excludes the year dummies; the full list
* adds the 2007, 2009 and 2011 dummies on top of it.
global npcovariates_withoutyear age age2 numpeeps lnrearnings_head rearnings_head_zero
global npcovariates ${npcovariates_withoutyear} year2007 year2009 year2011

* Load only the PSID variables needed below.
use lnrcon lnrhhinc_lag3 lnrhhinc_lag2 lnrhhinc_lag1 lnrhhinc lnrhhinc_lead1 lnrhousevalue ///
	$npcovariates_withoutyear everrenter9913 everincomezero9913 evermoved9913 ///
	year id rhhinc rhousevalue owner lnrallfood lnrutilities bandedncars ///
	using "psiddata", clear

* Flag house values below the 1st or above the 99th percentile, with the
* percentiles computed among owners only.
centile rhousevalue if owner == 1, centile(1 99)
generate rhousevalue_extreme = (rhousevalue > r(c_2)) | (rhousevalue < r(c_1))

* Same flag for household income, with percentiles taken over all rows.
centile rhhinc, centile(1 99)
generate rhhinc_extreme = (rhhinc > r(c_2)) | (rhhinc < r(c_1))

* Equals 1 only when all three consumption proxies are observed: the sum is
* missing as soon as any one component is missing.
generate proxiesnotmissing = (lnrallfood + lnrutilities + bandedncars) != .

* Mark the PSID rows and build their year dummies before the CEX rows are added.
generate psid = 1
foreach yr in 2007 2009 2011 {
	generate year`yr' = (year == `yr')
}

* Stack the CEX observations underneath the PSID ones; cex marks the appended rows.
append using "cexdata", gen(cex) ///
	keep(lnrnondurable_psid lnrallfood lnrutilities bandedncars $npcovariates_withoutyear owner int_yr)
replace psid = 0 if cex == 1
replace cex = 0 if psid == 1

* For CEX rows the year dummies are taken from int_yr instead of year.
foreach yr in 2007 2009 2011 {
	replace year`yr' = (int_yr == `yr') if cex == 1
}

* sample1: CEX owners in 2005, 2007, 2009 or 2011.
* 2013 is not included because there is no income lead for that year.
generate sample1 = cex == 1 & owner == 1 & inlist(int_yr, 2005, 2007, 2009, 2011)

* sample2: PSID rows for 2005 to 2011 that never rented, never had zero income,
* never moved, have no extreme house value or income, and have all proxies.
generate sample2 = psid == 1 & everrenter9913 == 0 & everincomezero9913 == 0 ///
	& rhousevalue_extreme == 0 & rhhinc_extreme == 0 & evermoved9913 == 0 ///
	& inrange(year, 2005, 2011) & proxiesnotmissing == 1

* Rescale age2 so that it is of similar magnitude to age (helps convergence).
replace age2 = age2/100

* Cluster identifier: CEX rows take their row number as id, and grouping on
* id together with cex gives one code per distinct pair of the two.
replace id = _n if cex == 1
egen newid = group(id cex)

* Resume from the one-step estimates saved by a previous run.
estimates use "estimates\gmm_estimates_onestep"

********************************************************************************
* First step: identity weight matrix, used to get initial parameter estimates
* and a positive definite weight matrix.
* Each pass restarts gmm from the coefficients currently in memory, allows at
* most 100 BFGS iterations, saves the result, and repeats for as long as the
* objective value e(Q) stays above 0.02.

* Income lags, lead and house value entering the second-stage equations.
local xvars lnrhhinc_lag3 lnrhhinc_lag2 lnrhhinc_lag1 lnrhhinc lnrhhinc_lead1 lnrhousevalue

local criterion = e(Q)
while `criterion' > 0.02 {

	local from_second ""
	local from_all ""

	* Second-stage slopes; a missing stored coefficient restarts at zero.
	foreach var of local xvars {
		local b0 = _b["second_stage_x:`var'"]
		if `b0' == . {
			local b0 = 0
		}
		local from_second "`from_second' second_stage_x:`var' = `b0'"
	}

	* Parameters gA1 and gB1, taken as stored.
	foreach letter in A B {
		local b0 = _b["g`letter'1:_cons"]
		local from_all "`from_all' g`letter'1 = `b0'"
	}

	* Covariate coefficients of the second-stage and imputation equations,
	* again replacing missing stored values by zero.
	foreach var in $npcovariates _cons {
		foreach letter in A B {
			local b0 = _b["second_stage_cov_`letter':`var'"]
			if `b0' == . {
				local b0 = 0
			}
			local from_second "`from_second' second_stage_cov_`letter':`var' = `b0'"

			local b0 = _b["impute_cov_`letter':`var'"]
			if `b0' == . {
				local b0 = 0
			}
			local from_all "`from_all' impute_cov_`letter':`var' = `b0'"
		}
	}

	* The combined list ends with the second-stage values. They are also passed
	* a second time in from() below, with identical values.
	local from_all "`from_all' `from_second'"

	gmm (imputeA: lnrallfood*sample1 - {impute_cov_A:$npcovariates _cons}*sample1 - {gA1}*lnrnondurable_psid*sample1) ///
		(secondstageA: lnrallfood*sample2 - {gA1}*{second_stage_x:`xvars'}*sample2 - {second_stage_cov_A:$npcovariates _cons}*sample2) ///
		(imputeB: lnrutilities*sample1 - {impute_cov_B:$npcovariates _cons}*sample1 - {gB1}*lnrnondurable_psid*sample1) ///
		(secondstageB: lnrutilities*sample2 - {gB1}*{second_stage_x:}*sample2 - {second_stage_cov_B:$npcovariates _cons}*sample2), ///
		winitial(identity) nocommonesample onestep ///
		deriv(1/gA1 = -1*lnrnondurable_psid*sample1) ///
		deriv(1/impute_cov_A = -1*sample1) ///
		deriv(2/gA1 = -1*{second_stage_x:}*sample2) ///
		deriv(2/second_stage_x = -{gA1}*sample2) ///
		deriv(2/second_stage_cov_A = -1*sample2) ///
		deriv(3/gB1 = -1*lnrnondurable_psid*sample1) ///
		deriv(3/impute_cov_B = -1*sample1) ///
		deriv(4/gB1 = -1*{second_stage_x:}*sample2) ///
		deriv(4/second_stage_x = -{gB1}*sample2) ///
		deriv(4/second_stage_cov_B = -1*sample2) ///
		instruments(1: lnrnondurable_psid $npcovariates sample1, nocons) ///
		instruments(2: `xvars' $npcovariates sample2, nocons) ///
		instruments(3: lnrnondurable_psid $npcovariates sample1, nocons) ///
		instruments(4: `xvars' $npcovariates sample2, nocons) ///
		from(`from_all' `from_second') ///
		vce(cluster newid) conv_maxiter(100) technique(bfgs)

	* Save this pass to disk, then refresh the stopping criterion.
	estimates save "estimates\gmm_estimates_onestep", replace

	local criterion = e(Q)
	di "Objective function: `criterion'"

}

********************************************************************************
* Second step: restart from the saved one-step estimates and pass their stored
* weight matrix e(W) to gmm as the initial weight matrix. The onestep option
* is not given here, so gmm runs its default estimator.

estimates use "estimates\gmm_estimates_onestep"
matrix weightmatrix = e(W)

* Income lags, lead and house value entering the second-stage equations.
local xvars lnrhhinc_lag3 lnrhhinc_lag2 lnrhhinc_lag1 lnrhhinc lnrhhinc_lead1 lnrhousevalue

local from_second ""
local from_all ""

* Second-stage slopes; a missing stored coefficient starts at zero.
foreach var of local xvars {
	local b0 = _b["second_stage_x:`var'"]
	if `b0' == . {
		local b0 = 0
	}
	local from_second "`from_second' second_stage_x:`var' = `b0'"
}

* Parameters gA1 and gB1, taken as stored.
foreach letter in A B {
	local b0 = _b["g`letter'1:_cons"]
	local from_all "`from_all' g`letter'1 = `b0'"
}

* Covariate coefficients of the second-stage and imputation equations,
* again replacing missing stored values by zero.
foreach var in $npcovariates _cons {
	foreach letter in A B {
		local b0 = _b["second_stage_cov_`letter':`var'"]
		if `b0' == . {
			local b0 = 0
		}
		local from_second "`from_second' second_stage_cov_`letter':`var' = `b0'"

		local b0 = _b["impute_cov_`letter':`var'"]
		if `b0' == . {
			local b0 = 0
		}
		local from_all "`from_all' impute_cov_`letter':`var' = `b0'"
	}
}

* The combined list ends with the second-stage values. They are also passed
* a second time in from() below, with identical values.
local from_all "`from_all' `from_second'"

gmm (imputeA: lnrallfood*sample1 - {impute_cov_A:$npcovariates _cons}*sample1 - {gA1}*lnrnondurable_psid*sample1) ///
	(secondstageA: lnrallfood*sample2 - {gA1}*{second_stage_x:`xvars'}*sample2 - {second_stage_cov_A:$npcovariates _cons}*sample2) ///
	(imputeB: lnrutilities*sample1 - {impute_cov_B:$npcovariates _cons}*sample1 - {gB1}*lnrnondurable_psid*sample1) ///
	(secondstageB: lnrutilities*sample2 - {gB1}*{second_stage_x:}*sample2 - {second_stage_cov_B:$npcovariates _cons}*sample2), ///
	winitial(weightmatrix) nocommonesample ///
	deriv(1/gA1 = -1*lnrnondurable_psid*sample1) ///
	deriv(1/impute_cov_A = -1*sample1) ///
	deriv(2/gA1 = -1*{second_stage_x:}*sample2) ///
	deriv(2/second_stage_x = -{gA1}*sample2) ///
	deriv(2/second_stage_cov_A = -1*sample2) ///
	deriv(3/gB1 = -1*lnrnondurable_psid*sample1) ///
	deriv(3/impute_cov_B = -1*sample1) ///
	deriv(4/gB1 = -1*{second_stage_x:}*sample2) ///
	deriv(4/second_stage_x = -{gB1}*sample2) ///
	deriv(4/second_stage_cov_B = -1*sample2) ///
	instruments(1: lnrnondurable_psid $npcovariates sample1, nocons) ///
	instruments(2: `xvars' $npcovariates sample2, nocons) ///
	instruments(3: lnrnondurable_psid $npcovariates sample1, nocons) ///
	instruments(4: `xvars' $npcovariates sample2, nocons) ///
	from(`from_all' `from_second') ///
	vce(cluster newid)

* Keep the second-step results on disk.
estimates save "estimates\gmm_estimates_twostep", replace

* Record the final results in a plain-text log, overwriting any earlier copy.
log using "log\AM_results", replace text

* Redisplay the estimation results currently in memory.
estimates replay

* Hansen's J-test of the overidentifying restrictions
estat overid

log close

* Separate estimation of the food equations only (imputeA and secondstageA),
* starting from an identity initial weight matrix, with no from() starting
* values and standard errors clustered on newid.
* The comma after the last equation is required: it separates the equations
* from the options. It was missing here, unlike in the other gmm calls in this
* file, so the command stopped with a syntax error.
gmm (imputeA: lnrallfood*sample1 - {impute_cov_A:$npcovariates _cons}*sample1 - {gA1}*lnrnondurable_psid*sample1) ///
(secondstageA: lnrallfood*sample2 - {gA1}*{second_stage_x:lnrhhinc_lag3 lnrhhinc_lag2 lnrhhinc_lag1 lnrhhinc lnrhhinc_lead1 lnrhousevalue}*sample2 - {second_stage_cov_A:$npcovariates _cons}*sample2), ///
winitial(identity) nocommonesample ///
deriv(1/gA1 = -1*lnrnondurable_psid*sample1) deriv(1/impute_cov_A = -1*sample1) ///
deriv(2/gA1 = -1*{second_stage_x:}*sample2) deriv(2/second_stage_x = -{gA1}*sample2)  deriv(2/second_stage_cov_A = -1*sample2)  ///
instruments(1: lnrnondurable_psid $npcovariates sample1, nocons) ///
instruments(2: lnrhhinc_lag3 lnrhhinc_lag2 lnrhhinc_lag1 lnrhhinc lnrhhinc_lead1 lnrhousevalue $npcovariates sample2, nocons) ///
vce(cluster newid) 

* Separate estimation of the utilities equations only (imputeB and
* secondstageB), starting from an identity initial weight matrix, with no
* from() starting values and standard errors clustered on newid.
local xvars lnrhhinc_lag3 lnrhhinc_lag2 lnrhhinc_lag1 lnrhhinc lnrhhinc_lead1 lnrhousevalue

gmm (imputeB: lnrutilities*sample1 - {impute_cov_B:$npcovariates _cons}*sample1 - {gB1}*lnrnondurable_psid*sample1) ///
	(secondstageB: lnrutilities*sample2 - {gB1}*{second_stage_x:`xvars'}*sample2 - {second_stage_cov_B:$npcovariates _cons}*sample2), ///
	winitial(identity) nocommonesample ///
	deriv(1/gB1 = -1*lnrnondurable_psid*sample1) ///
	deriv(1/impute_cov_B = -1*sample1) ///
	deriv(2/gB1 = -1*{second_stage_x:}*sample2) ///
	deriv(2/second_stage_x = -{gB1}*sample2) ///
	deriv(2/second_stage_cov_B = -1*sample2) ///
	instruments(1: lnrnondurable_psid $npcovariates sample1, nocons) ///
	instruments(2: `xvars' $npcovariates sample2, nocons) ///
	vce(cluster newid)
	