* PURPOSE: RUN SEMIPARAMETRIC ESTIMATION DISCUSSED IN BINARY_SELECTION PAPER
* LIMIT SAMPLE TO WHITE WOMEN

# delimit;

clear;
clear matrix;
clear mata;

set mem 1000m;
set matsize 5000;

set seed 0910151150;

cd "c:\Nastja\Research\BinarySelection\NLSY79\";

capture log close;
log using logs\semiparam_regs.log, replace;

use "data\NLSY79_91_94.dta", clear;

******************************
* LIMIT SAMPLE TO WHITE WOMEN;

tab SEX RACE;
keep if SEX==2 & RACE==3;

rename NUM_SIBS79 oNUM_SIBS79;
rename NUM_OLDER_SIBS79 oNUM_OLDER_SIBS79;
rename NET_WORTH oNET_WORTH;
rename NOINT oNOINT;
rename NUMKID oNUMKID;

replace year=year-1989;
*replace year=year-1987;

sum afqt;
replace afqt=(afqt-r(mean))/r(sd);

**************************************************************
* NOTE: qreg DID NOT PRODUCE REASONABLE RESULTS, 
	scls DID NOT CONVERGE IN BOOTSTRAP REPLICATIONS, SO USE TOBIT;

*** NOTE: RULE-OF-THUMB BANDWIDTH CHOICE IS N^(-1/(4+q)), WHERE q IS DIMENSION OF X
	  q=5 for estimating g (x: age educ afqt_1 mmarried v2hat)
	  q=2 for estimating weighting function at second stage (x: g and v2hat);

*** BELOW ALSO ADD MULTIPLCATION BY sd OF THE CORRESPONDING VARIABLE FOR EACH INDIVIDAUL KERNAL;

qui sum id if year==1;
scalar N=r(N);
scalar T=5;

scalar di N;

scalar h_g=(N^(-1/9));	/* h_g = bandwidth for estimating 1st stage conditional expectation, g */
scalar h_w=(N)^(-1/6);	/* h = bandwidth for 2nd stage estimation of weighting function (joint density of g and v2hat) */

*****************************************************************;
tab year;

gen y1=pension;
gen y2=hrswrk52;

save "data\NLSY79_bin_sel_bootstrap.dta", replace;

**************************************************************************************;
scalar reps=2;


mata;

stata ("sort id year")

stata ("qui tobit y2 age educ married afqt_1 mmarried if year==1, ll(0)")
stata ("predict y2hat1")
stata ("gen v2hat=y2-y2hat1 if y2>0 & year==1")
stata ("qui tobit y2 age educ married afqt_1 mmarried if year==2, ll(0)")
stata ("predict y2hat2")
stata ("replace v2hat=y2-y2hat2 if y2>0 & year==2")
stata ("qui tobit y2 age educ married afqt_1 mmarried if year==3, ll(0)")
stata ("predict y2hat3")
stata ("replace v2hat=y2-y2hat3 if y2>0 & year==3")
stata ("qui tobit y2 age educ married afqt_1 mmarried if year==4, ll(0)")
stata ("predict y2hat4")
stata ("replace v2hat=y2-y2hat4 if y2>0 & year==4")
stata ("qui tobit y2 age educ married afqt_1 mmarried if year==5, ll(0)")
stata ("predict y2hat5")
stata ("replace v2hat=y2-y2hat5 if y2>0 & year==5")

stata ("qui drop y2hat1 y2hat2 y2hat3 y2hat4 y2hat5")

stata ("* USE LEAST SQUARES CROSS-VALIDATION TO FIND OPTIMAL BANDWIDTH FOR COMPUTING NONPARAMETRIC CONDITIONAL EXPECTATION g *")
stata ("*** NOTE: 2nd and 3rd arguments in st_view(allv=.,.,.) command below relate to obs (rows) and vars (columns), 0 means omit obs with missing data")

stata ("qui gen h_g=N^(-1/9)")
stata ("qui gen h_w=N^(-1/6)")

stata ("qui sum y1 if y1~=. & year==1")
stata ("qui gen Ns1=r(N)")

stata ("qui sum y1 if y1~=. & year==2")
stata ("qui gen Ns2=r(N)")

stata ("qui sum y1 if y1~=. & year==3")
stata ("qui gen Ns3=r(N)")

stata ("qui sum y1 if y1~=. & year==4")
stata ("qui gen Ns4=r(N)")

stata ("qui sum y1 if y1~=. & year==5")
stata ("qui gen Ns5=r(N)")

stata ("qui replace h_g=. if y1==.")
stata ("qui replace h_w=. if y1==.")
stata ("qui replace age=. if y1==.")
stata ("qui replace educ=. if y1==.")
stata ("qui replace afqt_1=. if y1==.")
stata ("qui replace mmarried=. if y1==.")
stata ("qui replace v2hat=. if y1==.")
stata ("qui replace Ns1=. if y1==.")
stata ("qui replace Ns2=. if y1==.")
stata ("qui replace Ns3=. if y1==.")
stata ("qui replace Ns4=. if y1==.")
stata ("qui replace Ns5=. if y1==.")

stata ("order h_g h_w y1 age educ afqt_1 mmarried v2hat Ns1 Ns2 Ns3 Ns4 Ns5")
stata ("sort year id")

st_view(allv=.,.,1..13,0)
st_subview(h_g=.,allv,.,1)
st_subview(h_w=.,allv,.,2)
st_subview(y=.,allv,.,3)
st_subview(v2=.,allv,.,8)
st_subview(Ns1=.,allv,.,9)
st_subview(Ns2=.,allv,.,10)
st_subview(Ns3=.,allv,.,11)
st_subview(Ns4=.,allv,.,12)
st_subview(Ns5=.,allv,.,13)

st_subview(Xvars=.,allv,.,4..7)

hg=h_g[1]
hw=h_w[1]
Ns1=Ns1[1]
Ns2=Ns2[1]
Ns3=Ns3[1]
Ns4=Ns4[1]
Ns5=Ns5[1]

Omega_inv = cholinv(variance(Xvars))
Omega_inv_sqrt = cholesky(Omega_inv)
Xvars_tilde = Xvars * Omega_inv_sqrt

age = Xvars_tilde[.,1]
educ = Xvars_tilde[.,2]
afqt = Xvars_tilde[.,3]
mmar = Xvars_tilde[.,4]

y1=y[1..Ns1,1]
y2=y[Ns1+1..Ns1+Ns2,1]
y3=y[Ns1+Ns2+1..Ns1+Ns2+Ns3,1]
y4=y[Ns1+Ns2+Ns3+1..Ns1+Ns2+Ns3+Ns4,1]
y5=y[Ns1+Ns2+Ns3+Ns4+1..Ns1+Ns2+Ns3+Ns4+Ns5,1]

age1=age[1..Ns1,1]
age2=age[Ns1+1..Ns1+Ns2,1]
age3=age[Ns1+Ns2+1..Ns1+Ns2+Ns3,1]
age4=age[Ns1+Ns2+Ns3+1..Ns1+Ns2+Ns3+Ns4,1]
age5=age[Ns1+Ns2+Ns3+Ns4+1..Ns1+Ns2+Ns3+Ns4+Ns5,1]

educ1=educ[1..Ns1,1]
educ2=educ[Ns1+1..Ns1+Ns2,1]
educ3=educ[Ns1+Ns2+1..Ns1+Ns2+Ns3,1]
educ4=educ[Ns1+Ns2+Ns3+1..Ns1+Ns2+Ns3+Ns4,1]
educ5=educ[Ns1+Ns2+Ns3+Ns4+1..Ns1+Ns2+Ns3+Ns4+Ns5,1]

afqt1=afqt[1..Ns1,1]
afqt2=afqt[Ns1+1..Ns1+Ns2,1]
afqt3=afqt[Ns1+Ns2+1..Ns1+Ns2+Ns3,1]
afqt4=afqt[Ns1+Ns2+Ns3+1..Ns1+Ns2+Ns3+Ns4,1]
afqt5=afqt[Ns1+Ns2+Ns3+Ns4+1..Ns1+Ns2+Ns3+Ns4+Ns5,1]

mmar1=mmar[1..Ns1,1]
mmar2=mmar[Ns1+1..Ns1+Ns2,1]
mmar3=mmar[Ns1+Ns2+1..Ns1+Ns2+Ns3,1]
mmar4=mmar[Ns1+Ns2+Ns3+1..Ns1+Ns2+Ns3+Ns4,1]
mmar5=mmar[Ns1+Ns2+Ns3+Ns4+1..Ns1+Ns2+Ns3+Ns4+Ns5,1]

v21=v2[1..Ns1,1]
v22=v2[Ns1+1..Ns1+Ns2,1]
v23=v2[Ns1+Ns2+1..Ns1+Ns2+Ns3,1]
v24=v2[Ns1+Ns2+Ns3+1..Ns1+Ns2+Ns3+Ns4,1]
v25=v2[Ns1+Ns2+Ns3+Ns4+1..Ns1+Ns2+Ns3+Ns4+Ns5,1]

sd_age = variance(age1)^(1/2)
sd_educ = variance(educ1)^(1/2)
sd_afqt = variance(afqt1)^(1/2)
sd_mmar = variance(mmar1)^(1/2)
sd_v2 = variance(v21)^(1/2)

j1=J(1,Ns1,1)
age_xx_1=(age1*j1):-age1'
educ_xx_1=(educ1*j1):-educ1'
afqt_xx_1=(afqt1*j1):-afqt1'
mmar_xx_1=(mmar1*j1):-mmar1'
v2_xx_1=(v21*j1):-v21'
d1=I(Ns1)*(2*pi())^(-1/2)

Cg = 1
Cgopt1 = Cg
CVmin = 1000000000000000

i=1
while(i<=30) {
   Cg = Cg+0.03
   Kage_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cg*sd_age)^2:*age_xx_1:^2))-d1
   Keduc_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cg*sd_educ)^2:*educ_xx_1:^2))-d1
   Kafqt_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cg*sd_afqt)^2:*afqt_xx_1:^2))-d1
   Kmmar_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cg*sd_mmar)^2:*mmar_xx_1:^2))-d1
   Kv2_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cg*sd_v2)^2:*v2_xx_1:^2))-d1
   CV = (y1-((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*y1):/((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*j1'))'*(y1-((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*y1):/((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*j1'))

   if(CV<CVmin) {
       CVmin = CV
       Cgopt1 = Cg
   }
   i++
}

Cgopt1

Kage_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt1*sd_age)^2:*age_xx_1:^2))
Keduc_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt1*sd_educ)^2:*educ_xx_1:^2))
Kafqt_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt1*sd_afqt)^2:*afqt_xx_1:^2))
Kmmar_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt1*sd_mmar)^2:*mmar_xx_1:^2))
Kv2_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt1*sd_v2)^2:*v2_xx_1:^2))
gopt1 = ((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*y1):/((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*j1')

ygopt1=(y1,gopt1)
corr_g1_y1 = correlation(ygopt1)
corr_g1_y1

stata ("qui probit y1 age educ afqt mmar v2hat if year==1")
stata ("qui predict y1hat1")
stata ("corr y1 y1hat1 if year==1")

sd_age = variance(age2)^(1/2)
sd_educ = variance(educ2)^(1/2)
sd_afqt = variance(afqt2)^(1/2)
sd_mmar = variance(mmar2)^(1/2)
sd_v2 = variance(v22)^(1/2)

j2=J(1,Ns2,1)
age_xx_2=(age2*j2):-age2'
educ_xx_2=(educ2*j2):-educ2'
afqt_xx_2=(afqt2*j2):-afqt2'
mmar_xx_2=(mmar2*j2):-mmar2'
v2_xx_2=(v22*j2):-v22'
d2=I(Ns2)*(2*pi())^(-1/2)

Cg = 1
Cgopt2 = Cg
CVmin = 1000000000000000

i=1
while(i<=30) {
   Cg = Cg+0.03
   Kage_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cg*sd_age)^2:*age_xx_2:^2))-d2
   Keduc_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cg*sd_educ)^2:*educ_xx_2:^2))-d2
   Kafqt_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cg*sd_afqt)^2:*afqt_xx_2:^2))-d2
   Kmmar_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cg*sd_mmar)^2:*mmar_xx_2:^2))-d2
   Kv2_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cg*sd_v2)^2:*v2_xx_2:^2))-d2
   CV = (y2-((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*y2):/((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*j2'))'*(y2-((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*y2):/((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*j2'))

   if(CV<CVmin) {
       CVmin = CV
       Cgopt2 = Cg
   }
   i++
}

Cgopt2

Kage_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt2*sd_age)^2:*age_xx_2:^2))
Keduc_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt2*sd_educ)^2:*educ_xx_2:^2))
Kafqt_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt2*sd_afqt)^2:*afqt_xx_2:^2))
Kmmar_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt2*sd_mmar)^2:*mmar_xx_2:^2))
Kv2_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt2*sd_v2)^2:*v2_xx_2:^2))
gopt2 = ((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*y2):/((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*j2')

ygopt2=(y2,gopt2)
corr_g2_y2 = correlation(ygopt2)
corr_g2_y2

stata ("qui probit y1 age educ afqt mmar v2hat if year==2")
stata ("qui predict y1hat2")
stata ("corr y1 y1hat2 if year==2")

sd_age = variance(age3)^(1/2)
sd_educ = variance(educ3)^(1/2)
sd_afqt = variance(afqt3)^(1/2)
sd_mmar = variance(mmar3)^(1/2)
sd_v2 = variance(v23)^(1/2)

j3=J(1,Ns3,1)
age_xx_3=(age3*j3):-age3'
educ_xx_3=(educ3*j3):-educ3'
afqt_xx_3=(afqt3*j3):-afqt3'
mmar_xx_3=(mmar3*j3):-mmar3'
v2_xx_3=(v23*j3):-v23'
d3=I(Ns3)*(2*pi())^(-1/2)

Cg = 1
Cgopt3 = Cg
CVmin = 1000000000000000

i=1
while(i<=30) {
   Cg = Cg+0.03
   Kage_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cg*sd_age)^2:*age_xx_3:^2))-d3
   Keduc_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cg*sd_educ)^2:*educ_xx_3:^2))-d3
   Kafqt_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cg*sd_afqt)^2:*afqt_xx_3:^2))-d3
   Kmmar_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cg*sd_mmar)^2:*mmar_xx_3:^2))-d3
   Kv2_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cg*sd_v2)^2:*v2_xx_3:^2))-d3
   CV = (y3-((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*y3):/((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*j3'))'*(y3-((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*y3):/((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*j3'))

   if(CV<CVmin) {
       CVmin = CV
       Cgopt3 = Cg
   }
   i++
}

Cgopt3

Kage_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt3*sd_age)^2:*age_xx_3:^2))
Keduc_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt3*sd_educ)^2:*educ_xx_3:^2))
Kafqt_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt3*sd_afqt)^2:*afqt_xx_3:^2))
Kmmar_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt3*sd_mmar)^2:*mmar_xx_3:^2))
Kv2_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt3*sd_v2)^2:*v2_xx_3:^2))
gopt3 = ((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*y3):/((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*j3')

ygopt3=(y3,gopt3)
corr_g3_y3 = correlation(ygopt3)
corr_g3_y3

stata ("qui probit y1 age educ afqt mmar v2hat if year==3")
stata ("qui predict y1hat3")
stata ("corr y1 y1hat3 if year==3")

sd_age = variance(age4)^(1/2)
sd_educ = variance(educ4)^(1/2)
sd_afqt = variance(afqt4)^(1/2)
sd_mmar = variance(mmar4)^(1/2)
sd_v2 = variance(v24)^(1/2)

j4=J(1,Ns4,1)
age_xx_4=(age4*j4):-age4'
educ_xx_4=(educ4*j4):-educ4'
afqt_xx_4=(afqt4*j4):-afqt4'
mmar_xx_4=(mmar4*j4):-mmar4'
v2_xx_4=(v24*j4):-v24'
d4=I(Ns4)*(2*pi())^(-1/2)

Cg = 1
Cgopt4 = Cg
CVmin = 1000000000000000

i=1
while(i<=30) {
   Cg = Cg+0.03
   Kage_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cg*sd_age)^2:*age_xx_4:^2))-d4
   Keduc_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cg*sd_educ)^2:*educ_xx_4:^2))-d4
   Kafqt_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cg*sd_afqt)^2:*afqt_xx_4:^2))-d4
   Kmmar_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cg*sd_mmar)^2:*mmar_xx_4:^2))-d4
   Kv2_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cg*sd_v2)^2:*v2_xx_4:^2))-d4
   CV = (y4-((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*y4):/((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*j4'))'*(y4-((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*y4):/((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*j4'))

   if(CV<CVmin) {
       CVmin = CV
       Cgopt4 = Cg
   }
   i++
}

Cgopt4

Kage_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt4*sd_age)^2:*age_xx_4:^2))
Keduc_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt4*sd_educ)^2:*educ_xx_4:^2))
Kafqt_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt4*sd_afqt)^2:*afqt_xx_4:^2))
Kmmar_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt4*sd_mmar)^2:*mmar_xx_4:^2))
Kv2_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt4*sd_v2)^2:*v2_xx_4:^2))
gopt4 = ((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*y4):/((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*j4')

ygopt4=(y4,gopt4)
corr_g4_y4 = correlation(ygopt4)
corr_g4_y4

stata ("qui probit y1 age educ afqt mmar v2hat if year==4")
stata ("qui predict y1hat4")
stata ("corr y1 y1hat4 if year==4")

sd_age = variance(age5)^(1/2)
sd_educ = variance(educ5)^(1/2)
sd_afqt = variance(afqt5)^(1/2)
sd_mmar = variance(mmar5)^(1/2)
sd_v2 = variance(v25)^(1/2)

j5=J(1,Ns5,1)
age_xx_5=(age5*j5):-age5'
educ_xx_5=(educ5*j5):-educ5'
afqt_xx_5=(afqt5*j5):-afqt5'
mmar_xx_5=(mmar5*j5):-mmar5'
v2_xx_5=(v25*j5):-v25'
d5=I(Ns5)*(2*pi())^(-1/2)

Cg = 1
Cgopt5 = Cg
CVmin = 1000000000000000

i=1
while(i<=30) {
   Cg = Cg+0.03
   Kage_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cg*sd_age)^2:*age_xx_5:^2))-d5
   Keduc_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cg*sd_educ)^2:*educ_xx_5:^2))-d5
   Kafqt_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cg*sd_afqt)^2:*afqt_xx_5:^2))-d5
   Kmmar_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cg*sd_mmar)^2:*mmar_xx_5:^2))-d5
   Kv2_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cg*sd_v2)^2:*v2_xx_5:^2))-d5
   CV = (y5-((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*y5):/((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*j5'))'*(y5-((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*y5):/((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*j5'))

   if(CV<CVmin) {
       CVmin = CV
       Cgopt5 = Cg
   }
   i++
}

Cgopt5

Kage_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt5*sd_age)^2:*age_xx_5:^2))
Keduc_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt5*sd_educ)^2:*educ_xx_5:^2))
Kafqt_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt5*sd_afqt)^2:*afqt_xx_5:^2))
Kmmar_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt5*sd_mmar)^2:*mmar_xx_5:^2))
Kv2_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt5*sd_v2)^2:*v2_xx_5:^2))
gopt5 = ((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*y5):/((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*j5')

ygopt5=(y5,gopt5)
corr_g5_y5 = correlation(ygopt5)
corr_g5_y5

stata ("qui probit y1 age educ afqt mmar v2hat if year==5")
stata ("qui predict y1hat5")
stata ("corr y1 y1hat5 if year==5")

gg1=(gopt1*j1):-gopt1'
gg2=(gopt2*j2):-gopt2'
gg3=(gopt3*j3):-gopt3'
gg4=(gopt4*j4):-gopt4'
gg5=(gopt5*j5):-gopt5'

sd_g = variance(gopt1)^(1/2)
sd_v2 = variance(v21)^(1/2)

Cw = 0.00000001
Cwopt1 = Cw
CVwmin = 1000000000000000

i=1
while(i<=40) {
   Cw = Cw+0.01

   Kbgg = (1/(hw*Cw*sd_g))*(4*pi())^(-1/2):*exp(-1/4:*(1/(hw*Cw*sd_g)^2:*gg1:^2))
   Kbv2v2 = (1/(hw*Cw*sd_v2))*(4*pi())^(-1/2):*exp(-1/4:*(1/(hw*Cw*sd_v2)^2:*v2_xx_1:^2))

   dg=I(Ns1)*(1/(hw*Cw*sd_g))*(2*pi())^(-1/2)
   dv2=I(Ns1)*(1/(hw*Cw*sd_v2))*(2*pi())^(-1/2)
   Kgg = (1/(hw*Cw*sd_g))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_g)^2:*gg1:^2))-dg
   Kv2v2 = (1/(hw*Cw*sd_v2))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_v2)^2:*v2_xx_1:^2))-dv2

   CVw = (1/(Ns1^2))*j1*(Kbgg:*Kbv2v2)*j1' - (2/(Ns1*(Ns1-1)))*j1*(Kgg:*Kv2v2)*j1'

   if(CVw<CVwmin) {
       CVwmin = CVw
       Cwopt1 = Cw
   }
   i++
}

Cwopt1

Cw = Cwopt1

   Kgg = (1/(hw*Cw*sd_g))*(2*pi())^(-1/2):*(1/2:*(3*j1'*j1-(1/(hw*Cw*sd_g)^2:*gg1:^2))):*exp(-1/2:*(1/(hw*Cw*sd_g)^2:*gg1:^2))
   Kv2v2 = (1/(hw*Cw*sd_v2))*(2*pi())^(-1/2):*(1/2:*(3*j1'*j1-(1/(hw*Cw*sd_v2)^2:*v2_xx_1:^2))):*exp(-1/2:*(1/(hw*Cw*sd_v2)^2:*v2_xx_1:^2))

   Kgg = (1/(hw*Cw*sd_g))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_g):*gg1:^2))
   Kv2v2 = (1/(hw*Cw*sd_v2))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_v2):*v2_xx_1:^2))

   S11 = j1*( uppertriangle(Kgg:*Kv2v2:*(age_xx_1:^2)) - diag(Kgg:*Kv2v2:*(age_xx_1:^2)) )*j1'
   S12 = j1*( uppertriangle(Kgg:*Kv2v2:*(age_xx_1:*educ_xx_1)) - diag(Kgg:*Kv2v2:*(age_xx_1:*educ_xx_1)) )*j1'
   S13 = j1*( uppertriangle(Kgg:*Kv2v2:*(age_xx_1:*afqt_xx_1)) - diag(Kgg:*Kv2v2:*(age_xx_1:*afqt_xx_1)) )*j1'
   S14 = j1*( uppertriangle(Kgg:*Kv2v2:*(age_xx_1:*mmar_xx_1)) - diag(Kgg:*Kv2v2:*(age_xx_1:*mmar_xx_1)) )*j1'
   S22 = j1*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_1:^2)) - diag(Kgg:*Kv2v2:*(educ_xx_1:^2)) )*j1'
   S23 = j1*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_1:*afqt_xx_1)) - diag(Kgg:*Kv2v2:*(educ_xx_1:*afqt_xx_1)) )*j1'
   S24 = j1*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_1:*mmar_xx_1)) - diag(Kgg:*Kv2v2:*(educ_xx_1:*mmar_xx_1)) )*j1'
   S33 = j1*( uppertriangle(Kgg:*Kv2v2:*(afqt_xx_1:^2)) - diag(Kgg:*Kv2v2:*(afqt_xx_1:^2)) )*j1'
   S34 = j1*( uppertriangle(Kgg:*Kv2v2:*(afqt_xx_1:*mmar_xx_1)) - diag(Kgg:*Kv2v2:*(afqt_xx_1:*mmar_xx_1)) )*j1'
   S44 = j1*( uppertriangle(Kgg:*Kv2v2:*(mmar_xx_1:^2)) - diag(Kgg:*Kv2v2:*(mmar_xx_1:^2)) )*j1'

   S = J(4,4,1)
   S[1,1] = S11
   S[1,2] = S12
   S[1,3] = S13
   S[1,4] = S14
   S[2,1] = S12
   S[2,2] = S22
   S[2,3] = S23
   S[2,4] = S24
   S[3,1] = S13
   S[3,2] = S23
   S[3,3] = S33
   S[3,4] = S34
   S[4,1] = S14
   S[4,2] = S24
   S[4,3] = S34
   S[4,4] = S44

   S1 = S/(Ns1*(Ns1-1)/2)

sd_g = variance(gopt2)^(1/2)
sd_v2 = variance(v22)^(1/2)

Cw = 0.00000001
Cwopt2 = Cw
CVwmin = 1000000000000000

i=1
while(i<=40) {
   Cw = Cw+0.01

   Kbgg = (1/(hw*Cw*sd_g))*(4*pi())^(-1/2):*exp(-1/4:*(1/(hw*Cw*sd_g)^2:*gg2:^2))
   Kbv2v2 = (1/(hw*Cw*sd_v2))*(4*pi())^(-1/2):*exp(-1/4:*(1/(hw*Cw*sd_v2)^2:*v2_xx_2:^2))

   dg=I(Ns2)*(1/(hw*Cw*sd_g))*(2*pi())^(-1/2)
   dv2=I(Ns2)*(1/(hw*Cw*sd_v2))*(2*pi())^(-1/2)
   Kgg = (1/(hw*Cw*sd_g))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_g)^2:*gg2:^2))-dg
   Kv2v2 = (1/(hw*Cw*sd_v2))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_v2)^2:*v2_xx_2:^2))-dv2

   CVw = (1/(Ns2^2))*j2*(Kbgg:*Kbv2v2)*j2' - (2/(Ns2*(Ns2-1)))*j2*(Kgg:*Kv2v2)*j2'

   if(CVw<CVwmin) {
       CVwmin = CVw
       Cwopt2 = Cw
   }
   i++
}

Cwopt2

Cw = Cwopt2

   Kgg = (1/(hw*Cw*sd_g))*(2*pi())^(-1/2):*(1/2:*(3*j2'*j2-(1/(hw*Cw*sd_g)^2:*gg2:^2))):*exp(-1/2:*(1/(hw*Cw*sd_g)^2:*gg2:^2))
   Kv2v2 = (1/(hw*Cw*sd_v2))*(2*pi())^(-1/2):*(1/2:*(3*j2'*j2-(1/(hw*Cw*sd_v2)^2:*v2_xx_2:^2))):*exp(-1/2:*(1/(hw*Cw*sd_v2)^2:*v2_xx_2:^2))


   Kgg = (1/(hw*Cw*sd_g))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_g):*gg2:^2))
   Kv2v2 = (1/(hw*Cw*sd_v2))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_v2):*v2_xx_2:^2))

   S11 = j2*( uppertriangle(Kgg:*Kv2v2:*(age_xx_2:^2)) - diag(Kgg:*Kv2v2:*(age_xx_2:^2)) )*j2'
   S12 = j2*( uppertriangle(Kgg:*Kv2v2:*(age_xx_2:*educ_xx_2)) - diag(Kgg:*Kv2v2:*(age_xx_2:*educ_xx_2)) )*j2'
   S13 = j2*( uppertriangle(Kgg:*Kv2v2:*(age_xx_2:*afqt_xx_2)) - diag(Kgg:*Kv2v2:*(age_xx_2:*afqt_xx_2)) )*j2'
   S14 = j2*( uppertriangle(Kgg:*Kv2v2:*(age_xx_2:*mmar_xx_2)) - diag(Kgg:*Kv2v2:*(age_xx_2:*mmar_xx_2)) )*j2'
   S22 = j2*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_2:^2)) - diag(Kgg:*Kv2v2:*(educ_xx_2:^2)) )*j2'
   S23 = j2*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_2:*afqt_xx_2)) - diag(Kgg:*Kv2v2:*(educ_xx_2:*afqt_xx_2)) )*j2'
   S24 = j2*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_2:*mmar_xx_2)) - diag(Kgg:*Kv2v2:*(educ_xx_2:*mmar_xx_2)) )*j2'
   S33 = j2*( uppertriangle(Kgg:*Kv2v2:*(afqt_xx_2:^2)) - diag(Kgg:*Kv2v2:*(afqt_xx_2:^2)) )*j2'
   S34 = j2*( uppertriangle(Kgg:*Kv2v2:*(afqt_xx_2:*mmar_xx_2)) - diag(Kgg:*Kv2v2:*(afqt_xx_2:*mmar_xx_2)) )*j2'
   S44 = j2*( uppertriangle(Kgg:*Kv2v2:*(mmar_xx_2:^2)) - diag(Kgg:*Kv2v2:*(mmar_xx_2:^2)) )*j2'

   S = J(4,4,1)
   S[1,1] = S11
   S[1,2] = S12
   S[1,3] = S13
   S[1,4] = S14
   S[2,1] = S12
   S[2,2] = S22
   S[2,3] = S23
   S[2,4] = S24
   S[3,1] = S13
   S[3,2] = S23
   S[3,3] = S33
   S[3,4] = S34
   S[4,1] = S14
   S[4,2] = S24
   S[4,3] = S34
   S[4,4] = S44

   S2 = S/(Ns2*(Ns2-1)/2)

sd_g = variance(gopt3)^(1/2)
sd_v2 = variance(v23)^(1/2)

Cw = 0.00000001
Cwopt3 = Cw
CVwmin = 1000000000000000

i=1
while(i<=40) {
   Cw = Cw+0.01

   Kbgg = (1/(hw*Cw*sd_g))*(4*pi())^(-1/2):*exp(-1/4:*(1/(hw*Cw*sd_g)^2:*gg3:^2))
   Kbv2v2 = (1/(hw*Cw*sd_v2))*(4*pi())^(-1/2):*exp(-1/4:*(1/(hw*Cw*sd_v2)^2:*v2_xx_3:^2))

   dg=I(Ns3)*(1/(hw*Cw*sd_g))*(2*pi())^(-1/2)
   dv2=I(Ns3)*(1/(hw*Cw*sd_v2))*(2*pi())^(-1/2)
   Kgg = (1/(hw*Cw*sd_g))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_g)^2:*gg3:^2))-dg
   Kv2v2 = (1/(hw*Cw*sd_v2))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_v2)^2:*v2_xx_3:^2))-dv2

   CVw = (1/(Ns3^2))*j3*(Kbgg:*Kbv2v2)*j3' - (2/(Ns3*(Ns3-1)))*j3*(Kgg:*Kv2v2)*j3'

   if(CVw<CVwmin) {
       CVwmin = CVw
       Cwopt3 = Cw
   }
   i++
}

Cwopt3

Cw = Cwopt3

   Kgg = (1/(hw*Cw*sd_g))*(2*pi())^(-1/2):*(1/2:*(3*j3'*j3-(1/(hw*Cw*sd_g)^2:*gg3:^2))):*exp(-1/2:*(1/(hw*Cw*sd_g)^2:*gg3:^2))
   Kv2v2 = (1/(hw*Cw*sd_v2))*(2*pi())^(-1/2):*(1/2:*(3*j3'*j3-(1/(hw*Cw*sd_v2)^2:*v2_xx_3:^2))):*exp(-1/2:*(1/(hw*Cw*sd_v2)^2:*v2_xx_3:^2))

   Kgg = (1/(hw*Cw*sd_g))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_g):*gg3:^2))
   Kv2v2 = (1/(hw*Cw*sd_v2))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_v2):*v2_xx_3:^2))

   S11 = j3*( uppertriangle(Kgg:*Kv2v2:*(age_xx_3:^2)) - diag(Kgg:*Kv2v2:*(age_xx_3:^2)) )*j3'
   S12 = j3*( uppertriangle(Kgg:*Kv2v2:*(age_xx_3:*educ_xx_3)) - diag(Kgg:*Kv2v2:*(age_xx_3:*educ_xx_3)) )*j3'
   S13 = j3*( uppertriangle(Kgg:*Kv2v2:*(age_xx_3:*afqt_xx_3)) - diag(Kgg:*Kv2v2:*(age_xx_3:*afqt_xx_3)) )*j3'
   S14 = j3*( uppertriangle(Kgg:*Kv2v2:*(age_xx_3:*mmar_xx_3)) - diag(Kgg:*Kv2v2:*(age_xx_3:*mmar_xx_3)) )*j3'
   S22 = j3*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_3:^2)) - diag(Kgg:*Kv2v2:*(educ_xx_3:^2)) )*j3'
   S23 = j3*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_3:*afqt_xx_3)) - diag(Kgg:*Kv2v2:*(educ_xx_3:*afqt_xx_3)) )*j3'
   S24 = j3*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_3:*mmar_xx_3)) - diag(Kgg:*Kv2v2:*(educ_xx_3:*mmar_xx_3)) )*j3'
   S33 = j3*( uppertriangle(Kgg:*Kv2v2:*(afqt_xx_3:^2)) - diag(Kgg:*Kv2v2:*(afqt_xx_3:^2)) )*j3'
   S34 = j3*( uppertriangle(Kgg:*Kv2v2:*(afqt_xx_3:*mmar_xx_3)) - diag(Kgg:*Kv2v2:*(afqt_xx_3:*mmar_xx_3)) )*j3'
   S44 = j3*( uppertriangle(Kgg:*Kv2v2:*(mmar_xx_3:^2)) - diag(Kgg:*Kv2v2:*(mmar_xx_3:^2)) )*j3'

   S = J(4,4,1)
   S[1,1] = S11
   S[1,2] = S12
   S[1,3] = S13
   S[1,4] = S14
   S[2,1] = S12
   S[2,2] = S22
   S[2,3] = S23
   S[2,4] = S24
   S[3,1] = S13
   S[3,2] = S23
   S[3,3] = S33
   S[3,4] = S34
   S[4,1] = S14
   S[4,2] = S24
   S[4,3] = S34
   S[4,4] = S44

   S3 = S/(Ns3*(Ns3-1)/2)

sd_g = variance(gopt4)^(1/2)
sd_v2 = variance(v24)^(1/2)

Cw = 0.00000001
Cwopt4 = Cw
CVwmin = 1000000000000000

i=1
while(i<=40) {
   Cw = Cw+0.01

   Kbgg = (1/(hw*Cw*sd_g))*(4*pi())^(-1/2):*exp(-1/4:*(1/(hw*Cw*sd_g)^2:*gg4:^2))
   Kbv2v2 = (1/(hw*Cw*sd_v2))*(4*pi())^(-1/2):*exp(-1/4:*(1/(hw*Cw*sd_v2)^2:*v2_xx_4:^2))

   dg=I(Ns4)*(1/(hw*Cw*sd_g))*(2*pi())^(-1/2)
   dv2=I(Ns4)*(1/(hw*Cw*sd_v2))*(2*pi())^(-1/2)
   Kgg = (1/(hw*Cw*sd_g))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_g)^2:*gg4:^2))-dg
   Kv2v2 = (1/(hw*Cw*sd_v2))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_v2)^2:*v2_xx_4:^2))-dv2

   CVw = (1/(Ns4^2))*j4*(Kbgg:*Kbv2v2)*j4' - (2/(Ns4*(Ns4-1)))*j4*(Kgg:*Kv2v2)*j4'

   if(CVw<CVwmin) {
       CVwmin = CVw
       Cwopt4 = Cw
   }
   i++
}

Cwopt4

Cw = Cwopt4

   Kgg = (1/(hw*Cw*sd_g))*(2*pi())^(-1/2):*(1/2:*(3*j4'*j4-(1/(hw*Cw*sd_g)^2:*gg4:^2))):*exp(-1/2:*(1/(hw*Cw*sd_g)^2:*gg4:^2))
   Kv2v2 = (1/(hw*Cw*sd_v2))*(2*pi())^(-1/2):*(1/2:*(3*j4'*j4-(1/(hw*Cw*sd_v2)^2:*v2_xx_4:^2))):*exp(-1/2:*(1/(hw*Cw*sd_v2)^2:*v2_xx_4:^2))

   Kgg = (1/(hw*Cw*sd_g))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_g):*gg4:^2))
   Kv2v2 = (1/(hw*Cw*sd_v2))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_v2):*v2_xx_4:^2))

   S11 = j4*( uppertriangle(Kgg:*Kv2v2:*(age_xx_4:^2)) - diag(Kgg:*Kv2v2:*(age_xx_4:^2)) )*j4'
   S12 = j4*( uppertriangle(Kgg:*Kv2v2:*(age_xx_4:*educ_xx_4)) - diag(Kgg:*Kv2v2:*(age_xx_4:*educ_xx_4)) )*j4'
   S13 = j4*( uppertriangle(Kgg:*Kv2v2:*(age_xx_4:*afqt_xx_4)) - diag(Kgg:*Kv2v2:*(age_xx_4:*afqt_xx_4)) )*j4'
   S14 = j4*( uppertriangle(Kgg:*Kv2v2:*(age_xx_4:*mmar_xx_4)) - diag(Kgg:*Kv2v2:*(age_xx_4:*mmar_xx_4)) )*j4'
   S22 = j4*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_4:^2)) - diag(Kgg:*Kv2v2:*(educ_xx_4:^2)) )*j4'
   S23 = j4*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_4:*afqt_xx_4)) - diag(Kgg:*Kv2v2:*(educ_xx_4:*afqt_xx_4)) )*j4'
   S24 = j4*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_4:*mmar_xx_4)) - diag(Kgg:*Kv2v2:*(educ_xx_4:*mmar_xx_4)) )*j4'
   S33 = j4*( uppertriangle(Kgg:*Kv2v2:*(afqt_xx_4:^2)) - diag(Kgg:*Kv2v2:*(afqt_xx_4:^2)) )*j4'
   S34 = j4*( uppertriangle(Kgg:*Kv2v2:*(afqt_xx_4:*mmar_xx_4)) - diag(Kgg:*Kv2v2:*(afqt_xx_4:*mmar_xx_4)) )*j4'
   S44 = j4*( uppertriangle(Kgg:*Kv2v2:*(mmar_xx_4:^2)) - diag(Kgg:*Kv2v2:*(mmar_xx_4:^2)) )*j4'

   S = J(4,4,1)
   S[1,1] = S11
   S[1,2] = S12
   S[1,3] = S13
   S[1,4] = S14
   S[2,1] = S12
   S[2,2] = S22
   S[2,3] = S23
   S[2,4] = S24
   S[3,1] = S13
   S[3,2] = S23
   S[3,3] = S33
   S[3,4] = S34
   S[4,1] = S14
   S[4,2] = S24
   S[4,3] = S34
   S[4,4] = S44

   S4 = S/(Ns4*(Ns4-1)/2)

sd_g = variance(gopt5)^(1/2)
sd_v2 = variance(v25)^(1/2)

Cw = 0.00000001
Cwopt5 = Cw
CVwmin = 1000000000000000

i=1
while(i<=40) {
   Cw = Cw+0.01

   Kbgg = (1/(hw*Cw*sd_g))*(4*pi())^(-1/2):*exp(-1/4:*(1/(hw*Cw*sd_g)^2:*gg5:^2))
   Kbv2v2 = (1/(hw*Cw*sd_v2))*(4*pi())^(-1/2):*exp(-1/4:*(1/(hw*Cw*sd_v2)^2:*v2_xx_5:^2))

   dg=I(Ns5)*(1/(hw*Cw*sd_g))*(2*pi())^(-1/2)
   dv2=I(Ns5)*(1/(hw*Cw*sd_v2))*(2*pi())^(-1/2)
   Kgg = (1/(hw*Cw*sd_g))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_g)^2:*gg5:^2))-dg
   Kv2v2 = (1/(hw*Cw*sd_v2))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_v2)^2:*v2_xx_5:^2))-dv2

   CVw = (1/(Ns5^2))*j5*(Kbgg:*Kbv2v2)*j5' - (2/(Ns5*(Ns5-1)))*j5*(Kgg:*Kv2v2)*j5'

   if(CVw<CVwmin) {
       CVwmin = CVw
       Cwopt5 = Cw
   }
   i++
}

Cwopt5

Cw = Cwopt5

   Kgg = (1/(hw*Cw*sd_g))*(2*pi())^(-1/2):*(1/2:*(3*j5'*j5-(1/(hw*Cw*sd_g)^2:*gg5:^2))):*exp(-1/2:*(1/(hw*Cw*sd_g)^2:*gg5:^2))
   Kv2v2 = (1/(hw*Cw*sd_v2))*(2*pi())^(-1/2):*(1/2:*(3*j5'*j5-(1/(hw*Cw*sd_v2)^2:*v2_xx_5:^2))):*exp(-1/2:*(1/(hw*Cw*sd_v2)^2:*v2_xx_5:^2))

   Kgg = (1/(hw*Cw*sd_g))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_g):*gg5:^2))
   Kv2v2 = (1/(hw*Cw*sd_v2))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_v2):*v2_xx_5:^2))

   S11 = j5*( uppertriangle(Kgg:*Kv2v2:*(age_xx_5:^2)) - diag(Kgg:*Kv2v2:*(age_xx_5:^2)) )*j5'
   S12 = j5*( uppertriangle(Kgg:*Kv2v2:*(age_xx_5:*educ_xx_5)) - diag(Kgg:*Kv2v2:*(age_xx_5:*educ_xx_5)) )*j5'
   S13 = j5*( uppertriangle(Kgg:*Kv2v2:*(age_xx_5:*afqt_xx_5)) - diag(Kgg:*Kv2v2:*(age_xx_5:*afqt_xx_5)) )*j5'
   S14 = j5*( uppertriangle(Kgg:*Kv2v2:*(age_xx_5:*mmar_xx_5)) - diag(Kgg:*Kv2v2:*(age_xx_5:*mmar_xx_5)) )*j5'
   S22 = j5*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_5:^2)) - diag(Kgg:*Kv2v2:*(educ_xx_5:^2)) )*j5'
   S23 = j5*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_5:*afqt_xx_5)) - diag(Kgg:*Kv2v2:*(educ_xx_5:*afqt_xx_5)) )*j5'
   S24 = j5*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_5:*mmar_xx_5)) - diag(Kgg:*Kv2v2:*(educ_xx_5:*mmar_xx_5)) )*j5'
   S33 = j5*( uppertriangle(Kgg:*Kv2v2:*(afqt_xx_5:^2)) - diag(Kgg:*Kv2v2:*(afqt_xx_5:^2)) )*j5'
   S34 = j5*( uppertriangle(Kgg:*Kv2v2:*(afqt_xx_5:*mmar_xx_5)) - diag(Kgg:*Kv2v2:*(afqt_xx_5:*mmar_xx_5)) )*j5'
   S44 = j5*( uppertriangle(Kgg:*Kv2v2:*(mmar_xx_5:^2)) - diag(Kgg:*Kv2v2:*(mmar_xx_5:^2)) )*j5'

   S = J(4,4,1)
   S[1,1] = S11
   S[1,2] = S12
   S[1,3] = S13
   S[1,4] = S14
   S[2,1] = S12
   S[2,2] = S22
   S[2,3] = S23
   S[2,4] = S24
   S[3,1] = S13
   S[3,2] = S23
   S[3,3] = S33
   S[3,4] = S34
   S[4,1] = S14
   S[4,2] = S24
   S[4,3] = S34
   S[4,4] = S44

   S5 = S/(Ns5*(Ns5-1)/2)

   ST = S1 + S2 + S3 + S4 + S5

S1
S2
S3
S4
S5

   ST

   eval = .
   evec = .
   symeigensystem(ST, evec, eval)

   b_tilde = evec[.,4]
   b = Omega_inv_sqrt * b_tilde
   bsc= b/b[1,1]

b
bsc

end
more;

use "data\NLSY79_bin_sel_bootstrap.dta", clear;

********************************************************************************************
* BOOTSTRAP;

mata;

B2_CORR=J(200,3,0)

r=1

do {

r

stata ("preserve")
stata ("bsample, cluster(id)")
stata ("sort id year")

stata ("qui tobit y2 age educ married afqt_1 mmarried if year==1, ll(0)")
stata ("predict y2hat1")
stata ("gen v2hat=y2-y2hat1 if y2>0 & year==1")
stata ("qui tobit y2 age educ married afqt_1 mmarried if year==2, ll(0)")
stata ("predict y2hat2")
stata ("replace v2hat=y2-y2hat2 if y2>0 & year==2")
stata ("qui tobit y2 age educ married afqt_1 mmarried if year==3, ll(0)")
stata ("predict y2hat3")
stata ("replace v2hat=y2-y2hat3 if y2>0 & year==3")
stata ("qui tobit y2 age educ married afqt_1 mmarried if year==4, ll(0)")
stata ("predict y2hat4")
stata ("replace v2hat=y2-y2hat4 if y2>0 & year==4")
stata ("qui tobit y2 age educ married afqt_1 mmarried if year==5, ll(0)")
stata ("predict y2hat5")
stata ("replace v2hat=y2-y2hat5 if y2>0 & year==5")

stata ("qui drop y2hat1 y2hat2 y2hat3 y2hat4 y2hat5")

stata ("* USE LEAST SQUARES CROSS-VALIDATION TO FIND OPTIMAL BANDWIDTH FOR COMPUTING NONPARAMETRIC CONDITIONAL EXPECTATION g *")
stata ("*** NOTE: 2nd and 3rd arguments in st_view(allv=.,.,.) command below relate to obs (rows) and vars (columns), 0 means omit obs with missing data")

stata ("qui gen h_g=N^(-1/9)")
stata ("qui gen h_w=N^(-1/6)")

stata ("qui sum y1 if y1~=. & year==1")
stata ("qui gen Ns1=r(N)")

stata ("qui sum y1 if y1~=. & year==2")
stata ("qui gen Ns2=r(N)")

stata ("qui sum y1 if y1~=. & year==3")
stata ("qui gen Ns3=r(N)")

stata ("qui sum y1 if y1~=. & year==4")
stata ("qui gen Ns4=r(N)")

stata ("qui sum y1 if y1~=. & year==5")
stata ("qui gen Ns5=r(N)")

stata ("qui replace h_g=. if y1==.")
stata ("qui replace h_w=. if y1==.")
stata ("qui replace age=. if y1==.")
stata ("qui replace educ=. if y1==.")
stata ("qui replace afqt_1=. if y1==.")
stata ("qui replace mmarried=. if y1==.")
stata ("qui replace v2hat=. if y1==.")
stata ("qui replace Ns1=. if y1==.")
stata ("qui replace Ns2=. if y1==.")
stata ("qui replace Ns3=. if y1==.")
stata ("qui replace Ns4=. if y1==.")
stata ("qui replace Ns5=. if y1==.")

stata ("order h_g h_w y1 age educ afqt_1 mmarried v2hat Ns1 Ns2 Ns3 Ns4 Ns5")
stata ("sort year id")

st_view(allv=.,.,1..13,0)
st_subview(h_g=.,allv,.,1)
st_subview(h_w=.,allv,.,2)
st_subview(y=.,allv,.,3)
st_subview(v2=.,allv,.,8)
st_subview(Ns1=.,allv,.,9)
st_subview(Ns2=.,allv,.,10)
st_subview(Ns3=.,allv,.,11)
st_subview(Ns4=.,allv,.,12)
st_subview(Ns5=.,allv,.,13)

st_subview(Xvars=.,allv,.,4..7)

hg=h_g[1]
hw=h_w[1]
Ns1=Ns1[1]
Ns2=Ns2[1]
Ns3=Ns3[1]
Ns4=Ns4[1]
Ns5=Ns5[1]

Omega_inv = cholinv(variance(Xvars))
Omega_inv_sqrt = cholesky(Omega_inv)
Xvars_tilde = Xvars * Omega_inv_sqrt

age = Xvars_tilde[.,1]
educ = Xvars_tilde[.,2]
afqt = Xvars_tilde[.,3]
mmar = Xvars_tilde[.,4]

y1=y[1..Ns1,1]
y2=y[Ns1+1..Ns1+Ns2,1]
y3=y[Ns1+Ns2+1..Ns1+Ns2+Ns3,1]
y4=y[Ns1+Ns2+Ns3+1..Ns1+Ns2+Ns3+Ns4,1]
y5=y[Ns1+Ns2+Ns3+Ns4+1..Ns1+Ns2+Ns3+Ns4+Ns5,1]

age1=age[1..Ns1,1]
age2=age[Ns1+1..Ns1+Ns2,1]
age3=age[Ns1+Ns2+1..Ns1+Ns2+Ns3,1]
age4=age[Ns1+Ns2+Ns3+1..Ns1+Ns2+Ns3+Ns4,1]
age5=age[Ns1+Ns2+Ns3+Ns4+1..Ns1+Ns2+Ns3+Ns4+Ns5,1]

educ1=educ[1..Ns1,1]
educ2=educ[Ns1+1..Ns1+Ns2,1]
educ3=educ[Ns1+Ns2+1..Ns1+Ns2+Ns3,1]
educ4=educ[Ns1+Ns2+Ns3+1..Ns1+Ns2+Ns3+Ns4,1]
educ5=educ[Ns1+Ns2+Ns3+Ns4+1..Ns1+Ns2+Ns3+Ns4+Ns5,1]

afqt1=afqt[1..Ns1,1]
afqt2=afqt[Ns1+1..Ns1+Ns2,1]
afqt3=afqt[Ns1+Ns2+1..Ns1+Ns2+Ns3,1]
afqt4=afqt[Ns1+Ns2+Ns3+1..Ns1+Ns2+Ns3+Ns4,1]
afqt5=afqt[Ns1+Ns2+Ns3+Ns4+1..Ns1+Ns2+Ns3+Ns4+Ns5,1]

mmar1=mmar[1..Ns1,1]
mmar2=mmar[Ns1+1..Ns1+Ns2,1]
mmar3=mmar[Ns1+Ns2+1..Ns1+Ns2+Ns3,1]
mmar4=mmar[Ns1+Ns2+Ns3+1..Ns1+Ns2+Ns3+Ns4,1]
mmar5=mmar[Ns1+Ns2+Ns3+Ns4+1..Ns1+Ns2+Ns3+Ns4+Ns5,1]

v21=v2[1..Ns1,1]
v22=v2[Ns1+1..Ns1+Ns2,1]
v23=v2[Ns1+Ns2+1..Ns1+Ns2+Ns3,1]
v24=v2[Ns1+Ns2+Ns3+1..Ns1+Ns2+Ns3+Ns4,1]
v25=v2[Ns1+Ns2+Ns3+Ns4+1..Ns1+Ns2+Ns3+Ns4+Ns5,1]

sd_age = variance(age1)^(1/2)
sd_educ = variance(educ1)^(1/2)
sd_afqt = variance(afqt1)^(1/2)
sd_mmar = variance(mmar1)^(1/2)
sd_v2 = variance(v21)^(1/2)

j1=J(1,Ns1,1)
age_xx_1=(age1*j1):-age1'
educ_xx_1=(educ1*j1):-educ1'
afqt_xx_1=(afqt1*j1):-afqt1'
mmar_xx_1=(mmar1*j1):-mmar1'
v2_xx_1=(v21*j1):-v21'
d1=I(Ns1)*(2*pi())^(-1/2)

Kage_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt1*sd_age)^2:*age_xx_1:^2))
Keduc_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt1*sd_educ)^2:*educ_xx_1:^2))
Kafqt_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt1*sd_afqt)^2:*afqt_xx_1:^2))
Kmmar_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt1*sd_mmar)^2:*mmar_xx_1:^2))
Kv2_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt1*sd_v2)^2:*v2_xx_1:^2))
gopt1 = ((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*y1):/((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*j1')

sd_age = variance(age2)^(1/2)
sd_educ = variance(educ2)^(1/2)
sd_afqt = variance(afqt2)^(1/2)
sd_mmar = variance(mmar2)^(1/2)
sd_v2 = variance(v22)^(1/2)

j2=J(1,Ns2,1)
age_xx_2=(age2*j2):-age2'
educ_xx_2=(educ2*j2):-educ2'
afqt_xx_2=(afqt2*j2):-afqt2'
mmar_xx_2=(mmar2*j2):-mmar2'
v2_xx_2=(v22*j2):-v22'
d2=I(Ns2)*(2*pi())^(-1/2)

Kage_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt2*sd_age)^2:*age_xx_2:^2))
Keduc_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt2*sd_educ)^2:*educ_xx_2:^2))
Kafqt_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt2*sd_afqt)^2:*afqt_xx_2:^2))
Kmmar_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt2*sd_mmar)^2:*mmar_xx_2:^2))
Kv2_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt2*sd_v2)^2:*v2_xx_2:^2))
gopt2 = ((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*y2):/((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*j2')

sd_age = variance(age3)^(1/2)
sd_educ = variance(educ3)^(1/2)
sd_afqt = variance(afqt3)^(1/2)
sd_mmar = variance(mmar3)^(1/2)
sd_v2 = variance(v23)^(1/2)

j3=J(1,Ns3,1)
age_xx_3=(age3*j3):-age3'
educ_xx_3=(educ3*j3):-educ3'
afqt_xx_3=(afqt3*j3):-afqt3'
mmar_xx_3=(mmar3*j3):-mmar3'
v2_xx_3=(v23*j3):-v23'
d3=I(Ns3)*(2*pi())^(-1/2)

Kage_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt3*sd_age)^2:*age_xx_3:^2))
Keduc_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt3*sd_educ)^2:*educ_xx_3:^2))
Kafqt_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt3*sd_afqt)^2:*afqt_xx_3:^2))
Kmmar_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt3*sd_mmar)^2:*mmar_xx_3:^2))
Kv2_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt3*sd_v2)^2:*v2_xx_3:^2))
gopt3 = ((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*y3):/((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*j3')

sd_age = variance(age4)^(1/2)
sd_educ = variance(educ4)^(1/2)
sd_afqt = variance(afqt4)^(1/2)
sd_mmar = variance(mmar4)^(1/2)
sd_v2 = variance(v24)^(1/2)

j4=J(1,Ns4,1)
age_xx_4=(age4*j4):-age4'
educ_xx_4=(educ4*j4):-educ4'
afqt_xx_4=(afqt4*j4):-afqt4'
mmar_xx_4=(mmar4*j4):-mmar4'
v2_xx_4=(v24*j4):-v24'
d4=I(Ns4)*(2*pi())^(-1/2)

Kage_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt4*sd_age)^2:*age_xx_4:^2))
Keduc_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt4*sd_educ)^2:*educ_xx_4:^2))
Kafqt_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt4*sd_afqt)^2:*afqt_xx_4:^2))
Kmmar_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt4*sd_mmar)^2:*mmar_xx_4:^2))
Kv2_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt4*sd_v2)^2:*v2_xx_4:^2))
gopt4 = ((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*y4):/((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*j4')

sd_age = variance(age5)^(1/2)
sd_educ = variance(educ5)^(1/2)
sd_afqt = variance(afqt5)^(1/2)
sd_mmar = variance(mmar5)^(1/2)
sd_v2 = variance(v25)^(1/2)

j5=J(1,Ns5,1)
age_xx_5=(age5*j5):-age5'
educ_xx_5=(educ5*j5):-educ5'
afqt_xx_5=(afqt5*j5):-afqt5'
mmar_xx_5=(mmar5*j5):-mmar5'
v2_xx_5=(v25*j5):-v25'
d5=I(Ns5)*(2*pi())^(-1/2)

Kage_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt5*sd_age)^2:*age_xx_5:^2))
Keduc_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt5*sd_educ)^2:*educ_xx_5:^2))
Kafqt_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt5*sd_afqt)^2:*afqt_xx_5:^2))
Kmmar_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt5*sd_mmar)^2:*mmar_xx_5:^2))
Kv2_xx = (2*pi())^(-1/2):*exp(-1/2:*(1/(hg*Cgopt5*sd_v2)^2:*v2_xx_5:^2))
gopt5 = ((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*y5):/((Kage_xx:*Keduc_xx:*Kafqt_xx:*Kmmar_xx:*Kv2_xx)*j5')

gg1=(gopt1*j1):-gopt1'
gg2=(gopt2*j2):-gopt2'
gg3=(gopt3*j3):-gopt3'
gg4=(gopt4*j4):-gopt4'
gg5=(gopt5*j5):-gopt5'

sd_g = variance(gopt1)^(1/2)
sd_v2 = variance(v21)^(1/2)

Cw = Cwopt1

   Kgg = (1/(hw*Cw*sd_g))*(2*pi())^(-1/2):*(1/2:*(3*j1'*j1-(1/(hw*Cw*sd_g)^2:*gg1:^2))):*exp(-1/2:*(1/(hw*Cw*sd_g)^2:*gg1:^2))
   Kv2v2 = (1/(hw*Cw*sd_v2))*(2*pi())^(-1/2):*(1/2:*(3*j1'*j1-(1/(hw*Cw*sd_v2)^2:*v2_xx_1:^2))):*exp(-1/2:*(1/(hw*Cw*sd_v2)^2:*v2_xx_1:^2))

   Kgg = (1/(hw*Cw*sd_g))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_g):*gg1:^2))
   Kv2v2 = (1/(hw*Cw*sd_v2))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_v2):*v2_xx_1:^2))

   S11 = j1*( uppertriangle(Kgg:*Kv2v2:*(age_xx_1:^2)) - diag(Kgg:*Kv2v2:*(age_xx_1:^2)) )*j1'
   S12 = j1*( uppertriangle(Kgg:*Kv2v2:*(age_xx_1:*educ_xx_1)) - diag(Kgg:*Kv2v2:*(age_xx_1:*educ_xx_1)) )*j1'
   S13 = j1*( uppertriangle(Kgg:*Kv2v2:*(age_xx_1:*afqt_xx_1)) - diag(Kgg:*Kv2v2:*(age_xx_1:*afqt_xx_1)) )*j1'
   S14 = j1*( uppertriangle(Kgg:*Kv2v2:*(age_xx_1:*mmar_xx_1)) - diag(Kgg:*Kv2v2:*(age_xx_1:*mmar_xx_1)) )*j1'
   S22 = j1*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_1:^2)) - diag(Kgg:*Kv2v2:*(educ_xx_1:^2)) )*j1'
   S23 = j1*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_1:*afqt_xx_1)) - diag(Kgg:*Kv2v2:*(educ_xx_1:*afqt_xx_1)) )*j1'
   S24 = j1*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_1:*mmar_xx_1)) - diag(Kgg:*Kv2v2:*(educ_xx_1:*mmar_xx_1)) )*j1'
   S33 = j1*( uppertriangle(Kgg:*Kv2v2:*(afqt_xx_1:^2)) - diag(Kgg:*Kv2v2:*(afqt_xx_1:^2)) )*j1'
   S34 = j1*( uppertriangle(Kgg:*Kv2v2:*(afqt_xx_1:*mmar_xx_1)) - diag(Kgg:*Kv2v2:*(afqt_xx_1:*mmar_xx_1)) )*j1'
   S44 = j1*( uppertriangle(Kgg:*Kv2v2:*(mmar_xx_1:^2)) - diag(Kgg:*Kv2v2:*(mmar_xx_1:^2)) )*j1'

   S = J(4,4,1)
   S[1,1] = S11
   S[1,2] = S12
   S[1,3] = S13
   S[1,4] = S14
   S[2,1] = S12
   S[2,2] = S22
   S[2,3] = S23
   S[2,4] = S24
   S[3,1] = S13
   S[3,2] = S23
   S[3,3] = S33
   S[3,4] = S34
   S[4,1] = S14
   S[4,2] = S24
   S[4,3] = S34
   S[4,4] = S44

   S1 = S/(Ns1*(Ns1-1)/2)

sd_g = variance(gopt2)^(1/2)
sd_v2 = variance(v22)^(1/2)

Cw = Cwopt2

   Kgg = (1/(hw*Cw*sd_g))*(2*pi())^(-1/2):*(1/2:*(3*j2'*j2-(1/(hw*Cw*sd_g)^2:*gg2:^2))):*exp(-1/2:*(1/(hw*Cw*sd_g)^2:*gg2:^2))
   Kv2v2 = (1/(hw*Cw*sd_v2))*(2*pi())^(-1/2):*(1/2:*(3*j2'*j2-(1/(hw*Cw*sd_v2)^2:*v2_xx_2:^2))):*exp(-1/2:*(1/(hw*Cw*sd_v2)^2:*v2_xx_2:^2))

   Kgg = (1/(hw*Cw*sd_g))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_g):*gg2:^2))
   Kv2v2 = (1/(hw*Cw*sd_v2))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_v2):*v2_xx_2:^2))

   S11 = j2*( uppertriangle(Kgg:*Kv2v2:*(age_xx_2:^2)) - diag(Kgg:*Kv2v2:*(age_xx_2:^2)) )*j2'
   S12 = j2*( uppertriangle(Kgg:*Kv2v2:*(age_xx_2:*educ_xx_2)) - diag(Kgg:*Kv2v2:*(age_xx_2:*educ_xx_2)) )*j2'
   S13 = j2*( uppertriangle(Kgg:*Kv2v2:*(age_xx_2:*afqt_xx_2)) - diag(Kgg:*Kv2v2:*(age_xx_2:*afqt_xx_2)) )*j2'
   S14 = j2*( uppertriangle(Kgg:*Kv2v2:*(age_xx_2:*mmar_xx_2)) - diag(Kgg:*Kv2v2:*(age_xx_2:*mmar_xx_2)) )*j2'
   S22 = j2*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_2:^2)) - diag(Kgg:*Kv2v2:*(educ_xx_2:^2)) )*j2'
   S23 = j2*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_2:*afqt_xx_2)) - diag(Kgg:*Kv2v2:*(educ_xx_2:*afqt_xx_2)) )*j2'
   S24 = j2*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_2:*mmar_xx_2)) - diag(Kgg:*Kv2v2:*(educ_xx_2:*mmar_xx_2)) )*j2'
   S33 = j2*( uppertriangle(Kgg:*Kv2v2:*(afqt_xx_2:^2)) - diag(Kgg:*Kv2v2:*(afqt_xx_2:^2)) )*j2'
   S34 = j2*( uppertriangle(Kgg:*Kv2v2:*(afqt_xx_2:*mmar_xx_2)) - diag(Kgg:*Kv2v2:*(afqt_xx_2:*mmar_xx_2)) )*j2'
   S44 = j2*( uppertriangle(Kgg:*Kv2v2:*(mmar_xx_2:^2)) - diag(Kgg:*Kv2v2:*(mmar_xx_2:^2)) )*j2'

   S = J(4,4,1)
   S[1,1] = S11
   S[1,2] = S12
   S[1,3] = S13
   S[1,4] = S14
   S[2,1] = S12
   S[2,2] = S22
   S[2,3] = S23
   S[2,4] = S24
   S[3,1] = S13
   S[3,2] = S23
   S[3,3] = S33
   S[3,4] = S34
   S[4,1] = S14
   S[4,2] = S24
   S[4,3] = S34
   S[4,4] = S44

   S2 = S/(Ns2*(Ns2-1)/2)

sd_g = variance(gopt3)^(1/2)
sd_v2 = variance(v23)^(1/2)

Cw = Cwopt3

   Kgg = (1/(hw*Cw*sd_g))*(2*pi())^(-1/2):*(1/2:*(3*j3'*j3-(1/(hw*Cw*sd_g)^2:*gg3:^2))):*exp(-1/2:*(1/(hw*Cw*sd_g)^2:*gg3:^2))
   Kv2v2 = (1/(hw*Cw*sd_v2))*(2*pi())^(-1/2):*(1/2:*(3*j3'*j3-(1/(hw*Cw*sd_v2)^2:*v2_xx_3:^2))):*exp(-1/2:*(1/(hw*Cw*sd_v2)^2:*v2_xx_3:^2))

   Kgg = (1/(hw*Cw*sd_g))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_g):*gg3:^2))
   Kv2v2 = (1/(hw*Cw*sd_v2))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_v2):*v2_xx_3:^2))

   S11 = j3*( uppertriangle(Kgg:*Kv2v2:*(age_xx_3:^2)) - diag(Kgg:*Kv2v2:*(age_xx_3:^2)) )*j3'
   S12 = j3*( uppertriangle(Kgg:*Kv2v2:*(age_xx_3:*educ_xx_3)) - diag(Kgg:*Kv2v2:*(age_xx_3:*educ_xx_3)) )*j3'
   S13 = j3*( uppertriangle(Kgg:*Kv2v2:*(age_xx_3:*afqt_xx_3)) - diag(Kgg:*Kv2v2:*(age_xx_3:*afqt_xx_3)) )*j3'
   S14 = j3*( uppertriangle(Kgg:*Kv2v2:*(age_xx_3:*mmar_xx_3)) - diag(Kgg:*Kv2v2:*(age_xx_3:*mmar_xx_3)) )*j3'
   S22 = j3*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_3:^2)) - diag(Kgg:*Kv2v2:*(educ_xx_3:^2)) )*j3'
   S23 = j3*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_3:*afqt_xx_3)) - diag(Kgg:*Kv2v2:*(educ_xx_3:*afqt_xx_3)) )*j3'
   S24 = j3*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_3:*mmar_xx_3)) - diag(Kgg:*Kv2v2:*(educ_xx_3:*mmar_xx_3)) )*j3'
   S33 = j3*( uppertriangle(Kgg:*Kv2v2:*(afqt_xx_3:^2)) - diag(Kgg:*Kv2v2:*(afqt_xx_3:^2)) )*j3'
   S34 = j3*( uppertriangle(Kgg:*Kv2v2:*(afqt_xx_3:*mmar_xx_3)) - diag(Kgg:*Kv2v2:*(afqt_xx_3:*mmar_xx_3)) )*j3'
   S44 = j3*( uppertriangle(Kgg:*Kv2v2:*(mmar_xx_3:^2)) - diag(Kgg:*Kv2v2:*(mmar_xx_3:^2)) )*j3'

   S = J(4,4,1)
   S[1,1] = S11
   S[1,2] = S12
   S[1,3] = S13
   S[1,4] = S14
   S[2,1] = S12
   S[2,2] = S22
   S[2,3] = S23
   S[2,4] = S24
   S[3,1] = S13
   S[3,2] = S23
   S[3,3] = S33
   S[3,4] = S34
   S[4,1] = S14
   S[4,2] = S24
   S[4,3] = S34
   S[4,4] = S44

   S3 = S/(Ns3*(Ns3-1)/2)

sd_g = variance(gopt4)^(1/2)
sd_v2 = variance(v24)^(1/2)

Cw = Cwopt4

   Kgg = (1/(hw*Cw*sd_g))*(2*pi())^(-1/2):*(1/2:*(3*j4'*j4-(1/(hw*Cw*sd_g)^2:*gg4:^2))):*exp(-1/2:*(1/(hw*Cw*sd_g)^2:*gg4:^2))
   Kv2v2 = (1/(hw*Cw*sd_v2))*(2*pi())^(-1/2):*(1/2:*(3*j4'*j4-(1/(hw*Cw*sd_v2)^2:*v2_xx_4:^2))):*exp(-1/2:*(1/(hw*Cw*sd_v2)^2:*v2_xx_4:^2))

   Kgg = (1/(hw*Cw*sd_g))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_g):*gg4:^2))
   Kv2v2 = (1/(hw*Cw*sd_v2))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_v2):*v2_xx_4:^2))

   S11 = j4*( uppertriangle(Kgg:*Kv2v2:*(age_xx_4:^2)) - diag(Kgg:*Kv2v2:*(age_xx_4:^2)) )*j4'
   S12 = j4*( uppertriangle(Kgg:*Kv2v2:*(age_xx_4:*educ_xx_4)) - diag(Kgg:*Kv2v2:*(age_xx_4:*educ_xx_4)) )*j4'
   S13 = j4*( uppertriangle(Kgg:*Kv2v2:*(age_xx_4:*afqt_xx_4)) - diag(Kgg:*Kv2v2:*(age_xx_4:*afqt_xx_4)) )*j4'
   S14 = j4*( uppertriangle(Kgg:*Kv2v2:*(age_xx_4:*mmar_xx_4)) - diag(Kgg:*Kv2v2:*(age_xx_4:*mmar_xx_4)) )*j4'
   S22 = j4*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_4:^2)) - diag(Kgg:*Kv2v2:*(educ_xx_4:^2)) )*j4'
   S23 = j4*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_4:*afqt_xx_4)) - diag(Kgg:*Kv2v2:*(educ_xx_4:*afqt_xx_4)) )*j4'
   S24 = j4*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_4:*mmar_xx_4)) - diag(Kgg:*Kv2v2:*(educ_xx_4:*mmar_xx_4)) )*j4'
   S33 = j4*( uppertriangle(Kgg:*Kv2v2:*(afqt_xx_4:^2)) - diag(Kgg:*Kv2v2:*(afqt_xx_4:^2)) )*j4'
   S34 = j4*( uppertriangle(Kgg:*Kv2v2:*(afqt_xx_4:*mmar_xx_4)) - diag(Kgg:*Kv2v2:*(afqt_xx_4:*mmar_xx_4)) )*j4'
   S44 = j4*( uppertriangle(Kgg:*Kv2v2:*(mmar_xx_4:^2)) - diag(Kgg:*Kv2v2:*(mmar_xx_4:^2)) )*j4'

   S = J(4,4,1)
   S[1,1] = S11
   S[1,2] = S12
   S[1,3] = S13
   S[1,4] = S14
   S[2,1] = S12
   S[2,2] = S22
   S[2,3] = S23
   S[2,4] = S24
   S[3,1] = S13
   S[3,2] = S23
   S[3,3] = S33
   S[3,4] = S34
   S[4,1] = S14
   S[4,2] = S24
   S[4,3] = S34
   S[4,4] = S44

   S4 = S/(Ns4*(Ns4-1)/2)

sd_g = variance(gopt5)^(1/2)
sd_v2 = variance(v25)^(1/2)

Cw = Cwopt5

   Kgg = (1/(hw*Cw*sd_g))*(2*pi())^(-1/2):*(1/2:*(3*j5'*j5-(1/(hw*Cw*sd_g)^2:*gg5:^2))):*exp(-1/2:*(1/(hw*Cw*sd_g)^2:*gg5:^2))
   Kv2v2 = (1/(hw*Cw*sd_v2))*(2*pi())^(-1/2):*(1/2:*(3*j5'*j5-(1/(hw*Cw*sd_v2)^2:*v2_xx_5:^2))):*exp(-1/2:*(1/(hw*Cw*sd_v2)^2:*v2_xx_5:^2))

   Kgg = (1/(hw*Cw*sd_g))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_g):*gg5:^2))
   Kv2v2 = (1/(hw*Cw*sd_v2))*(2*pi())^(-1/2):*exp(-1/2:*(1/(hw*Cw*sd_v2):*v2_xx_5:^2))

   S11 = j5*( uppertriangle(Kgg:*Kv2v2:*(age_xx_5:^2)) - diag(Kgg:*Kv2v2:*(age_xx_5:^2)) )*j5'
   S12 = j5*( uppertriangle(Kgg:*Kv2v2:*(age_xx_5:*educ_xx_5)) - diag(Kgg:*Kv2v2:*(age_xx_5:*educ_xx_5)) )*j5'
   S13 = j5*( uppertriangle(Kgg:*Kv2v2:*(age_xx_5:*afqt_xx_5)) - diag(Kgg:*Kv2v2:*(age_xx_5:*afqt_xx_5)) )*j5'
   S14 = j5*( uppertriangle(Kgg:*Kv2v2:*(age_xx_5:*mmar_xx_5)) - diag(Kgg:*Kv2v2:*(age_xx_5:*mmar_xx_5)) )*j5'
   S22 = j5*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_5:^2)) - diag(Kgg:*Kv2v2:*(educ_xx_5:^2)) )*j5'
   S23 = j5*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_5:*afqt_xx_5)) - diag(Kgg:*Kv2v2:*(educ_xx_5:*afqt_xx_5)) )*j5'
   S24 = j5*( uppertriangle(Kgg:*Kv2v2:*(educ_xx_5:*mmar_xx_5)) - diag(Kgg:*Kv2v2:*(educ_xx_5:*mmar_xx_5)) )*j5'
   S33 = j5*( uppertriangle(Kgg:*Kv2v2:*(afqt_xx_5:^2)) - diag(Kgg:*Kv2v2:*(afqt_xx_5:^2)) )*j5'
   S34 = j5*( uppertriangle(Kgg:*Kv2v2:*(afqt_xx_5:*mmar_xx_5)) - diag(Kgg:*Kv2v2:*(afqt_xx_5:*mmar_xx_5)) )*j5'
   S44 = j5*( uppertriangle(Kgg:*Kv2v2:*(mmar_xx_5:^2)) - diag(Kgg:*Kv2v2:*(mmar_xx_5:^2)) )*j5'

   S = J(4,4,1)
   S[1,1] = S11
   S[1,2] = S12
   S[1,3] = S13
   S[1,4] = S14
   S[2,1] = S12
   S[2,2] = S22
   S[2,3] = S23
   S[2,4] = S24
   S[3,1] = S13
   S[3,2] = S23
   S[3,3] = S33
   S[3,4] = S34
   S[4,1] = S14
   S[4,2] = S24
   S[4,3] = S34
   S[4,4] = S44

   S5 = S/(Ns5*(Ns5-1)/2)

   ST = S1 + S2 + S3 + S4 + S5

   eval = .
   evec = .
   symeigensystem(ST, evec, eval)

   b_tilde = evec[.,4]
   b = Omega_inv_sqrt * b_tilde
   bsc= b/b[1,1]

b
bsc

B2_CORR[r,1]=bsc[2,1]
B2_CORR[r,2]=bsc[3,1]
B2_CORR[r,3]=bsc[4,1]

st_matrix("b2_corr",B2_CORR)
stata ("restore")

   r++
 } while(r<=200)

end

svmat b2_corr;
rename b2_corr1 b2_sp;
rename b2_corr2 b3_sp;
rename b2_corr3 b4_sp;

drop if b2_sp==.;

keep b2_sp b3_sp b4_sp;
sum b2_sp b3_sp b4_sp if b2_sp~=0;

save data\semiparam_bootstrap.dta, replace;

log close;