/*****************************************************************************/
/* "ASSESSING THE PERFORMANCE OF MATCHING ALGORITHMS WHEN ... "              */
/* Gauss File:  post13.G                                                     */
/*****************************************************************************/

new;


/*****************************************************************************
 Outer iterations over all degrees and narrow and broad model
*****************************************************************************/

/* Iteration over all matching setups in folders f1 to f8 */
iter = 3;
do while iter<=3;


/* Broad (0) or narrow (1) probit model */
narrow = 0;

/* College degree to evaluate
0: AA
1: BA
2: MA+
*/
coll = 1;

/* Sex
0: women
1: men
*/
men = 1;


/*****************************************************************************
 Do the following for all aftermax years
*****************************************************************************/

aftermax = 1;

/* Matrix of matching results */
table1 = zeros(2*aftermax,7);
/*
01: effect of pure matching, effect on the treated
    (std.error)
02: effect of adjusted matching, effect on the treated
    (std.error)
03: effect of pure matching, effect on a randomly assigned person
    (std.error)
04: effect of adjusted matching, effect on a randomly assigned person
    (std.error)
05: number of strata
    number of treated
06: -- empty --
    number of controls
07: mean number of treated in strata with more than one treated
    maximum number of treated in strata with more than one treated
*/

/* Matrix of OLS results */
table2 = zeros(2*aftermax,8);
/*
01: OLS coefficient of treatment dummy using the strata observations, weighting A: stratum weights
02: effect of treatment in OLS using the strata observations, weighting A: stratum weights
03: effect of treatment in OLS w/ interactions treatment*exp using the strata observations, weighting A: stratum weights
04: OLS on p.score polynomial, weighting A: stratum weights
05: OLS coefficient of treatment dummy using the strata observations, weighting B: sample weights
06: effect of treatment in OLS using the strata observations, weighting B: sample weights
07: effect of treatment in OLS w/ interactions treatment*exp using the strata observations, weighting B: sample weights
08: OLS on p.score polynomial, weighting B: sample weights
*/

/* Matrix of p-values of matching results -- not reported in P_Opt3 */
ptable1 = zeros(aftermax,4);
/*
01: effect of pure matching, effect on the treated
02: effect of adjusted matching, effect on the treated
03: effect of pure matching, effect on a randomly assigned person
04: effect of adjusted matching, effect on a randomly assigned person
*/

/* Matrix of p-values of OLS results -- not reported in P_Opt3 */
ptable2 = zeros(aftermax,8);
/*
01: OLS coefficient of treatment dummy using the strata observations, weighting A: stratum weights
02: effect of treatment in OLS using the strata observations, weighting A: stratum weights
03: effect of treatment in OLS w/ interactions treatment*exp using the strata observations, weighting A: stratum weights
04: OLS on p.score polynomial, weighting A: stratum weights
05: OLS coefficient of treatment dummy using the strata observations, weighting B: sample weights
06: effect of treatment in OLS using the strata observations, weighting B: sample weights
07: effect of treatment in OLS w/ interactions treatment*exp using the strata observations, weighting B: sample weights
08: OLS on p.score polynomial, weighting B: sample weights
*/

/* Matrix of heterogeneity effects, all years and the pooled model -- not reported in P_Opt3 */
tabhet = zeros(2*aftermax+2,9);
/*
01-03: pure matching heterogeneity effects, usual weighting
04-06: adjusted matching heterogeneity effects, usual weighting
07-09: OLS heterogeneity effects using the strata observations, weighting A
*/

/* Matrix of p-values of heterogeneity effects, all years and the pooled model -- not reported in P_Opt3 */
ptabhet = zeros(aftermax+1,9);
/*
01-03: pure matching heterogeneity effects, usual weighting
04-06: adjusted matching heterogeneity effects, usual weighting
07-09: OLS heterogeneity effects using the strata observations, weighting A
*/

/* Brackets for standard errors, all years and the pooled model.
   pth is a column that alternates 0 and "(": one (0,"(") pair is laid down
   first and aftermax+2 further pairs are stacked underneath it. */
pth = 0|"(";
j = 1;
do until j>aftermax+2;
    pth = pth|(0|"(");
    j = j + 1;
endo;

/* Stars for significance levels */
st1   = zeros(2*aftermax,4);
st2   = zeros(2*aftermax,12);
sthet = zeros(2*aftermax+2,9);


/*****************************************************************************
 Load the original unmatched data
*****************************************************************************/

/* Load the data */

/* The unmatched sample is read from
       D:\Matching Extremes\stata\data\<model><sex><degree>.asc
   where
       model : b = broad probit  (narrow==0), n = narrow probit (narrow==1)
       sex   : w = women         (men==0),    m = men           (men==1)
       degree: a = Associate's   (coll==0),   b = Bachelor's    (coll==1),
               m = Master's      (coll==2)
   The file name and the label printed to the screen are assembled from these
   three parts instead of spelling out all twelve combinations. */

dtaok = 1;                       /* becomes 0 if a setting is not recognised */

if narrow==0;
    dtamdl = "\\b";   lblmdl = "Broad probit";
elseif narrow==1;
    dtamdl = "\\n";   lblmdl = "Narrow probit";
else;
    dtaok = 0;
endif;

if men==0;
    dtasex = "w";     lblsex = "Women";
elseif men==1;
    dtasex = "m";     lblsex = "Men";
else;
    dtaok = 0;
endif;

if coll==0;
    dtadeg = "a";     lbldeg = "Associate's";
elseif coll==1;
    dtadeg = "b";     lbldeg = "Bachelor's";
elseif coll==2;
    dtadeg = "m";     lbldeg = "Master's";
else;
    dtaok = 0;
endif;

/* Previously an unknown setting fell through all branches without loading
   anything; stop with a clear message instead. */
if dtaok==0;
    errorlog "Settings out of range: narrow must be 0/1, men must be 0/1, coll must be 0/1/2.";
    end;
endif;

dtafile = "D:\\Matching Extremes\\stata\\data" $+ dtamdl $+ dtasex $+ dtadeg $+ ".asc";
dtalbl  = lblmdl $+ ", " $+ lblsex $+ ", " $+ lbldeg;

load S_OLD[] = ^dtafile;
print dtalbl;


/* Number of variables (columns) per observation in the ASCII file:
   97 for the narrow model, 105 for the broad model. */
if narrow>=1;
    k = 97;                                        /* number of variables */
elseif narrow==0;
    k = 105;                                       /* number of variables */
endif;

/* S_old was loaded as one long column vector. It must contain a whole number
   of observations, otherwise reshape() would silently produce a mis-aligned
   matrix. */
if rows(S_old) % k /= 0;
    errorlog "Unmatched data file: number of values is not a multiple of the number of variables.";
    end;
endif;

n     = rows(S_old)/k;                                      /* number of observations */
S_old = reshape(S_old,n,k);

/*
S_old contains the variables:
01: id
02: t, treatment dummy
03: p, estimated propensity score
04: ind, estimated index score
05: sample weight of 1979
06: black dummy
07: hisp dummy
08: age 1979
09: year HS-diploma
10: yrdeg2, year received highest degree
11: wage79
...
27: wage95
28: exp79, actual experience
...
44: exp95
45: exps79, experience while enrolled in school
...
61: exps95
62: cexpm79, cumulated missing info dummies of expXX until year XX
...
78: cexpm95
79: cexpsm79, cumulated missing info dummies of expsXX until year XX
...
95: cexpsm95

Matching variables of the broad probit model:
96 96: ASVAB: math knowledge
** 97: ASVAB: auto + shop information
** 98: private school
** 99: expelled or suspended from school
** 100: curriculum college preparatory
** 101: curriculum general
97 102: highest grades of parents
** 103: occupation parents high
** 104: number of siblings
** 105: born in south
*/

/* If sample weights are not to be used, overwrite the weight column (column 5 of S_old) with ones: */
/* S_old[.,5] = ones(n,1); */

/* Choosing the covariates X which also contain the id and the black and hisp dummies. */
X = S_old[.,1 8:9 96:cols(S_old) 6:7];

/*
X contains the variables:
01: id
02: age 1979
03: year HS-diploma

04 04: ASVAB: math knowledge
** 05: ASVAB: auto + shop information
** 06: private school
** 07: expelled or suspended from school
** 08: curriculum college preparatory
** 09: curriculum general
05 10: highest grades of parents
** 11: occupation parents high
** 12: number of siblings
** 13: born in south
06 14: black dummy
07 15: hisp dummy
*/

/* Select the treated (rows whose treatment dummy, column 2 of S_old, is non-zero) */
X1 = selif(X,S_old[.,2]);

/* Select the untreated (rows whose treatment dummy is zero) */
X0 = selif(X,1-S_old[.,2]);

/* Choosing experience variables */
/* Columns 28:43, 45:60, 62:77 and 79:94 of S_old are the 1979-1994 values of
   exp, exps, cexpm and cexpsm; the 1995 columns (44, 61, 78, 95) are left out. */
EX1 = selif(S_old[.,28:43 45:60 62:77 79:94],S_old[.,2]);
EX0 = selif(S_old[.,28:43 45:60 62:77 79:94 5],1-S_old[.,2]);
/* EX0 additionally contains the sample weight of the untreated (column 5 of S_old) as its last column */

/*
EX contains the variables
01: exp79, actual experience
...
16: exp94
17: exps79, experience while enrolled in school
...
32: exps94
33: cexpm79, cumulated missing info dummies of expXX until year XX
...
48: cexpm94
49: cexpsm79, cumulated missing info dummies of expsXX until year XX
...
64: cexpsm94
(65: untreated: sample weight 1979)
*/


/* Balance of covariates before matching */
/* The columns compared are: p.score, index score, black, hisp, age 1979,
   year HS-diploma and all matching variables from column 96 of S_old onwards. */
/* Untreated */
balbef0  = selif(S_old[.,3 4 6:9 96:cols(S_old)],S_old[.,2].==0);
mbalbef0 = meanc(balbef0);
/* Treated */
balbef1  = selif(S_old[.,3 4 6:9 96:cols(S_old)],S_old[.,2].==1);
mbalbef1 = meanc(balbef1);

/* In the following t-test the sample size of the untreated is set equal to the number of
treated. This lowers the degrees of freedom. The reason is that after matching there is
also an equal number of treated and controls (the number of strata). The test is
therefore conservative: it tends to label a variable as balanced even though a more
precise test would not.

For each variable the statistic is
    |mean(untreated) - mean(treated)| / sqrt((s0^2 + s1^2)/n1)
with s0^2, s1^2 the sample variances of untreated and treated and n1 the number of
treated; it is compared with a t distribution with 2*(n1-1) degrees of freedom.
ttest0 is 1 where the two-sided p-value exceeds 0.05 and 0 otherwise. */

ttest0 = (2*cdftc(abs((mbalbef0 - mbalbef1)./
          sqrt((sumc((balbef0 - mbalbef0')^2)/(rows(balbef0)-1)
          + sumc((balbef1 - mbalbef1')^2)/(rows(balbef1)-1))/rows(balbef1))),2*(rows(balbef1)-1)) .> 0.05);

/* First column: means of the untreated, second column: means of the treated */
balbef = mbalbef0~mbalbef1;

/* Prepare matrix for balance of covariates after matching */
balaft = zeros(rows(balbef),2*aftermax);

/* t-test of equality of means of treated and controls */
ttest = zeros(rows(balbef),aftermax);

/* Percent bias reduction per variable */
balperc = zeros(rows(balbef),aftermax);


/*****************************************************************************
 Start iterations
*****************************************************************************/

after = 1;

do while after<=aftermax;


/*****************************************************************************
 Preparations
*****************************************************************************/

/* Arc list of the matching run:
   D:\Matching Extremes\Sas\f<iter>\arcout<after>.txt
   ftos(...,"%*.*lf",1,0) writes the number with field width 1 and no decimals. */
str1  = "D:\\Matching Extremes\\Sas\\f";
str1b = ftos(iter,"%*.*lf",1,0);
str1c = "\\arcout";
str2  = ftos(after,"%*.*lf",1,0);
str3  = ".txt";
str4  = str1$+str1b$+str1c$+str2$+str3;

/* S is read as one long column vector and reshaped further below */
load S[] = ^str4;

/* Additional information per arc:
   D:\Matching Extremes\Arcinfo\f<iter>\arcinf<after>.out
   str1b (folder number) and str2 (year after college) are reused from above. */
str1  = "D:\\Matching Extremes\\Arcinfo\\f";
str1c = "\\arcinf";
str3  = ".out";
str4  = str1$+str1b$+str1c$+str2$+str3;

/* T is read as one long column vector and reshaped further below */
load T[] = ^str4;

/* The new arcoutxx.txt files no longer contain a column of observation numbers,
   so every row of S consists of three values only. */

nS = rows(S)/3;                                /* number of observations in S */
S = reshape(S,nS,3);
/*
S contains the variables:
01: start node - id's or supersource 0 (_from_)
02: end node   - id's or supersource 0 (_to_)
03: flows
*/

/* T was loaded as one long column vector with ten values per arc */
nT = rows(T)/10;                                /* number of observations in T */
T = reshape(T,nT,10);
/*
T contains the variables:
01: wage of treated (of respective arc) in year ... after college
02: wage of untreated (of respective arc) in year ... after college
03: prop.score of treated (of respective arc)
04: prop.score of untreated (of respective arc)
05: index score of treated (of respective arc)
06: index score of untreated (of respective arc)
07: sample weights of treated
08: yrdeg2 of treated
09: experience of treated
10: experience of untreated
*/

/* log wages (columns 1 and 2 of T: wage of treated and of untreated) */
T[.,1:2] = ln(T[.,1:2]);

/* delete the first row of S which contained the variable names */
arcs = S[2:rows(S),.];

/* Number of treated: counted as the arcs whose start node (column 1) is the supersource 0 */
n_t = rows(selif(arcs,arcs[.,1].<0.5));
/* Number of controls: counted as the arcs whose end node (column 2) is the supersource 0 */
n_c = rows(selif(arcs,arcs[.,2].<0.5));

/* Controls that serve more than one treated: these are the arcs that end in
   the supersource 0 (column 2) and carry a positive flow (column 3).
   oft0 always starts with one emergency row of zeros, so it is never empty. */
hit0 = arcs[.,2].<0.9 .and arcs[.,3].>0.1;
oft0 = zeros(1,3);
if rows(delif(arcs,hit0))/=rows(arcs);
   oft0 = oft0|selif(arcs,hit0);
endif;

/* Number of strata with one control and more than one treated */
oftnr0  = rows(oft0)-1;

/* Maximum number of treated in such a stratum (flow + 1) */
oftmax0 = maxc(oft0[.,3])+1;

/* Mean number of treated in such strata; 0 if there is no such stratum */
oftmean0 = 0;
if oftnr0/=0;
   oftmean0 = meanc(oft0[2:rows(oft0),3]+1);
endif;

/* Drop the second column (all zeros); the leading row of zeros is still there. */
oft0    = oft0[.,1 3];

/* Treated that have more than one control: these are the arcs that start in
   the supersource 0 (column 1) and carry a positive flow (column 3).
   oft1 always starts with one emergency row of zeros, so it is never empty. */
hit1 = arcs[.,1].<0.9 .and arcs[.,3].>0.1;
oft1 = zeros(1,3);
if rows(delif(arcs,hit1))/=rows(arcs);
   oft1 = oft1|selif(arcs,hit1);
endif;

/* Number of strata with one treated and more than one control */
oftnr1  = rows(oft1)-1;

/* Maximum number of controls in such a stratum (flow + 1) */
oftmax1 = maxc(oft1[.,3])+1;

/* Drop the first column (all zeros); the leading row of zeros is still there. */
oft1    = oft1[.,2:3];

/* Delete the arcs from or to the supersource 0 */
arcs = delif(arcs,(arcs[.,1] .< 0.9) .OR (arcs[.,2] .< 0.9));

/* Sorting the arcs by start node (column 1), then by end node (column 2) */
arcs = sortmc(arcs,1|2);

/* Now merging arcs and the additional information in T is possible */
/* NOTE(review): the horizontal concatenation pairs row r of arcs with row r of T;
   this presumes T has one row per remaining arc in the same sorted order - confirm. */
arcs = arcs~T;

/* Delete the arcs of flow zero */
arcs = selif(arcs,arcs[.,3].>0.1);

/* Delete the column of flows and also experience and generate space for the covariates */
/* Kept: the two ids (1:2) and columns 1-8 of T (now 4:11). Dropped: the flow (3) and
   the two experience columns of T (12:13). 2*(cols(X1)+1)+9 zero columns are appended. */
arcs   = arcs[.,1:2 4:11]~zeros(rows(arcs),2*(cols(X1)+1)+9);
clarcs = cols(arcs);

/* Now add the further covariates of the treated and untreated using X1, X0 */
/* For every arc i the treated id (column 1) is looked up in X1 and the untreated id
   (column 2) in X0 by scanning all rows; every matching row j overwrites the previous one.
   The treated part must run first because the untreated part reads arcs[i,29]. */
i = 1;
do while i<=rows(arcs);
    j = 1;
    do while j<=rows(X1);
        if arcs[i,1]==X1[j,1];
            /* Columns 12:15: exp, exps, cexpm, cexpsm of the treated in year yrdeg2+after.
               arcs[i,10] is yrdeg2; the offset -78 maps it to the exp block of EX1
               (presumably yrdeg2 is a two-digit year, 79 -> column 1 - TODO confirm);
               -62, -46 and -30 address the same year in the next three 16-column blocks.
               Columns 16:19: black, hisp, math score and highest grades of parents
               (column 5 of X1 in the narrow model, column 10 in the broad model). */
            arcs[i,12:19] = EX1[j,arcs[i,10]+after-78 arcs[i,10]+after-62 arcs[i,10]+after-46
                            arcs[i,10]+after-30]~X1[j,cols(X1)-1 cols(X1) 4 5+5*(narrow==0)];
            /* Columns 28 onwards: age, year HS-diploma and the matching variables of the treated */
            arcs[i,28:(24+cols(X1))] = X1[j,2:(cols(X1)-2)];
        endif;
        j = j + 1;
    endo;
    j = 1;
    do while j<=rows(X0);
        if arcs[i,2]==X0[j,1];
            /* Column 11: sample weight of the untreated (last column of EX0) */
            arcs[i,11]    = EX0[j,cols(EX0)];
            /* Columns 20:27: same layout as 12:19, but for the untreated. The year is
               shifted: arcs[i,10]-arcs[i,29] is the treated's yrdeg2 minus the treated's
               year of HS-diploma, which is added to the untreated's own year of
               HS-diploma X0[j,3]. No check is made that the resulting column index
               lies inside EX0. */
            arcs[i,20:27] = EX0[j,arcs[i,10]-arcs[i,29]+X0[j,3]+after-78 arcs[i,10]-arcs[i,29]+X0[j,3]+after-62
                            arcs[i,10]-arcs[i,29]+X0[j,3]+after-46 arcs[i,10]-arcs[i,29]+X0[j,3]+after-30]~
                              X0[j,cols(X0)-1 cols(X0) 4 5+5*(narrow==0)];
            /* Remaining columns: age, year HS-diploma and the matching variables of the untreated */
            arcs[i,(25+cols(X0)):(21+2*cols(X0))] = X0[j,2:(cols(X0)-2)];
        endif;
        j = j + 1;
    endo;
    i = i + 1;
endo;

/* "arcs" contains the following variables
01: treated: id
02: untreated: id
03: treated: log wage ... year(s) after college
04: untreated: log wage ... year(s) after college
05: treated: prop.score
06: untreated: prop.score
07: treated: index score
08: untreated: index score

09: treated: sample weight 1979
10: treated: yrdeg2
11: untreated: sample weight 1979

12: treated: experience while not enrolled (actual experience, exp)
13: treated: experience while enrolled (exps)
14: treated: miss info of experience while not enrolled (cexpm)
15: treated: miss info of experience while enrolled (cexpsm)
16: treated: black
17: treated: hispanic
18: treated: math scores
19: treated: highest grades of parents

20: untreated: experience while not enrolled (actual experience, exp)
21: untreated: experience while enrolled (exps)
22: untreated: miss info of experience while not enrolled (cexpm)
23: untreated: miss info of experience while enrolled (cexpsm)
24: untreated: black
25: untreated: hispanic
26: untreated: math scores
27: untreated: highest grades of parents

28 28: treated: age
29 29: treated: year of high school diploma
30 30: treated: math scores
** 31: treated: auto and shop scores
** 32: treated: private school
** 33: treated: expelled or suspended from school
** 34: treated: curriculum college preparatory
** 35: treated: curriculum general
31 36: treated: highest grades of parents
** 37: treated: occupation parents high
** 38: treated: number of siblings
** 39: treated: born in south, not for AA of narrow model!
32 40: untreated: age
33 41: untreated: year of high school diploma
34 42: untreated: math scores
** 43: untreated: auto and shop scores
** 44: untreated: private school
** 45: untreated: expelled or suspended from school
** 46: untreated: curriculum college preparatory
** 47: untreated: curriculum general
35 48: untreated: highest grades of parents
** 49: untreated: occupation parents high
** 50: untreated: number of siblings
** 51: untreated: born in south, not for AA of narrow model!
1st column: narrow model
2nd column: broad model
*/

/* Separate the strata with more than one treated from the rest. */
/* For every control id listed in oft0 (its first row is the emergency row of zeros,
   hence the index i+1) the arcs ending in that control are moved from arcs to arcs0. */
i = 1;
arcs0 = zeros(1,clarcs);
do while i<=oftnr0;
   arcs0 = arcs0|selif(arcs,arcs[.,2].==oft0[i+1,1]);
   arcs  = delif(arcs,arcs[.,2].==oft0[i+1,1]);
   i = i + 1;
endo;
/* Remove the initial row of zeros again, but only if something was appended */
if oftnr0>0;
   arcs0 = arcs0[2:rows(arcs0),.];
endif;

/* If there are no such arcs arcs0 will be zeros. */

/* Sort the arcs0 by control id (column 2), then by treated id (column 1) */
if rows(arcs0)>1;                   /* if ==1 there will be an error in sortmc */
   arcs0 = sortmc(arcs0,2|1);
endif;

/* Calculate the mean effect for the strata with only one treated */
/* arcs is sorted by treated id, so all arcs of one treated are adjacent. "help"
   collects the arcs of the current treated; as soon as the treated id changes, the
   collected stratum is evaluated and appended to the result matrices. */
/* Add two auxiliary rows to arcs: the leading row of zeros makes arcs[i-1,.] valid
   for i=2, the trailing row of zeros forces the last stratum to be evaluated. */
arcs   = zeros(1,clarcs)|arcs|zeros(1,clarcs);
help   = zeros(1,clarcs);
strata = zeros(1,14+cols(X1));
idzero = zeros(1,7);
idone  = zeros(1,7);
/* Collect the treated and the controls such that OLS with these units is possible */
treated = zeros(1,12+cols(X1));
control = zeros(1,13+cols(X0));
/* For assessing the balance of covariates after matching */
balafter = zeros(1,2*rows(balbef));
i = 2;
do while i<=rows(arcs);
   if arcs[i,1]==arcs[i-1,1];
      /* same treated as in the previous row: extend the current stratum */
      help = help|arcs[i,.];
   elseif arcs[i,1]/=arcs[i-1,1];
      /* New treated id: evaluate the stratum collected in help. At the first change
         help is still the initial row of zeros; that dummy result is removed after
         the loop together with the initial rows of zeros. */
      /* mean log wage over the controls and the treated of the stratum */
      mean   = (sumc(help[.,4])+help[1,3])/(rows(help)+1);
      /* (m+1)/m^2 times the sum of squared deviations from that mean, m = number of controls */
      var    = (rows(help)+1)/(rows(help)^2)*(sumc((help[.,4]-
                mean).^2)+(help[1,3]-mean)^2);
      /* mean difference of log wages, treated minus controls */
      effect = meanc(help[.,3]-help[.,4]);
      wage0  = meanc(help[.,4])~meanc(exp(help[.,4]));
      /* mean over the controls' covariates */
      covar0 = meanc(help[.,(25+cols(X0)):(21+2*cols(X0))])';
      /* Columns 5-8 follow the documented layout of strata: p.score of treated, mean
         p.score of controls, index score of treated, mean index score of controls.
         (Columns 6 and 7 used to be written in swapped order, which disagreed with the
         documentation, with the rows of strata with more than one treated and with
         balafter.) */
      strata = strata|(arcs[i-1,1]~effect~var~rows(help)~help[1,5]~meanc(help[.,6])~
               help[1,7]~meanc(help[.,8])~help[1,9]~sumc(help[.,11])~wage0~help[1,3]~exp(help[1,3])
               ~help[1,10]~help[1,18:19]~(help[1,28:(24+cols(X1))]-covar0));
      /* every control enters with weight 1/(number of controls), the treated with weight 1 */
      idzero = idzero|(help[.,2]~(ones(rows(help),1)/rows(help))~help[.,4]~help[.,20:23]);
      idone  = idone|(help[1,1]~1~help[1,3]~help[1,12:15]);
      treated = treated|(help[1,3 9]~1~1~help[1,5]~help[1,10 18 19 12:15]~(help[1,12 13]^2)~
                help[1,16 17 29 30:(24+cols(X1))]);
      /* the sample weight of the treated is split equally among its controls */
      control = control|(help[.,4]~(help[.,9]/rows(help))~help[.,11]~ones(rows(help),1)~zeros(rows(help),1)~
                help[.,6]~zeros(rows(help),3)~help[.,20:23]~
                (help[.,20 21]^2)~help[.,24 25 (26+cols(X0)) (27+cols(X0)):(21+2*cols(X0))]);
      /* NOTE(review): black/hisp of the untreated (help[1,24:25]) are taken from the
         first control only, whereas all other covariates of the controls are averaged
         (covar0) - confirm that this is intended. */
      balafter = balafter|(help[1,5]~meanc(help[.,6])~help[1,7]~meanc(help[.,8])~help[1,16:17 24:25]~
                 help[1,28:(24+cols(X1))]~covar0);
      /* start the next stratum with the current arc */
      help = arcs[i,.];
   endif;
   i = i + 1;
endo;

/* Delete the first two rows from all result matrices. The first row is the row of
zeros used for initialisation; the second row was appended when the "elseif" branch
was entered for the first time, i.e. while "help" was still a row of zeros. */
strata   = strata[3:rows(strata),.];
idzero   = idzero[3:rows(idzero),.];
idone    = idone[3:rows(idone),.];
treated  = treated[3:rows(treated),.];
control  = control[3:rows(control),.];
balafter = balafter[3:rows(balafter),.];

/* strata contains
01: id of treated
02: log effect of each stratum
03: variance of stratum effect
04: number of controls in stratum
05: prop.score of treated in stratum
06: mean propensity score of controls in stratum
07: index score of treated in stratum
08: mean index score of controls in stratum
09: sample weight of treated in stratum
10: sum of sample weights of controls in stratum
11: mean log wage of controls in stratum
12: mean wage of controls in stratum
13: mean log wage of treated in stratum
14: mean wage of treated in stratum
15: yrdeg2 of treated in stratum
16: math score of treated
17: grades of parents of treated

18 18: difference in age
19 19: difference in year of high school diploma
20 20: difference in math scores
** 21: difference in auto and shop scores
** 22: difference in private school
** 23: difference in expelled or suspended from school
** 24: difference in curriculum college preparatory
** 25: difference in curriculum general
21 26: difference in highest grades of parents
** 27: difference in occupation parents high
** 28: difference in number of siblings
** 29: difference in born in south, not for AA of narrow model!

balafter contains
01: treated: propensity score
02: untreated: mean propensity score
03: treated: index score
04: untreated: mean index score
05: treated: black
06: treated: hisp
07: untreated: black
08: untreated: hisp

09 09: treated: age
10 10: treated: year of high school diploma
11 11: treated: math scores
** 12: treated: auto and shop scores
** 13: treated: private school
** 14: treated: expelled or suspended from school
** 15: treated: curriculum college preparatory
** 16: treated: curriculum general
12 17: treated: highest grades of parents
** 18: treated: occupation parents high
** 19: treated: number of siblings
** 20: treated: born in south, not for AA of narrow model!

13 21: untreated: age
14 22: untreated: year of high school diploma
15 23: untreated: math scores
** 24: untreated: auto and shop scores
** 25: untreated: private school
** 26: untreated: expelled or suspended from school
** 27: untreated: curriculum college preparatory
** 28: untreated: curriculum general
16 29: untreated: highest grades of parents
** 30: untreated: occupation parents high
** 31: untreated: number of siblings
** 32: untreated: born in south, not for AA of narrow model!

1st column: narrow model
2nd column: broad model

idzero contains information on the untreated
idone contains information on the treated
01: id
02: weight in stratum
03: log wage
04: experience while not enrolled
05: experience while enrolled
06: cumulative missing information dummies of experience (while not enrolled)
07: cumulative missing information dummies of experience while enrolled
*/

/* mean number of controls in strata with exactly one treated */
/* (at this point strata holds only these strata; column 4 is the number of controls) */
oftmean1 = meanc(strata[.,4]);

/* Calculate the mean effect for the strata with more than one treated */
/* Add two auxiliary rows of zeros to arcs0, one at the top and one at the bottom */
arcs0  = zeros(1,cols(arcs))|arcs0|zeros(1,cols(arcs));
help   = zeros(1,cols(arcs));

/* For an extended strata output use stratext with each treated in a single row even if
it is member of a stratum with more than one treated. The fourth column of stratext
indicates whether the treated belongs to such a stratum (0) or not (>0). */

/* start from the strata with exactly one treated; the other treated are appended below */
stratext = strata;
/* balaft will have the shape of stratext. However, be careful if covariates' variances
should be calculated since some control might have more than one treated! */

/* arcs0 is sorted by control id, so all arcs of one control are adjacent. "help"
   collects the arcs of the current control; when the control id changes the collected
   stratum (one control, several treated) is evaluated. If there are no such strata,
   arcs0 consists of rows of zeros only and the whole block is skipped. */
if arcs0[2,1]/=0;                            /* there could be no such strata */
   i = 2;
   do while i<=rows(arcs0);
      if arcs0[i,2]==arcs0[i-1,2];
         /* same control as in the previous row: extend the current stratum */
         help = help|arcs0[i,.];
      elseif arcs0[i,2]/=arcs0[i-1,2];
         /* sum of the log wage differences divided by (number of treated + 1) */
         mean = (sumc(help[.,3]-help[.,4])+0)/(rows(help)+1);
         /* (m+1)/m^2 times the squared deviations of the differences and of 0 from mean,
            m = number of treated */
         var  = (rows(help)+1)/(rows(help)^2)*(sumc((help[.,3]-help[.,4]-
                 mean).^2)+(0-mean)^2);
         effect = meanc(help[.,3]-help[.,4]);
         /* skip the initial row of zeros in help (first change of the control id) */
         if help[1,1]/=0;
            /* one row per stratum; first column 0 marks "more than one treated" */
            strata = strata|(0~effect~var~rows(help)~meanc(help[.,5])~help[1,6]~meanc(help[.,7])~help[1,8]~
                     sumc(help[.,9])~help[1,11]~meanc(help[.,4])~meanc(exp(help[.,4]))~
                     meanc(help[.,3])~meanc(exp(help[.,3]))~meanc(help[.,10])~(meanc(help[.,18:19])')~
                     (meanc(help[.,28:(24+cols(X1))])'-help[1,(25+cols(X0)):(21+2*cols(X0))]));
            /* the single control gets the number of treated as weight, each treated weight 1 */
            idzero = idzero|(help[1,2]~rows(help)~help[1,4]~help[1,20:23]);
            idone  = idone|(help[.,1]~ones(rows(help),1)~help[.,3]~help[.,12:15]);
            h      = rows(help);
            /* one row per treated; column 4 is 0 to mark membership in such a stratum */
            stratext = stratext|(help[.,1]~(help[.,3]-help[.,4])~
                       (ones(h,1).*var)~zeros(h,1)~help[.,5]~help[.,6]~help[.,7]~help[.,8]~
                       help[.,9]~help[.,11]~help[.,4]~exp(help[.,4])~help[.,3]~
                       exp(help[.,3])~help[.,10]~help[.,18:19]~
                       (help[.,28:(24+cols(X1))]-help[.,(25+cols(X0)):(21+2*cols(X0))]));
            treated  = treated|(help[.,3 9]~ones(rows(help),2)~help[.,5]~help[.,10 18 19 12:15]~(help[.,12 13]^2)~
                       help[.,16 17 29 30:(24+cols(X1))]);
            /* NOTE(review): the stratum weight of the control is the sample weight of the
               first treated times the number of treated (help[1,9]*rows(help)), whereas
               strata uses the sum of the treated's weights (sumc(help[.,9])) - confirm. */
            control  = control|(help[1,4]~(help[1,9]*rows(help))~help[1,11]~1~0~help[1,6]~0~0~0~
                       help[1,20:23]~(help[1,20 21]^2)~help[1,24 25 (26+cols(X0)) (27+cols(X0)):(21+2*cols(X0))]);
            /* one row per treated, the control's values are repeated in every row */
            balafter = balafter|(help[.,5]~help[.,6]~help[.,7]~help[.,8]~help[.,16:17 24:25]~
                       help[.,28:(24+cols(X1))]~help[.,(25+cols(X0)):(21+2*cols(X0))]);
         endif;
         /* start the next stratum with the current arc */
         help = arcs0[i,.];
      endif;
      i = i + 1;
   endo;
endif;

/* Retransform arcs again: remove the auxiliary first and last rows of zeros */
arcs  = arcs[2:(rows(arcs)-1),.];
arcs0 = arcs0[2:(rows(arcs0)-1),.];

/* Finding out the mean prop.score, mean index score, mean log wages and mean wages of the
controls before matching. (The untreated without treated partners who were
deleted before matching in SAS are not taken into consideration.) */
/* arcs holds one row per control of the strata with exactly one treated. The control of
   a stratum with more than one treated is counted once, using the row of strata whose
   first column is 0 (columns 6, 8, 11, 12: p.score, index score, log wage and wage of
   that control). */
if oftnr0==0;
    mprop0  = meanc(arcs[.,6]);
    mind0   = meanc(arcs[.,8]);
    mwage0  = meanc(arcs[.,4])~meanc(exp(arcs[.,4]));
elseif oftnr0/=0;
   mprop0  = (sumc(arcs[.,6]) + sumc(selif(strata[.,6],strata[.,1].==0)))
             /(rows(arcs) + oftnr0);
   mind0   = (sumc(arcs[.,8]) + sumc(selif(strata[.,8],strata[.,1].==0)))
             /(rows(arcs) + oftnr0);
   mwage0  = (sumc(arcs[.,4]) + sumc(selif(strata[.,11],strata[.,1].==0)))
             /(rows(arcs) + oftnr0)~
             (sumc(exp(arcs[.,4])) + sumc(selif(strata[.,12],strata[.,1].==0)))
             /(rows(arcs) + oftnr0);
endif;

/* the arc matrices are not needed any more */
clear arcs, arcs0;

/* Generate the interactions between yrdeg2, math, gradpar and treatment as deviations from their means */
/* Columns 6:8 of treated are replaced by their deviations from the column means over
   all treated; all other columns stay unchanged. */
treated = treated[.,1:5]~(treated[.,6:8]-meanc(treated[.,6:8])')~treated[.,9:cols(treated)];

/* additionally, strata now contains
01: 0 to show that there are more than one treated in the stratum
02: log effect of each stratum
03: variance of stratum effect
04: number of treated in stratum
05: mean prop.score of treated in stratum
06: mean propensity score of controls in stratum
07: mean index score of treated in stratum
08: mean index score of controls in stratum
09: sum of sample weight of treated in stratum
10: sample weight of control in stratum
11: mean log wage of control in stratum
12: mean wage of control in stratum
13: mean log wage of treated in stratum
14: mean wage of treated in stratum
15: mean yrdeg2 of treated in stratum
16: mean math score of treated
17: mean grades of parents of treated

18 18: mean difference in age
19 19: mean difference in year of high school diploma
20 20: mean difference in math scores
** 21: mean difference in auto and shop scores
** 22: mean difference in private school
** 23: mean difference in expelled or suspended from school
** 24: mean difference in curriculum college preparatory
** 25: mean difference in curriculum general
21 26: mean difference in highest grades of parents
** 27: mean difference in occupation parents high
** 28: mean difference in number of siblings
** 29: mean difference in born in south
*/

/* In order to examine if everything is okay compute again the mean number of
treated in strata with more than one treated (rows of strata whose first column is 0;
column 4 holds the number of treated there). It should agree with oftmean0.
NOTE(review): if no stratum has more than one treated, selif has no row to select -
check what oftmean contains in that case. */
oftmean = meanc(selif(strata[.,4],strata[.,1].==0));

/* stratext contains (if there are no strata with more than one treated: stratext=strata)
01: id of (every) treated
02: log effect of each treated, in particular the single effect of each
    treated that belongs to a stratum with more than one treated
03: variance of stratum effect (however!)
04: number of controls in stratum, zero indicates that the treated belongs to a
    stratum with more than one treated
05: prop.score of treated
06: propensity score of the control
07: index score of treated
08: index score of the control
11: log wage of control in stratum
12: wage of control in stratum
13: log wage of treated in stratum
14: wage of treated in stratum
15: yrdeg2 of treated in stratum
16: math score of treated
17: grades of parents of treated

18 18: difference in age
19 19: difference in year of high school diploma
20 20: difference in math scores
** 21: difference in auto and shop scores
** 22: difference in private school
** 23: difference in expelled or suspended from school
** 24: difference in curriculum college preparatory
** 25: difference in curriculum general
21 26: difference in highest grades of parents
** 27: difference in occupation parents high
** 28: difference in number of siblings
** 29: difference in born in south

balafter is of the same shape as stratext, each treated separately in a row.

treated or control contain, they are of the same shape as stratext:
t  c
01 01: ln wage
02 02: stratum weights of untreated and sample weights of treated
** 03: sample weights of untreated
03 04: constant
04 05: treatment dummy
05 06: p.score
06 07: interaction between treatment and year of college diploma
07 08: interaction between treatment and math scores
08 09: interaction between treatment and grades of parents
09 10: experience while not enrolled
10 11: experience while enrolled
11 12: cumulative missing information dummies of experience (while not enrolled)
12 13: cumulative missing information dummies of experience while enrolled
13 14: square of experience while not enrolled
14 15: square of experience while enrolled
15 16: black
16 17: hispanic
17 18: year of high school diploma

18 19: ASVAB: math knowledge
** 20: ASVAB: auto + shop information
** 21: private school
** 22: expelled or suspended from school
** 23: curriculum college preparatory
** 24: curriculum general
19 25: highest grades of parents
** 26: occupation parents high
** 27: number of siblings
** 28: born in south
*/

/* Balance of covariates after matching */
/* balafter has one row per treated. The column selections below put the untreated and
   the treated columns into the same variable order as balbef (p.score, index score,
   black, hisp, age, year HS-diploma, matching variables). Means are unweighted. */
/* Untreated */
balaft[.,2*after-1] = meanc(balafter[.,2 4 7 8 (6+cols(X0)):(2+2*cols(X0))]);
/* Treated */
balaft[.,2*after]   = meanc(balafter[.,1 3 5 6 9:(5+cols(X1))]);
/* T-test of equality of means */
/* Statistic per variable: |mean(untreated) - mean(treated)| / sqrt((SS0 + SS1)/((n-1)*n)),
   with SS0, SS1 the sums of squared deviations from the respective mean and
   n = rows(balafter); compared with a t distribution with 2*(n-1) degrees of freedom.
   The entry is 1 where the two-sided p-value exceeds 0.05 and 0 otherwise. */
ttest[.,after] = (2*cdftc(abs((balaft[.,2*after-1]-balaft[.,2*after])./
                 sqrt((sumc((balafter[.,2 4 7 8 (6+cols(X0)):(2+2*cols(X0))] -
                 balaft[.,2*after-1]')^2)+ sumc((balafter[.,1 3 5 6 9:(5+cols(X1))]-balaft[.,2*after]')^2))/
                 ((rows(balafter)-1)*rows(balafter)))),2*(rows(balafter)-1)) .> 0.05);

/* Percent bias reduction per variable */
/* 1 - |difference in means after matching / difference in means before matching|;
   a variable whose means were identical before matching leads to a division by zero. */
balperc[.,after] = 1 - abs((balaft[.,2*after] - balaft[.,2*after-1])./(mbalbef1 - mbalbef0));

/* total sum of sample weights (column 9 of strata: sample weight of the treated) */
alweigh = sumc(strata[.,9]);

/* mean effect of treatment on the treated: weighted mean of the stratum effects */
meffect = (strata[.,2]'strata[.,9])/alweigh;

/* variance of meffect: sum of weight^2 * stratum variance, divided by alweigh^2 */
mvar    = ((strata[.,9].*strata[.,3])'strata[.,9])/(alweigh^2);

/* p-value of meffect: complement of the standard normal cdf at meffect/sqrt(mvar),
   i.e. a one-sided (upper tail) probability */
p = cdfnc(meffect/sqrt(mvar));

/* Number of strata */
table1[2*after-1,5]  = rows(strata);
/* Number of treated */
table1[2*after,5]    = n_t;
/* Number of controls */
table1[2*after,6]    = n_c;
/* Mean number of treated in strata with more than one treated */
table1[2*after-1,7] = oftmean0;
/* Maximum number of treated in strata with more than one treated */
/* (oftmax0 is 1 if there is no such stratum, because oft0 then consists of zeros only) */
table1[2*after,7] = oftmax0;


/* Outfile idzero and idone for STATA */
/*
str1 = "D:\\Matching Extremes\\Stata\\ids";
str3 = ".out";
str4  = str1$+str2$+str3;

output file = ^str4 on;
format /rd 13,6;
screen off;
print idone|idzero;
screen on;
output off;
*/

/*****************************************************************************
 Confidence intervals and tests in the linear model for tau_s

 The stratum effects strata[.,2] are regressed by weighted least squares on a
 constant and the demeaned heterogeneity covariates strata[.,15:17], using
 the weights strata[.,9]. Only the pure matching variant (Xpur, delpur,
 Vpur, Wpur) is active; the regression-adjusted variant (X, delta, V, W) is
 commented out.
*****************************************************************************/

nos  = rows(strata);

/* Define the regressors */

/* Mean level of heterogeneity covariates of treated */
/* weighted column means of strata[.,15:17] with weights strata[.,9] */
Xbar = (strata[.,9]'strata[.,15:17])/sumc(strata[.,9]);

/* Regressors for matching with regression adjustment */
/*X    = ones(nos,1)~(strata[.,15:17]-Xbar)~strata[.,18:(14+cols(X1))];*/
/* Regressors for pure matching without regression adjustment */
/* The covariates are demeaned, so the constant delpur[1] is the effect at
   the weighted mean of the covariates */
Xpur = ones(nos,1)~(strata[.,15:17]-Xbar);

/* NOTE(review): the definition of X directly above is commented out, so this
   statement reads whatever X was assigned earlier in the file -- TODO confirm
   that this is intended. noX is not used in this section. */
noX  = cols(X);

/* OLS using sample weights */
/*delta  = invpd(X'(strata[.,9].*X))*X'(strata[.,9].*strata[.,2]);*/
delpur = invpd(Xpur'(strata[.,9].*Xpur))*Xpur'(strata[.,9].*strata[.,2]);

/* Calculate tau's approximate variance, V is one column here */
/*V    = strata[.,3]+(1./strata[.,4]).*((X*delta).^2)-
       2*(X*delta).*(1./strata[.,4]).*(strata[.,13]-strata[.,11]);*/
Vpur = strata[.,3]+(1./strata[.,4]).*((Xpur*delpur).^2)-
       2*(Xpur*delpur).*(1./strata[.,4]).*(strata[.,13]-strata[.,11]);

/* Calculate delta's approximate variance */
/*W    = invpd(X'(strata[.,9].*X))*X'(eye(nos).*(V.*(strata[.,9].^2)))*X*
       invpd(X'(strata[.,9].*X));
test = delta[1:4]./sqrt(diag(W[1:4,1:4]));
pdel = cdfchic(test^2,1);              /* two-sided */*/

/* Sandwich form: eye(nos).*(column) places the column Vpur.*weight^2 on the
   diagonal of an nos x nos matrix */
Wpur = invpd(Xpur'(strata[.,9].*Xpur))*Xpur'(eye(nos).*(Vpur.*(strata[.,9].^2)))*Xpur*
       invpd(Xpur'(strata[.,9].*Xpur));
/* ratio of each coefficient to its standard error; the squared ratio is
   compared with a chi-square(1) distribution */
test = delpur[1:4]./sqrt(diag(Wpur[1:4,1:4]));
ppur = cdfchic(test^2,1);              /* two-sided */

/* Point estimates: the constant is transformed by exp(.)-1 before it is
   stored in table1; the heterogeneity coefficients are stored untransformed
   in tabhet */
table1[2*after-1,1]   = exp(delpur[1])-1;
/*table1[2*after-1,2]   = exp(delta[1])-1;*/
tabhet[2*after-1,1:3] = delpur[2:4]';
/*tabhet[2*after-1,4:6] = delta[2:4]';*/

/* Standard errors: for the transformed constant the standard error is
   multiplied by exp(delpur[1]), the derivative of exp(.)-1 */
table1[2*after,1]   = exp(delpur[1])*sqrt(Wpur[1,1]);
/*table1[2*after,2]   = exp(delta[1])*sqrt(W[1,1]);*/
tabhet[2*after,1:3] = sqrt(diag(Wpur[2:4,2:4])');
/*tabhet[2*after,4:6] = sqrt(diag(W[2:4,2:4]))';*/

/* p-values, one row per year */
ptable1[after,1]   = ppur[1];
/*ptable1[after,2]   = pdel[1];*/
ptabhet[after,1:3] = ppur[2:4]';
/*ptabhet[after,4:6] = pdel[2:4]';*/

/**************************************************************************************/
/* Mean effect of treatment on a randomly assigned person */
/* Same weighted regression as for the effect on the treated, but with the
   weights strata[.,9] + strata[.,10] instead of strata[.,9] alone. Results go
   to column 3 of table1 and ptable1. */

weight = strata[.,9] + strata[.,10];

/* Mean level of heterogeneity covariates, weighted with the combined weights
   (not with the weights of the treated alone) */
Xbar = (weight'strata[.,15:17])/sumc(weight);

/* Regressors for matching with regression adjustment */
/* NOTE(review): this commented-out line uses columns 16:(12+cols(X1)), while
   the corresponding line for the effect on the treated uses
   18:(14+cols(X1)) -- check before reactivating. */
/*X    = ones(nos,1)~(strata[.,15:17]-Xbar)~strata[.,16:(12+cols(X1))];*/
/* Regressors for pure matching without regression adjustment */
Xpur = ones(nos,1)~(strata[.,15:17]-Xbar);


/* OLS using the combined weights */
/* NOTE(review): the original note here read
   OLS using sample weights times (1-p.score);
   the code simply uses weight as defined above -- TODO confirm which
   description applies to strata[.,10]. */
/*delta  = invpd(X'(weight.*X))*X'(weight.*strata[.,2]);*/
delpur = invpd(Xpur'(weight.*Xpur))*Xpur'(weight.*strata[.,2]);

/* Calculate tau's approximate variance, V is one column here */
/*V    = strata[.,3]+(1./strata[.,4]).*((X*delta).^2)-
       2*(X*delta).*(1./strata[.,4]).*(strata[.,13]-strata[.,11]);*/
Vpur = strata[.,3]+(1./strata[.,4]).*((Xpur*delpur).^2)-
       2*(Xpur*delpur).*(1./strata[.,4]).*(strata[.,13]-strata[.,11]);

/* Calculate delta's approximate variance */
/*W    = invpd(X'(weight.*X))*X'(eye(nos).*(V.*(weight.^2)))*X*
       invpd(X'(weight.*X));
test = delta[1:4]./sqrt(diag(W[1:4,1:4]));
pdel = cdfchic(test^2,1);              /* two-sided */*/

/* Sandwich form with Vpur.*weight^2 on the diagonal of the middle matrix */
Wpur = invpd(Xpur'(weight.*Xpur))*Xpur'(eye(nos).*(Vpur.*(weight.^2)))*Xpur*
       invpd(Xpur'(weight.*Xpur));
test = delpur[1:4]./sqrt(diag(Wpur[1:4,1:4]));
ppur = cdfchic(test^2,1);              /* two-sided */

/* Estimate exp(constant)-1, its standard error scaled by exp(constant),
   and the p-value of the constant */
table1[2*after-1,3] = exp(delpur[1])-1;
/*table1[2*after-1,4] = exp(delta[1])-1;*/
table1[2*after,3]   = exp(delpur[1])*sqrt(Wpur[1,1]);
/*table1[2*after,4]   = exp(delta[1])*sqrt(W[1,1]);*/
ptable1[after,3]    = ppur[1];
/*ptable1[after,4]    = pdel[1];*/


/*****************************************************************************
 Output for the given year after college

 Opens the auxiliary output file. The detailed per-year printout that follows
 is commented out, including its output off statement, so that statement is
 not executed here and everything printed after this point also goes to the
 file.
 NOTE(review): the file name says post12 while the header of this program
 says post13.G -- TODO confirm that this is the intended target.
*****************************************************************************/

output file="D:\\Matching Extremes\\gauss\\post12.out" on;
/*
format /rd 3,0;
print;
print "=========================================";
print "Results for" after "year(s) after college";
print "=========================================";
format /rd 11,5;
print;

print "number of strata                        :" rows(strata);
print "number of treated                       :" n_t;
print "number of controls                      :" n_c;
print;
print "strata with more than one treated       :" oftnr0;
print "max. number of treated in these strata  :" oftmax0;
print "mean number of treated in these strata  :" oftmean0;
print;
print "strata with more than one control       :" oftnr1;
print "max. number of controls in these strata :" oftmax1;
print;
print "mean number of controls in strata with";
print "                   exactly one treated  :" oftmean1;
print;
print "mean prop.score of matched controls     :" strata[.,6]'strata[.,9]/alweigh;
print "mean prop.score of unmatched controls   :" mprop0;
print;
print "mean log wage of matched controls       :" strata[.,11]'strata[.,9]/alweigh;
print "mean log wage of unmatched controls     :" mwage0[1];
print "mean wage of matched controls           :" strata[.,12]'strata[.,9]/alweigh;
print "mean wage of unmatched controls         :" mwage0[2];
print;
print "mean log wage of treated                :" stratext[.,13]'stratext[.,9]/alweigh;
print "mean wage of treated                    :" stratext[.,14]'stratext[.,9]/alweigh;
output off;

str1 = "D:\\Matching Extremes\\Stata\\strata";
str3 = ".out";
str4  = str1$+str2$+str3;

output file = ^str4 on;
format /rd 13,6;
screen off;
print stratext;
screen on;
output off;
*/


/*
/*****************************************************************************
The pooled model: all years after college together
*****************************************************************************/

if aftermax>1;

if after==1;
   pooled = strata[.,2 9 3 4 13 11 15:17]~ones(nos,1)~zeros(nos,aftermax-
            1)~strata[.,18:cols(strata)];
elseif after>1 AND after<aftermax;
   pooled = pooled|(strata[.,2 9 3 4 13 11 15:17]~zeros(nos,after-1)~ones(nos,1)~
            zeros(nos,aftermax-after)~strata[.,18:cols(strata)]);
elseif after==aftermax;
   pooled = pooled|(strata[.,2 9 3 4 13 11 15:17]~zeros(nos,aftermax-1)~ones(nos,1)~strata[.,18:cols(strata)]);
endif;

/* pooled contains
01: log effect of each stratum
02: sum of the sample weights of treated in stratum
03: variance of stratum effect
04: number of units in stratum minus 1
05: mean log wage of treated in stratum
06: mean log wage of controls in stratum

07: mean yrdeg2 of treated
08: mean math score of treated
09: mean grades of parents of treated
10: first dummy for year after college
...
19: last dummy for year after college

20 20: mean difference in age
21 21: mean difference in year of high school diploma
22 22: mean difference in math scores
** 23: mean difference in auto and shop scores
** 24: mean difference in private school
** 25: mean difference in expelled or suspended from school
** 26: mean difference in curriculum college preparatory
** 27: mean difference in curriculum general
23 28: mean difference in highest grades of parents
** 29: mean difference in occupation parents high
** 30: mean difference in number of siblings
** 31: mean difference in born in south, not for AA of narrow model!
*/

endif;
*/


/*
/*****************************************************************************
 OLS regression with individuals from stratification
*****************************************************************************/

/* Dependent variable */
lnw = treated[.,1]|control[.,1];

/* Weights according to stratification */
weighA = treated[.,2]|control[.,2];

/* Pure sample weights */
weighB = treated[.,2]|control[.,3];

/* Regressors */
XX = treated[.,3 4 6:cols(treated)]|control[.,4 5 7:cols(control)];
pp = (treated[.,3:4 6:8 5]~treated[.,5]^2~treated[.,5]^3~treated[.,5]^4~treated[.,15:17])
        |(control[.,4:5 7:9 6]~control[.,6]^2~control[.,6]^3~control[.,6]^4~control[.,16:18]);

/* Experience measures and weights */
exc1    = treated[.,9:14];
exc0    = control[.,10:15];
weighA1 = treated[.,2];
weighA0 = control[.,2];
weighB1 = treated[.,2];
weighB0 = control[.,3];

/* XX contains
01: constant
02: treatment dummy
03: interaction between treatment and year of college diploma
04: interaction between treatment and math scores
05: interaction between treatment and grades of parents
06: experience while not enrolled
07: experience while enrolled
08: cumulative missing information dummies of experience (while not enrolled)
09: cumulative missing information dummies of experience while enrolled
10: square of experience while not enrolled
11: square of experience while enrolled
12: black
13: hispanic
14: year of high school diploma
15: remaining covariates
...

pp contains
01: constant
02: treatment dummy
03: interaction between treatment and year of college diploma
04: interaction between treatment and math scores
05: interaction between treatment and grades of parents
06: p.score
07: p.score^2
08: p.score^3
09: p.score^4
10: black
11: hispanic
12: year of high school diploma
*/

/* Weighted OLS regression A */
b    = invpd(XX'(weighA.*XX))*XX'(weighA.*lnw);
eps  = lnw - XX*b;
varb = (eps'(weighA.*eps))*invpd(XX'(weighA.*XX))/(rows(XX)-cols(XX));
tb   = b./sqrt(diag(varb));
pb   = 2*cdfnc(abs(tb));                     /* two-sided */

/*
print b[6:11]';
mm1 = selif(XX[.,6:11],1-XX[.,2]);
print meanc(mm1[.,1 2]~(mm1[.,1]+mm1[.,2])~mm1[.,3 4]~(mm1[.,3]+mm1[.,4])~mm1[.,5 6])';
mm2 = selif(XX[.,6:11],XX[.,2]);
print meanc(mm2[.,1 2]~(mm2[.,1]+mm2[.,2])~mm2[.,3 4]~(mm2[.,3]+mm2[.,4])~mm2[.,5 6])';
print meanc(mm1[.,1 2])'-meanc(mm2[.,1 2])';
print;
*/

/* Coefficient estimate */
table2[2*after-1,1] = exp(b[2])-1;
table2[2*after,1]   = exp(b[2])*sqrt(varb[2,2]);

tabhet[2*after-1,7:9] = b[3:5]';
tabhet[2*after,7:9]   = sqrt(varb[3,3]~varb[4,4]~varb[5,5]);

ptable2[after,1]   = pb[2];
ptabhet[after,7:9] = pb[3:5]';

/* Treatment effect estimate */
dexp  = 1|(meancw(exc1,weighA1)-meancw(exc0,weighA0));
deff  = dexp'b[2 6:11];
sdeff = sqrt(dexp'varb[2 6:11,2 6:11]*dexp);
pexp  = 2*cdfnc(abs(deff./sdeff));
table2[2*after-1,2] = exp(deff)-1;
table2[2*after,2]   = exp(deff)*sdeff;
ptable2[after,2]    = pexp;

/* Weighted OLS regression A on p.score */
bp    = invpd(pp'(weighA.*pp))*pp'(weighA.*lnw);
epsp  = lnw - pp*bp;
varbp = (epsp'(weighA.*epsp))*invpd(pp'(weighA.*pp))/(rows(pp)-cols(pp));
tbp   = bp./sqrt(diag(varbp));
pbp   = 2*cdfnc(abs(tbp));                     /* two-sided */

/* Coefficient estimate */
table2[2*after-1,4] = exp(bp[2])-1;
table2[2*after,4]   = exp(bp[2])*sqrt(varbp[2,2]);
ptable2[after,4]   = pbp[2];

/* Weighted OLS regression with treatment*experience interactions */
XXi   = XX[.,1:11]~(XX[.,2].*XX[.,6 7 10 11])~XX[.,12:cols(XX)];
bi    = invpd(XXi'(weighA.*XXi))*XXi'(weighA.*lnw);
epsi  = lnw - XXi*bi;
varbi = (epsi'(weighA.*epsi))*invpd(XXi'(weighA.*XXi))/(rows(XXi)-cols(XXi));
tbi   = bi./sqrt(diag(varbi));
pbi   = 2*cdfnc(abs(tbi));                     /* two-sided */

/* Treatment effect estimate */
dexpi  = 1|(meancw(exc1,weighA1)-meancw(exc0,weighA0))|meancw(exc1[.,1:2 5:6],weighA1);
deffi  = dexpi'bi[2 6:15];
sdeffi = sqrt(dexpi'varbi[2 6:15,2 6:15]*dexpi);
pexpi  = 2*cdfnc(abs(deffi./sdeffi));
table2[2*after-1,3] = exp(deffi)-1;
table2[2*after,3]   = exp(deffi)*sdeffi;
ptable2[after,3]    = pexpi;


/* Weighted OLS regression B */
b    = invpd(XX'(weighB.*XX))*XX'(weighB.*lnw);
eps  = lnw - XX*b;
varb = (eps'(weighB.*eps))*invpd(XX'(weighB.*XX))/(rows(XX)-cols(XX));
tb   = b./sqrt(diag(varb));
pb   = 2*cdfnc(abs(tb));                     /* two-sided */

/* Coefficient estimate */
table2[2*after-1,5] = exp(b[2])-1;
table2[2*after,5]   = exp(b[2])*sqrt(varb[2,2]);
ptable2[after,5]    = pb[2];

/* Treatment effect estimate */
dexp = 1|(meancw(exc1,weighB1)-meancw(exc0,weighB0));
deff  = dexp'b[2 6:11];
sdeff = sqrt(dexp'varb[2 6:11,2 6:11]*dexp);
pexp = 2*cdfnc(abs(deff./sdeff));
table2[2*after-1,6] = exp(deff)-1;
table2[2*after,6]   = exp(deff)*sdeff;
ptable2[after,6]    = pexp;

/* Weighted OLS regression B on p.score */
bp    = invpd(pp'(weighB.*pp))*pp'(weighB.*lnw);
epsp  = lnw - pp*bp;
varbp = (epsp'(weighB.*epsp))*invpd(pp'(weighB.*pp))/(rows(pp)-cols(pp));
tbp   = bp./sqrt(diag(varbp));
pbp   = 2*cdfnc(abs(tbp));                     /* two-sided */

/* Coefficient estimate */
table2[2*after-1,8] = exp(bp[2])-1;
table2[2*after,8]   = exp(bp[2])*sqrt(varbp[2,2]);
ptable2[after,8]   = pbp[2];

/* Weighted OLS regression with treatment*experience interactions */
XXi   = XX[.,1:11]~(XX[.,2].*XX[.,6 7 10 11])~XX[.,12:cols(XX)];
bi    = invpd(XXi'(weighB.*XXi))*XXi'(weighB.*lnw);
epsi  = lnw - XXi*bi;
varbi = (epsi'(weighB.*epsi))*invpd(XXi'(weighB.*XXi))/(rows(XXi)-cols(XXi));
tbi   = bi./sqrt(diag(varbi));
pbi   = 2*cdfnc(abs(tbi));                     /* two-sided */

/* Treatment effect estimate */
dexpi = 1|(meancw(exc1,weighB1)-meancw(exc0,weighB0))|meancw(exc1[.,1:2 5:6],weighB1);
deffi  = dexpi'bi[2 6:15];
sdeffi = sqrt(dexpi'varbi[2 6:15,2 6:15]*dexpi);
pexpi = 2*cdfnc(abs(deffi./sdeffi));
table2[2*after-1,7] = exp(deffi)-1;
table2[2*after,7]   = exp(deffi)*sdeffi;
ptable2[after,7]    = pexpi;
*/


/*
/*****************************************************************************
The pooled OLS model: all years after college together
*****************************************************************************/

if aftermax>1;

if after==1;
   XXpool  = XX[.,3:cols(XX)]~XX[.,1]~zeros(rows(XX),aftermax-1)~XX[.,2]~zeros(rows(XX),aftermax-1);
/*   pppool  = pp[.,3:cols(pp)]~pp[.,1]~zeros(rows(pp),aftermax-1)~pp[.,2]~zeros(rows(pp),aftermax-1);*/
   lnwpool = lnw;
   wApool  = weighA;
elseif after>1 AND after<aftermax;
   XXpool  = XXpool|(XX[.,3:cols(XX)]~zeros(rows(XX),after-1)~XX[.,1]~zeros(rows(XX),aftermax-after)~
                                     zeros(rows(XX),after-1)~XX[.,2]~zeros(rows(XX),aftermax-after));
/*   pppool  = pppool|(pp[.,3:cols(pp)]~zeros(rows(pp),after-1)~pp[.,1]~zeros(rows(pp),aftermax-after)~
                                     zeros(rows(pp),after-1)~pp[.,2]~zeros(rows(pp),aftermax-after));*/
   lnwpool = lnwpool|lnw;
   wApool  = wApool|weighA;
elseif after==aftermax;
   XXpool = XXpool|(XX[.,3:cols(XX)]~zeros(rows(XX),aftermax-1)~XX[.,1]~
                                     zeros(rows(XX),aftermax-1)~XX[.,2]);
/*   pppool = pppool|(pp[.,3:cols(pp)]~zeros(rows(pp),aftermax-1)~pp[.,1]~
                                     zeros(rows(pp),aftermax-1)~pp[.,2]);*/
   lnwpool = lnwpool|lnw;
   wApool  = wApool|weighA;
endif;

/* XXpooled contains
01: interaction between treatment and year of college diploma
02: interaction between treatment and math scores
03: interaction between treatment and grades of parents
04: experience while not enrolled
05: experience while enrolled
06: cumulative missing information dummies of experience (while not enrolled)
07: cumulative missing information dummies of experience while enrolled
08: square of experience while not enrolled
09: square of experience while enrolled
10: black
11: hispanic
12: year of high school diploma
13: remaining covariates
...
xx: constants for each year after college
xx: ...
xx: treatment dummy for each year after college
xx: ...

pppooled contains -- but is not needed at the moment
01: interaction between treatment and year of college diploma
02: interaction between treatment and math scores
03: interaction between treatment and grades of parents
04: p.score
05: p.score^2
06: p.score^3
07: p.score^4
08: constants for each year after college
09: ...
10: treatment dummy for each year after college
11: ...
*/

endif;
*/


/*****************************************************************************
Next iteration

Advance the year-after-college index and close a do-while loop that was
opened before this section (presumably the loop over after -- TODO confirm).
*****************************************************************************/

after = after + 1;
endo;


/*
/*****************************************************************************
The pooled model
*****************************************************************************/

if aftermax>1;

/* Sum of weights */
alweigh = sumc(pooled[.,2]);

/* Regress stratum differences on the level and differenced covariates, and dummies for year after college */

/* Regressors for matching with regression adjustment */
X = pooled[.,7:cols(pooled)];

/* Regressors for pure matching without regression adjustment */
Xpur = pooled[.,7:(aftermax+9)];

delpool  = invpd(X'(pooled[.,2].*X))*X'(pooled[.,2].*pooled[.,1]);
delpopur = invpd(Xpur'(pooled[.,2].*Xpur))*Xpur'(pooled[.,2].*pooled[.,1]);

tabhet[2*aftermax+1,1:3] = delpopur[1:3]';
tabhet[2*aftermax+1,4:6] = delpool[1:3]';

/* Calculate approx. variance of the stratum effects, V is a column here */
V    = pooled[.,3] + (1./pooled[.,4]).*((X*delpool).^2) -
       2*(X*delpool).*(1./pooled[.,4]).*(pooled[.,5]-pooled[.,6]);
Vpur = pooled[.,3] + (1./pooled[.,4]).*((Xpur*delpopur).^2) -
       2*(Xpur*delpopur).*(1./pooled[.,4]).*(pooled[.,5]-pooled[.,6]);

/* Calculate delpool's approx. variance */
W    = invpd(X'(pooled[.,2].*X))*X'(eye(rows(pooled)).*(V.*( pooled[.,2].^2)))*X*invpd(X'(pooled[.,2].*X));
test = delpool[1:3]./sqrt(diag(W[1:3,1:3]));
p2   = cdfchic(test^2,1);                 /* two-sided */

Wpur = invpd(Xpur'(pooled[.,2].*Xpur))*Xpur'
       (eye(rows(pooled)).*(Vpur.*( pooled[.,2].^2)))*Xpur*invpd(Xpur'(pooled[.,2].*Xpur));
test  = delpopur[1:3]./sqrt(diag(Wpur[1:3,1:3]));
p2pur = cdfchic(test^2,1);                 /* two-sided */

tabhet[2*aftermax+2,1:3] = sqrt(diag(Wpur[1:3,1:3]))';
tabhet[2*aftermax+2,4:6] = sqrt(diag(W[1:3,1:3]))';
ptabhet[aftermax+1,1:3]  = p2pur';
ptabhet[aftermax+1,4:6]  = p2';


/* Pooled OLS */

/* Weighted OLS regression A */
b    = invpd(XXpool'(wApool.*XXpool))*XXpool'(wApool.*lnwpool);
eps  = lnwpool - XXpool*b;
varb = (eps'(wApool.*eps))*invpd(XXpool'(wApool.*XXpool))/(rows(XXpool)-cols(XXpool));
tb   = b./sqrt(diag(varb));
pb   = 2*cdfnc(abs(tb));                     /* two-sided */

tabhet[2*aftermax+1,7:9] = b[1:3]';
tabhet[2*aftermax+2,7:9] = sqrt(diag(varb[1:3,1:3]))';
ptabhet[aftermax+1,7:9]  = pb[1:3]';


elseif aftermax==1;
print "No pooled model!";;
print;

endif;
*/


/*****************************************************************************
LaTeX table output

Prints the matching results as cells separated by & characters. For every
year j two lines are printed: the estimates (row 2*j-1 of table1) followed by
significance stars, and the standard errors (row 2*j of table1) followed by
the strings stored in the even rows of st1. st1 and pth are string arrays
defined outside this section (pth presumably holds the opening text of each
cell -- TODO confirm).
*****************************************************************************/

print;
print;
print "T A B L E S";
print;


print; print "Matching Effects"; print;

j = 1;
do while j<=aftermax;

    /* text printed after each standard error */
    st1[2*j,1:4] = ")  "~")  "~")  "~")  ";

    /* k steps by 2, so only columns 1 and 3 of table1 are printed; the
       statements that would fill columns 2 and 4 are commented out above */
    k = 1;
    do while k<=4;
        /* Transform p-values into stars */
        /* the last branch (<= 2) is the catch-all for p-values above 0.1 */
        if ptable1[j,k] <= .01;
           st1[2*j-1,k]  = "***";
        elseif ptable1[j,k] <= .05;
           st1[2*j-1,k]  = "** ";
        elseif ptable1[j,k] <= .1;
           st1[2*j-1,k]  = "*  ";
        elseif ptable1[j,k] <= 2;
           st1[2*j-1,k]  = "   ";
        endif;

        /* Print results */
        /* a double semicolon keeps the output on the same line */
        "&";; format /rdn 2,0; print $pth[2*j-1];;
              format /rdn 5,3; print table1[2*j-1,k];;
              format /rdn 3,0; print $st1[2*j-1,k];;

        k = k + 2;
    endo;

    /* number of strata, number of treated, mean number of treated in strata
       with more than one treated; the single semicolon ends the line */
    "&";; format /rdn 4,0; print table1[2*j-1,5];;
    "&";; format /rdn 4,0; print table1[2*j,5];;
    "&";; format /rdn 4,1; print table1[2*j-1,7];

    /* second line: standard errors of columns 1 and 3 */
    k = 1;
    do while k<=4;
        "&";; format /rdn 2,0; print $pth[2*j];;
              format /rdn 5,3; print table1[2*j,k];;
              format /rdn 3,0; print $st1[2*j,k];;
        k = k + 2;
    endo;

    /* empty cell, number of controls, maximum number of treated in strata
       with more than one treated */
    "&";; format /rdn 4,0; print"    ";;
    "&";; format /rdn 4,0; print table1[2*j,6];;
    "&";; format /rdn 4,1; print table1[2*j,7];

    j = j + 1;
endo;

/*
print; print "OLS Results"; print;

j = 1;
do while j<=aftermax;

    st2[2*j,1:12] = ")  "~")  "~")  "~")  "~")  "~")  "~")  "~")  "~")  "~")  "~")  "~")  ";

    k = 1;
    do while k<=12;
        /* Transform p-values into stars */
        if ptable2[j,k] <= .01;
           st2[2*j-1,k]  = "***";
        elseif ptable2[j,k] <= .05;
           st2[2*j-1,k]  = "** ";
        elseif ptable2[j,k] <= .1;
           st2[2*j-1,k]  = "*  ";
        elseif ptable2[j,k] <= 2;
           st2[2*j-1,k]  = "   ";
        endif;

        if k /= 12;
            /* Print results */
            "&";; format /rdn 2,0; print $pth[2*j-1];;
                  format /rdn 5,3; print table2[2*j-1,k];;
                  format /rdn 3,0; print $st2[2*j-1,k];;
        endif;

        k = k + 1;
    endo;

    "&";; format /rdn 2,0; print $pth[2*j-1];;
          format /rdn 5,3; print table2[2*j-1,12];;
          format /rdn 3,0; print $st2[2*j-1,12];

    k = 1;
    do while k<=11;
        "&";; format /rdn 2,0; print $pth[2*j];;
              format /rdn 5,3; print table2[2*j,k];;
              format /rdn 3,0; print $st2[2*j,k];;
        k = k + 1;
    endo;

    "&";; format /rdn 2,0; print $pth[2*j];;
          format /rdn 5,3; print table2[2*j,12];;
          format /rdn 3,0; print $st2[2*j,12];

    j = j + 1;
endo;

print; print "Table Heterogeneity"; print;

j = 1;
do while j<=aftermax+1;

    sthet[2*j,.] = ")  "~")  "~")  "~")  "~")  "~")  "~")  "~")  "~")  ";

    k = 1;
    do while k<=9;
        /* Transform p-values into stars */
        if ptabhet[j,k] <= .01;
           sthet[2*j-1,k]  = "***";
        elseif ptabhet[j,k] <= .05;
           sthet[2*j-1,k]  = "** ";
        elseif ptabhet[j,k] <= .1;
           sthet[2*j-1,k]  = "*  ";
        elseif ptabhet[j,k] <= 2;
           sthet[2*j-1,k]  = "   ";
        endif;

        /* Print results */

        k = k + 1;
    endo;

    k = 1;
    do while k<=8;
        "&";; format /rdn 2,0; print $pth[2*j-1];;
              format /rdn 5,3; print tabhet[2*j-1,k];;
              format /rdn 3,0; print $sthet[2*j-1,k];;
        k = k + 1;
    endo;

    "&";; format /rdn 2,0; print $pth[2*j-1];;
          format /rdn 5,3; print tabhet[2*j-1,9];;
          format /rdn 3,0; print $sthet[2*j-1,9];

    k = 1;
    do while k<=8;
        "&";; format /rdn 2,0; print $pth[2*j];;
              format /rdn 5,3; print tabhet[2*j,k];;
              format /rdn 3,0; print $sthet[2*j,k];;
        k = k + 1;
    endo;

    "&";; format /rdn 2,0; print $pth[2*j];;
          format /rdn 5,3; print tabhet[2*j,9];;
          format /rdn 3,0; print $sthet[2*j,9];

    j = j + 1;
endo;
*/


/* Balance-of-covariates table for a single year after college.
   Each row k is one covariate: means before matching (balbef) with the test
   indicator ttest0, means after matching (balaft) with the test indicator
   ttest, and the bias reduction balperc. If aftermax>=2 the columns of year 2
   are printed, if aftermax==1 those of year 1. The heading now names the year
   that is actually printed; it used to say Year 2 in both cases. */
print;
if aftermax==1;
    print "Table Balance of Covariates in Year 1";
else;
    print "Table Balance of Covariates in Year 2";
endif;
print;

format /rdn 8,3;
if aftermax>=2;
    /* Only year 2 (and 6) after college */
    /* year 2 occupies columns 3 and 4 of balaft and column 2 of ttest and
       balperc; the bias reduction is printed in percent */
    k = 1;
    do while k<=rows(balaft);
        "&";; format /rdn 6,2; print balbef[k,1];;
        "&";; format /rdn 6,2; print balbef[k,2];;
        "&";; format /rdn 2,0; print ttest0[k];;
        "&";; format /rdn 6,2; print balaft[k,3];;
        "&";; format /rdn 6,2; print balaft[k,4];;
        "&";; format /rdn 2,0; print ttest[k,2];;
        "&";; format /rdn 4,0; print 100*balperc[k,2];; "&";
/*        "&";; format /rdn 6,2; print balaft[k,11];;
        "&";; format /rdn 6,2; print balaft[k,12];;
        "&";; format /rdn 2,0; print ttest[k,6];;
        "&";; format /rdn 4,0; print 100*balperc[k,6];*/
        k = k + 1;
    endo;
    /* mean bias reduction over rows 3 to last, and over rows 3 to last-1 */
    print "&      &      &  &      &      &  &";; 100*meanc(balperc[3:rows(balperc),2]);; "&";
    print "&      &      &  &      &      &  &";; 100*meanc(balperc[3:(rows(balperc)-1),2]);; "&";
elseif aftermax==1;
    /* Only year 1 */
    /* year 1 occupies columns 1 and 2 of balaft and column 1 of ttest and
       balperc; here the bias reduction is printed as a fraction, not in
       percent */
    k = 1;
    do while k<=rows(balaft);
        "&";; format /rdn 6,2; print balbef[k,1];;
        "&";; format /rdn 6,2; print balbef[k,2];;
        "&";; format /rdn 2,0; print ttest0[k];;
        "&";; format /rdn 6,2; print balaft[k,1];;
        "&";; format /rdn 6,2; print balaft[k,2];;
        "&";; format /rdn 2,0; print ttest[k,1];;
        "&";; format /rdn 4,2; print balperc[k,1];
        k = k + 1;
    endo;
    /* mean bias reduction over rows 3 to last, and over rows 3 to last-1 */
    print "&";; meanc(balperc[3:rows(balperc),1]);
    print "&";; meanc(balperc[3:(rows(balperc)-1),1]);
endif;


/* Balance table averaged over all years. It is only printed when exactly ten
   years are evaluated, because the column and row indices below are written
   out for ten years. The averages over years are weighted with
   table1[odd rows,5], the number of strata of each year. */
if aftermax==10;
print; print "Table Balance of Covariates, Mean Over All Years"; print;

format /rdn 8,3;
    k = 1;
    do while k<=rows(balaft);
/*        "&";; format /rdn 6,2; print balbef[k,1];;
        "&";; format /rdn 6,2; print balbef[k,2];;
        "&";; format /rdn 2,0; print ttest0[k];; */

        /* odd columns of balaft: untreated means; even columns: treated means */
        "&";; format /rdn 6,2; print meancw(balaft[k,1 3 5 7 9 11 13 15 17 19]',table1[1 3 5 7 9 11 13 15 17 19,5]);;
        "&";; format /rdn 6,2; print meancw(balaft[k,2 4 6 8 10 12 14 16 18 20]',table1[1 3 5 7 9 11 13 15 17 19,5]);;
        /* weighted share of years in which equality of means is not rejected */
        "&";; format /rdn 4,2; print meancw(ttest[k,.]',table1[1 3 5 7 9 11 13 15 17 19,5]);;
        /* weighted mean bias reduction in percent */
        "&";; format /rdn 4,0; print 100*meancw(balperc[k,.]',table1[1 3 5 7 9 11 13 15 17 19,5]);; "&";
        k = k + 1;
    endo;
    /* mean bias reduction over rows 3 to last, and over rows 3 to last-1,
       averaged over years with the same weights */
    print "&      &      &    &";; 100*meancw(meanc(balperc[3:rows(balperc),.]),table1[1 3 5 7 9 11 13 15 17 19,5]);; "&";
    print "&      &      &    &";; 100*meancw(meanc(balperc[3:(rows(balperc)-1),.]),table1[1 3 5 7 9 11 13 15 17 19,5]);; "&";
endif;


/*****************************************************************************
End of outer iterations

Advance the outer counter iter and close the outer do-while loop.
*****************************************************************************/

iter = iter + 1;
endo;

/* Close the auxiliary output file. It is opened with an output file ... on
   statement inside the loops, and the only matching output off statement
   sits inside a commented-out block, so without this statement the file
   would stay open when the program finishes. */
output off;


/*****************************************************************************
PROCEDURE meancw
*****************************************************************************/
/* Weighted mean.
   a: data, one observation per row
   w: column of weights, one weight per row of a
   Returns the cross product of a and w divided by the sum of the weights. */

PROC (1) = meancw(a,w);
    LOCAL wsum, wtot;
    /* weighted sum of the data */
    wsum = a'w;
    /* total weight */
    wtot = sumc(w);
    RETP(wsum/wtot);
ENDP;
