/*****************************************************************************/
/* "ASSESSING THE PERFORMANCE OF MATCHING ALGORITHMS WHEN ... "              */
/* Gauss File:  gpair08.G                                                    */
/*****************************************************************************/

new;

/* Open the run log (appending) and echo all output to the screen as well. */
output file="D:\\Matching Extremes\\gauss\\gpair08.out" on;
screen on;


/*****************************************************************************
 Outer iterations over the specifications f1 to f8
*****************************************************************************/

/* iter selects the matching setup; it determines score, mahal and caliper
   below and is used by group() to build the output folder name f<iter>.
   NOTE(review): the loop starts at 3, so setups 1 and 2 are not run by this
   file as written -- presumably a restart of an interrupted run; confirm. */
iter = 3;
do while iter<=8;


/*****************************************************************************
 Monte Carlo iterations to assess stochastics in the greedy algorithm
*****************************************************************************/

/* mciter counts the Monte Carlo repetitions (1 to 20) within the current
   setup; group() uses it to build the output sub-folder name MC<mciter>. */
mciter = 1;

/* NOTE(review): for setup 3 the repetitions start at 4, i.e. MC1-MC3 are
   skipped -- presumably because they were already produced; confirm. */
if iter==3;
    mciter = 4;
endif;

do while mciter<=20;


/*****************************************************************************
Primer decisions
*****************************************************************************/

/* Specification */

/* Broad (0) or narrow (1) probit model */
narrow = 0;

/* College degree to evaluate
0: AA
1: BA
2: MA+
*/
coll = 1;

/* Sex
0: women
1: men
*/
men = 1;

/* Matching on the p.score or on the index score
0: p.score      (setups 1-4)
1: index score  (setups 5-8)
*/
if iter<=4;
    score = 0;
else;
    score = 1;
endif;

/* Choose between Mahalanobis or pure score matching within calipers
0: score        (setups 1, 2, 5, 6)
1: Mahalanobis  (setups 3, 4, 7, 8)
*/
if iter==1 OR iter==2 OR iter==5 OR iter==6;
    mahal = 0;
else;
    mahal = 1;
endif;

/* Caliper width
0: p.score = 0.05 or index = 0.3   (odd setups)
1: p.score = 0.10 or index = 0.6   (even setups)
*/
if iter==1 OR iter==3 OR iter==5 OR iter==7;
    caliper = 0;
else;
    caliper = 1;
endif;

/* Translate the (score, caliper) choice into the numeric half-width pcal. */
if score==0;
    if caliper==0;
        pcal = 0.05;
    elseif caliper==1;
        pcal = 0.1;
    endif;
elseif score==1;
    if caliper==0;
        pcal = 0.3;
    elseif caliper==1;
        pcal = 0.6;
    endif;
endif;

/* Choose how many years after college you want the treatment effect */
/* maximum: 15 years after college */
aftermax = 10;


/*****************************************************************************
 Preparations
*****************************************************************************/

/* Load the data: one ASCII file per (probit model, sex, degree) combination.
   File name = b|n (broad/narrow) + w|m (women/men) + a|b|m (AA/BA/MA+). */

if narrow==0;
    if men==0;
        if coll==0;
            load S[] = "D:\\Matching Extremes\\stata\\data\\bwa.asc";
            print "Broad probit, Women, Associate's";
        elseif coll==1;
            load S[] = "D:\\Matching Extremes\\stata\\data\\bwb.asc";
            print "Broad probit, Women, Bachelor's";
        elseif coll==2;
            load S[] = "D:\\Matching Extremes\\stata\\data\\bwm.asc";
            print "Broad probit, Women, Master's";
            endif;
    elseif men==1;
        if coll==0;
            load S[] = "D:\\Matching Extremes\\stata\\data\\bma.asc";
            print "Broad probit, Men, Associate's";
        elseif coll==1;
            load S[] = "D:\\Matching Extremes\\stata\\data\\bmb.asc";
            print "Broad probit, Men, Bachelor's";
        elseif coll==2;
            load S[] = "D:\\Matching Extremes\\stata\\data\\bmm.asc";
            print "Broad probit, Men, Master's";
        endif;
    endif;
elseif narrow==1;
    if men==0;
        if coll==0;
            load S[] = "D:\\Matching Extremes\\stata\\data\\nwa.asc";
            print "Narrow probit, Women, Associate's";
        elseif coll==1;
            load S[] = "D:\\Matching Extremes\\stata\\data\\nwb.asc";
            print "Narrow probit, Women, Bachelor's";
        elseif coll==2;
            load S[] = "D:\\Matching Extremes\\stata\\data\\nwm.asc";
            print "Narrow probit, Women, Master's";
        endif;
    elseif men==1;
        if coll==0;
            load S[] = "D:\\Matching Extremes\\stata\\data\\nma.asc";
            print "Narrow probit, Men, Associate's";
        elseif coll==1;
            load S[] = "D:\\Matching Extremes\\stata\\data\\nmb.asc";
            print "Narrow probit, Men, Bachelor's";
        elseif coll==2;
            load S[] = "D:\\Matching Extremes\\stata\\data\\nmm.asc";
            print "Narrow probit, Men, Master's";
        endif;
    endif;
endif;

/* S is loaded as one long vector; k is the number of variables per record. */
if narrow>=1;
    k = 97;                                        /* number of variables */
elseif narrow==0;
    k = 105;                                       /* number of variables */
endif;

n = rows(S)/k;                                     /* number of observations */
S = reshape(S,n,k);

/*
S contains the variables:
01: id
02: t, treatment dummy
03: p, estimated propensity score
04: ind, estimated index score
05: sample weight of 1979
06: black dummy
07: hisp dummy
08: age 1979
09: year HS-diploma
10: yrdeg2, year received highest degree
11: wage79
...
27: wage95
28: exp79, actual experience
...
44: exp95
45: exps79, experience while enrolled in school
...
61: exps95
62: cexpm79, cumulated missing info dummies of expXX until year XX
...
78: cexpm95
79: cexpsm79, cumulated missing info dummies of expsXX until year XX
...
95: cexpsm95

Matching variables (first number: column in the narrow file, ** = not in the
narrow file; second number: column in the broad file):
96 96: ASVAB: math knowledge
** 97: ASVAB: auto + shop information
** 98: private school
** 99: expelled or suspended from school
** 100: curriculum college preparatory
** 101: curriculum general
97 102: highest grades of parents
** 103: occupation parents high
** 104: number of siblings
** 105: born in south
*/

/* If sample weights are not to be used, overwrite the weight column of S: */
/* S[.,5] = ones(n,1); */

/* Choosing the matching variables X which also contain the id, both scores
   and (as the last two columns) the black and hisp dummies. */
X = S[.,1 3 4 8:9 96:cols(S) 6:7];

/* number of matching variables = all columns of X minus id, p.score,
   index score and the two race dummies (age and year HS are counted) */
match = cols(X)-5;

format /rd 7,0;
print;
print "Number of matching variables" match;;
print;
print "Age caliper:              +/- 1";;
print;
print "Yrhs caliper:             +/- 1";;
print;

/*
X contains the variables (first number: narrow model, second: broad model):
01: id
02: p, estimated propensity score
03: ind, estimated index score
04: age 1979
05: year HS-diploma

06 06: ASVAB: math knowledge
** 07: ASVAB: auto + shop information
** 08: private school
** 09: expelled or suspended from school
** 10: curriculum college preparatory
** 11: curriculum general
07 12: highest grades of parents
** 13: occupation parents high
** 14: number of siblings
** 15: born in south
08 16: black
09 17: hispanic
*/

/* Select the treated (rows with treatment dummy S[.,2] equal to 1) */
X1 = selif(X,S[.,2]);
nn1 = rows(X1);

/* Year of the degree of treated */
deg1 = selif(S[.,10],S[.,2]);

/* Wage information of treated until 1994 (id plus wage79..wage94) */
wage1 = selif(S[.,1 11:26],S[.,2]);

/* Experience information of treated until 1994 (id plus exp79..exp94) */
exp1  = selif(S[.,1 28:43],S[.,2]);

/* Sample weights of treated */
swei1 = selif(S[.,5],S[.,2]);

/* Select the untreated */
X0 = selif(X,1-S[.,2]);
nn0 = rows(X0);

/* Wage information of untreated until 1994 */
wage0 = selif(S[.,1 11:26],1-S[.,2]);

/* Experience information of untreated until 1994 */
exp0  = selif(S[.,1 28:43],1-S[.,2]);

/* Close the log; outfix() and outvar() reopen it themselves.
   NOTE(review): the log is never reopened in this script, so from the second
   pass of the loops on, the header lines printed above reach the screen only. */
output off;

/* Release the large raw matrices before the matching starts. */
clear X, S, k, n;

/* Run the matching and write the arc files for this (iter, mciter) pair. */
group(X1,nn1,deg1,wage1,exp1,X0,nn0,wage0,exp0,swei1,pcal);


/*****************************************************************************
End of Monte Carlo iterations
*****************************************************************************/

mciter = mciter + 1;
endo;


/*****************************************************************************
End of outer iterations
*****************************************************************************/

iter = iter + 1;
endo;


/*****************************************************************************
MAIN PROCEDURE group
*****************************************************************************/

PROC(0) = group(X1,n1,deg1,wage1,exp1,X0,n0,wage0,exp0,swei1,pcal);
LOCAL Z,zbar,V,s1,s0,a1,after,DIST,DIS,WAG,EXC,PRO,IND,arcout,arcinfo,nowg1,lost1,lost0,notr,
      prop1,prop0,ind1,ind0,runtag,yrtag,fname;

/* Main procedure for one (iter, mciter) run.
   Inputs: X1/X0     matching matrices of treated/untreated,
           n1/n0     their row counts,
           deg1      degree year of the treated,
           wage*/exp* wage and experience panels (id in column 1),
           swei1     sample weights of the treated,
           pcal      caliper half-width.
   Reads the globals match, aftermax, iter and mciter.
   For every year 1..aftermax after college it writes one arcout and one
   arcinf file; nothing is returned. */

/* Scatter matrix of the matching covariates (columns 4..match+3 of X1 and
   X0 stacked), used to norm the Mahalanobis distance.
   "match" is the number of controls, which is globally defined. */
Z    = X1[.,4:(match+3)]|X0[.,4:(match+3)];
zbar = meanc(Z);
V    = Z'*Z - (n1+n0)*zbar*zbar';          /* "match" cross "match" matrix */
clear Z,zbar;

/* Distance matrix plus the wage/experience/score data of the units that have
   at least one admissible partner */
{DIST,wage1,exp1,wage0,exp0,s1,s0,a1,prop1,prop0,ind1,ind0} =
    distance(X1,wage1,exp1,X0,wage0,exp0,deg1,pcal,V,swei1);

clear X1,X0,deg1,V;

/* Report the results that do not depend on the year after college */
format /rd 4,0;
outfix(pcal,s1,n1,a1,s0,n0);

/* Folder part of the output file names: f<iter>\MC<mciter> */
runtag = ftos(iter,"%*.*lf",1,0) $+ "\\MC" $+ ftos(mciter,"%*.*lf",1,0);

/* One matching per year after college */
after = 1;
do while after<=aftermax;

    /* DIS is DIST adjusted for missing wage information in year "after";
       WAG, EXC, PRO, IND hold wages, experience, propensity scores and index
       scores in the same layout. notr==1 signals that no treated is left. */
    {DIS,WAG,EXC,PRO,IND,nowg1,lost1,lost0,notr} =
        wageinfo(DIST,wage1,exp1,wage0,exp0,prop1,prop0,ind1,ind0,after);

    if notr==1;
        print;
        print "No treated" after "years after college";
    else;
        /* Report the year-specific results */
        format /rd 4,0;
        outvar(nowg1,lost1,lost0,after,DIS,n1);

        /* Greedy pair matching; the results are input for Postxx.g */
        {arcout,arcinfo} = greed(DIS,WAG,EXC,PRO,IND);

        format /rd 7,0;
        yrtag = ftos(after,"%*.*lf",1,0);

        /* Arc list */
        fname = "D:\\Matching Extremes\\Sas\\f" $+ runtag $+ "\\arcout" $+ yrtag $+ ".txt";
        output file = ^fname on;
        screen off;
        print arcout;
        output off;

        /* Pair information (wages, scores, weights, experience) */
        fname = "D:\\Matching Extremes\\Arcinfo\\f" $+ runtag $+ "\\arcinf" $+ yrtag $+ ".out";
        format /rd 8,4;
        output file = ^fname on;
        print arcinfo;
        output off;

        screen on;
    endif;

    after = after + 1;
endo;

ENDP;

/*****************************************************************************
The end
*****************************************************************************/

/* Close the auxiliary output file and stop the main program. The procedure
   definitions that follow are not executed as statements. */
output off;
end;


/****************************************************************************/
/****************************************************************************/


/* PROCEDURES */

/*****************************************************************************
PROCEDURE outfix
*****************************************************************************/

PROC (0) = outfix(pcal,s1,n1,a1,s0,n0);
LOCAL used1, used0;

/* Appends the year-independent matching summary to the run log and the screen.
   pcal: caliper half-width; s1/s0: treated/untreated without any admissible
   partner; n1/n0: original numbers of treated/untreated; a1: average number
   of admissible controls per treated that has one.
   Reads the global score to label the caliper. Leaves the log closed. */

used1 = n1-s1;                      /* treated with at least one control   */
used0 = n0-s0;                      /* untreated with at least one treated */

output file="D:\\Matching Extremes\\gauss\\gpair08.out" on;
screen on;

format /rd 7,3;
if score==0;
    print "Propensity score caliper: +/-" pcal;;
elseif score==1;
    print "Index score caliper: +/-" pcal;;
endif;

format /rd 7,0;
print;
print "---------------------------------------------------------------------";
print s1 "out of" n1 "treated will not find a control.";
print used1 "out of" n1 "can choose on average among roughly " a1 "controls (controls might be counted more than once).";
print s0 "out of" n0 "untreated will not be used.";
print used0 "out of" n0 "can be used by" used1 "treated.";
print "=====================================================================";
print;

output off;

ENDP;


/*****************************************************************************
PROCEDURE outvar
*****************************************************************************/

PROC (0) = outvar(nowg1,lost1,lost0,after,DIS,n1);
LOCAL nleft;

/* Appends the summary for one year after college to the run log.
   nowg1: treated without a wage in that year; lost1/lost0: treated/untreated
   that lost all their partners; after: year after college; DIS: adjusted
   distance matrix (two header rows); n1: original number of treated.
   Leaves the log closed. */

nleft = rows(DIS)-2;                /* treated that can still be matched */

output file="D:\\Matching Extremes\\gauss\\gpair08.out" on;

print "Results" after "year(s) after college";
print "---------------------------------------------------------------------";
print lost1 "treated units have lost all their potential controls because the latter do
             not have wage information" after "year(s) after college.";
print lost0 "untreated units have lost all their potential treated because they have no
             wage information" after "year(s) after college.";
print nowg1 "treated do not have a wage" after "year(s) after college.";
print "---------------------------------------------------------------------";
print "Finally, only" nleft "treated units out of" n1 "can be matched.";
print "---------------------------------------------------------------------";
print;

output off;

ENDP;


/*****************************************************************************
PROCEDURE distance
*****************************************************************************/

PROC (12) = distance(X1,wage1,exp1,X0,wage0,exp0,deg1,pcal,V,swei1);
LOCAL DIST, hs1, hs0, n1, n0, cl, sc, i, j, d, Vinv, mate1, mate0, keep1, keep0,
      s1, s0, a1, prop1, prop0, ind1, ind0;

/* Builds the distance matrix DIST between treated (X1) and untreated (X0).

Before unmatched units are removed DIST is of order n1+2 cross n0+4:
  column 1      id's of the treated,
  column 2      yrdeg2 of the treated,
  column 3      (yrdeg2-yrhs) of the treated,
  last column   sample weights of the treated (swei1),
  row 1         id's of the untreated,
  row 2         yrhs of the untreated.
The interior holds the distance from each treated to each untreated unit. If
two units may not be matched together - score caliper, age, yrhs or race -
their distance stays -1.

Reads the globals score (0: caliper on column 2 of X, 1: on column 3) and
mahal (1: Mahalanobis distance normed by V, 0: absolute score difference).

Returns DIST, the wage/experience/score data of the units that have at least
one admissible partner, s1/s0 (numbers of treated/untreated without any
partner) and a1 (average number of partners per treated that has one; 0 if no
treated has one).
NOTE(review): if no treated or no untreated unit has a partner, the selif
calls at the end select nothing and the caller cannot continue. */

/* The scores of the untreated */
prop0 = X0[.,2];
ind0  = X0[.,3];
/* The scores of the treated */
prop1 = X1[.,2];
ind1  = X1[.,3];

format /rd 4,0;
n1 = rows(X1);
n0 = rows(X0);
print "Original number of treated (rows X1)   " n1;;
print;
print "Original number of untreated (rows X0) " n0;;
print;

DIST = (0|X1[.,1])~((X0[.,1]')|(-ones(n1,n0)));

/* To count the number of possible controls for every treated. */
mate1 = X1[.,1]~zeros(n1,1);                                    /* n1 cross 2 */
/* To count the number of possible treated for every untreated. */
mate0 = X0[.,1]~zeros(n0,1);                                    /* n0 cross 2 */

/* Number of covariates in V; they sit in columns 4..cl+3 of X1 and X0,
   followed by the black (cl+4) and hisp (cl+5) dummies. */
cl = cols(V);

print; print "number of var" cl;

/* Column of the score the caliper is applied to */
sc = 2+score;

/* V does not change inside the loops, so invert it once instead of once per
   admissible pair. */
if mahal==1;
    Vinv = invpd(V);
endif;

i = 1;
do while i<=n1;
   j = 1;
   do while j<=n0;
      if ((X0[j,sc]-pcal <= X1[i,sc] AND X1[i,sc] <= X0[j,sc]+pcal) AND
         (X0[j,4]-1     <= X1[i,4] AND X1[i,4] <= X0[j,4]+1 )   AND
         (X0[j,5]-1     <= X1[i,5] AND X1[i,5] <= X0[j,5]+1 )   AND
         (X0[j,cl+4] == X1[i,cl+4]) AND (X0[j,cl+5] == X1[i,cl+5]));
         if mahal==1;
             d = X1[i,4:(cl+3)]-X0[j,4:(cl+3)];
             DIST[i+1,j+1] = d*Vinv*d';
         elseif mahal==0;
             DIST[i+1,j+1] = abs(X1[i,sc] - X0[j,sc]);
         endif;
         /* The black and hisp dummies are used neither for the distance nor
            in V but only to ensure that different races are not matched. */
         mate1[i,2] = mate1[i,2] + 1;
         mate0[j,2] = mate0[j,2] + 1;
      endif;
   j = j + 1;
   endo;
i = i + 1;
endo;

/* Explicit 0/1 selectors: selif is documented for vectors of 1's and 0's,
   so the raw partner counts are not passed to it. */
keep1 = mate1[.,2] .> 0;
keep0 = mate0[.,2] .> 0;

/* How many treated will definitively not find a partner */
s1 = sumc(mate1[.,2].==0);

/* average number of mates of those who find one (0 if nobody finds one,
   which avoids a division by zero) */
if n1>s1;
    a1 = sumc(mate1[.,2])/(n1-s1);
else;
    a1 = 0;
endif;

/* How many untreated will definitively not find a partner */
s0 = sumc(mate0[.,2].==0);

/* Include information in matrix DIST:
year degree of treated: after this year wages will be compared,
year degree minus year HS for treated: treatment span,
year HS of controls,
sample weights of the treated in the last column.
*/
hs1 = X1[.,5];
hs0 = X0[.,5];

DIST = (DIST[1,1]|0|DIST[2:(n1+1),1])~(0|0|deg1)~(0|0|(deg1-
       hs1))~(DIST[1,2:(n0+1)]|hs0'|DIST[2:(n1+1),2:(n0+1)])~(0|0|swei1);

/* Now delete the rows and columns with only -1, i.e. those units who will not
   be matched. The two header rows, the three leading columns and the weight
   column are always kept. */
DIST  = selif(DIST,1|1|keep1);
deg1  = selif(deg1,keep1);
wage1 = selif(wage1,keep1);
exp1  = selif(exp1,keep1);
prop1 = selif(prop1,keep1);
ind1  = selif(ind1,keep1);
DIST  = (selif(DIST',1|1|1|keep0|1))';
wage0 = selif(wage0,keep0);
exp0  = selif(exp0,keep0);
prop0 = selif(prop0,keep0);
ind0  = selif(ind0,keep0);

RETP(DIST,wage1,exp1,wage0,exp0,s1,s0,a1,prop1,prop0,ind1,ind0);
ENDP;


/*****************************************************************************
PROCEDURE wageinfo
*****************************************************************************/

PROC (9) = wageinfo(DIS,wage1,exp1,wage0,exp0,prop1,prop0,ind1,ind0,after);
LOCAL WAG, EXC, PRO, IND, n1, n0, i, j, yr, haswage, rest, nowg1, mate, mati, lost1, lost0;

/* Restricts the distance matrix DIS to the pairs for which both units have
   wage information "after" years after college and builds the companion
   matrices.

   DIS has two header rows (id's and yrhs of the untreated), three leading
   columns (id, yrdeg2, yrdeg2-yrhs of the treated) and the sample weights of
   the treated in its last column. wage*/exp* hold the id in column 1 and the
   years 79..94 in columns 2..17, so year yr is found in column yr-77.

   Returns nine values: DIS, WAG, EXC, PRO, IND (same layout; wages,
   experience, propensity scores and index scores instead of distances and
   treated id's), nowg1 (treated without a wage in that year), lost1/lost0
   (treated/untreated that lost all partners) and notr. notr==1 means that no
   treated is left; the five matrices are then returned as scalar 0. */

n1 = rows(DIS)-2;
n0 = cols(DIS)-4;

/* First columns of the companion matrices. The two header entries are set to
   1 so that they survive the "wage > 0" filter below. */
WAG = 1|1|zeros(n1,1);
PRO = 1|1|prop1;
IND = 1|1|ind1;
EXC = 1|1|zeros(n1,1);

/* Wage and experience of each treated "after" years after the degree, if
   that year lies within the panel range 79..94. */
i = 3;
do while i<=n1+2;
   yr = DIS[i,2]+after;
   if yr>=79 AND yr<=94;
      WAG[i] = wage1[i-2,yr-77];
      EXC[i] = exp1[i-2,yr-77];
   endif;
   i = i + 1;
endo;

/* how many treated units do not have a wage ... year after college? */
haswage = WAG.>0;
nowg1 = n1 - (sumc(haswage)-2);

/* If there is no treated anymore go back. All nine declared values must be
   returned, otherwise the call fails instead of reporting notr = 1. */
if sumc(haswage)-2==0;
   RETP(0,0,0,0,0,nowg1,0,0,1);
endif;

/* Drop the treated units without wage information in the examined year after
   college diploma or without a wage in the panel range, and give all
   companion matrices the shape of DIS (weights in the last column). */
DIS  = selif(DIS,haswage);
rest = DIS[.,2:(n0+4)];
PRO  = selif(PRO,haswage)~rest;
IND  = selif(IND,haswage)~rest;
EXC  = selif(EXC,haswage)~rest;
WAG  = selif(WAG,haswage)~rest;

n1 = rows(DIS)-2;

/* If a control does not have wage information because it is missing or the
   year after "potential college" is out of the panel range, it will not be a
   potential control anymore. The panel ends in 1994, 1996 is not used. */

/* number of potential controls for each treated (headers marked by -1) */
mate = -1|-1|zeros(n1,1);

/* number of potential treated for each untreated (other columns marked by -1) */
mati = -1|-1|-1|zeros(n0,1)|-1;

i = 3;
do while i<=n1+2;
   j = 4;
   do while j<=n0+3;
      /* comparison year of the control: yrhs + treatment span + after */
      yr = DIS[2,j]+DIS[i,3]+after;
      if DIS[i,j]/=-1 AND yr>=79 AND yr<=94;
         WAG[i,j] = wage0[j-3,yr-77];
         EXC[i,j] = exp0[j-3,yr-77];
         PRO[i,j] = prop0[j-3];
         IND[i,j] = ind0[j-3];
         if WAG[i,j]==-1;           /* wage of the control is missing */
            DIS[i,j] = -1;
            PRO[i,j] = -1;
            IND[i,j] = -1;
            EXC[i,j] = -1;
         endif;
      elseif DIS[i,j]/=-1 AND (yr<79 OR yr>94);
         DIS[i,j] = -1;
         WAG[i,j] = -1;
         PRO[i,j] = -1;
         IND[i,j] = -1;
         EXC[i,j] = -1;
      endif;
      if DIS[i,j]/=-1;
         mate[i] = mate[i] + 1;
         mati[j] = mati[j] + 1;
      endif;
   j = j + 1;
   endo;
i = i + 1;
endo;

/* Some treated units might have lost their potential controls. */
DIS = selif(DIS,mate./=0);
WAG = selif(WAG,mate./=0);
PRO = selif(PRO,mate./=0);
IND = selif(IND,mate./=0);
EXC = selif(EXC,mate./=0);
lost1 = n1 - (rows(DIS)-2);
n1 = rows(DIS)-2;

/* If there is no treated anymore go back (again with all nine values). */
if n1==0;
   RETP(0,0,0,0,0,nowg1,lost1,0,1);
endif;

/* Some untreated units might have no potential treated. They must be eliminated. */
DIS = selif(DIS',mati./=0)';
WAG = selif(WAG',mati./=0)';
PRO = selif(PRO',mati./=0)';
IND = selif(IND',mati./=0)';
EXC = selif(EXC',mati./=0)';
lost0 = n0 - (cols(DIS)-4);

RETP(DIS,WAG,EXC,PRO,IND,nowg1,lost1,lost0,0);
ENDP;


/*****************************************************************************
PROCEDURE greed
*****************************************************************************/

/* Note that results of this procedure may vary each time it is used because of the
random start number of the treated. Sometimes more, sometimes less strata are generated
with generally different strata members. This procedure is also used to calculate the raw
differences. */

PROC(2) = greed(DIS,WAG,EXC,PRO,IND);
LOCAL key,r,nc,pick,npair,trt,ctl,arcout,arcinfo;

/* Greedy pair matching in random order of the treated.
   DIS, WAG, EXC, PRO, IND share one layout: two header rows, three leading
   columns, one column per untreated unit and the sample weights last.
   Returns arcout (pair arcs plus the arcs from and to the supersource,
   sorted by columns 1 and 2) and arcinfo (one row per pair, sorted by the
   id's, which are then dropped). */

/* Random sort key for the treated; -2 and -1 keep the two header rows on
   top. The same key is attached to all five matrices so that they stay
   aligned. */
key = -2|-1|rndu(rows(DIS)-2,1);

DIS = sortc(key~DIS,1);
DIS = DIS[.,2:cols(DIS)];
WAG = sortc(key~WAG,1);
WAG = WAG[.,2:cols(WAG)];
EXC = sortc(key~EXC,1);
EXC = EXC[.,2:cols(EXC)];
PRO = sortc(key~PRO,1);
PRO = PRO[.,2:cols(PRO)];
IND = sortc(key~IND,1);
IND = IND[.,2:cols(IND)];

/* Both results start with a dummy row of zeros to append to. */
arcout  = zeros(1,3);
arcinfo = zeros(1,12);

print "before loop:";;
format 7,4;
print time;
print;

/* Walk through the treated in their random order. Each one takes the still
   available untreated with the lowest distance (-1 entries are turned into
   missing values for the search). A treated whose row holds only -1 entries
   finds no control and is skipped. */
r = 3;
do while r <= rows(DIS);
    nc = cols(DIS);
    if counts(DIS[r,.]',-1)/=nc-4;
        pick    = minindc(miss(DIS[r,4:(nc-1)],-1)') + 3;
        arcout  = arcout|(DIS[r,1]~DIS[1,pick]~1);
        arcinfo = arcinfo|(DIS[r,1]~DIS[1,pick]~WAG[r,1]~WAG[r,pick]~PRO[r,1]~PRO[r,pick]~
                      IND[r,1]~IND[r,pick]~DIS[r,nc]~DIS[r,2]~EXC[r,1]~EXC[r,pick]);
        /* Remove the column of the used control everywhere: it must not be
           used again, and the matrices get smaller. */
        DIS = DIS[.,1:(pick-1) (pick+1):nc];
        WAG = WAG[.,1:(pick-1) (pick+1):cols(WAG)];
        EXC = EXC[.,1:(pick-1) (pick+1):cols(EXC)];
        PRO = PRO[.,1:(pick-1) (pick+1):cols(PRO)];
        IND = IND[.,1:(pick-1) (pick+1):cols(IND)];
    endif;
    r = r + 1;
endo;

npair = rows(arcout)-1;

print "after loop:";;
format 7,4;
print time;
print;
print "However, only ";;
format 3,0;
print npair;;
print "pairs could be produced.";

/* Arcs from the supersource (0) to the treated and from the controls back to
   the supersource. The leading row of zeros stays in arcout; it is deleted
   in postxx.g. */
trt = arcout[2:rows(arcout),1];
ctl = arcout[2:rows(arcout),2];
arcout = arcout|(zeros(npair,1)~trt~zeros(npair,1))|(ctl~zeros(npair,2));

/* Drop the leading row of zeros of arcinfo. */
arcinfo = arcinfo[2:rows(arcinfo),.];

/* Sort both outputs by the id's. POSTxx.G sorts arcout first, so without
   this it would combine the wrong rows of arcinfo and arcout. */
arcout  = sortmc(arcout,1|2);
arcinfo = sortmc(arcinfo,1|2);

/* Delete the two id columns of arcinfo */
arcinfo = arcinfo[.,3:cols(arcinfo)];

RETP(arcout,arcinfo);
ENDP;
