clear
clc

% Column layout of data.csv, as read into the matrix "data":
%   (1)  wave of survey
%   (2)  village identifier
%   (3)  observation number within a village
%   (4)  village size
%   (5)  whether the corresponding village is subject to PROGRESA
%   (6)  individual identifier
%   (7)  sex
%   (8)  age
%   (9)  highest grade completed
%   (10) whether child lives in a "poor" household
%   (11) whether child is enrolled in a school

data = csvread('data.csv',1,0); % read the dataset, skipping the first row (presumably a header line)

%==============================================================================
% Part A: Construct permutations
% Block permutation: treatment status is permuted across villages, so every
% individual within a village shares the same (permuted) treatment status.
%==============================================================================

B = 3000;                                   % number of permutations to consider
tmp = find(data(:,1)==1 & data(:,3)==1);    % first observation of each village in the baseline year
N = size(data,1);                           % number of rows in the data
M = size(tmp,1);                            % number of villages
D = data(tmp,5);                            % treatment vector at village level
numD = sum(D);                              % number of treated villages
Y = data(:,11);                             % the outcomes

% Village identifiers in ascending order; row i of perms is assigned to
% villageIds(i). When the identifiers are exactly 1..M this is the same
% mapping as comparing data(:,2) with i directly, but it also works when the
% identifiers are not consecutive integers starting at 1.
villageIds = sort(data(tmp,2));

% Block permutation at village level: each column of perms marks numD
% randomly chosen villages as treated (1) and leaves the rest at 0.
perms = zeros(M,B,'uint8');
for i = 1:B
    p = randperm(M);
    perms(p(1:numD),i) = 1;
end

% Expand the permuted village-level status to the individual level: every
% row of a village receives that village's row of perms.
obsperm = zeros(N,B,'uint8');
for i = 1:M
    tmp = find(data(:,2)==villageIds(i));
    obsperm(tmp,:) = repmat(perms(i,:),size(tmp,1),1);
end

%==============================================================================
% Part B: Empirical analysis based on DI (difference in mean)
%==============================================================================

%------------------------------------------------------------------------------
% Part B1: Compute the outcomes based on subgroups and treatment status
% A subgroup is defined by sex and highest grade completed at the baseline
% survey (2 sexes times 0~9 HGC =20 groups)
%------------------------------------------------------------------------------

group = data(:,7)*10+data(:,9);     % subgroup code: sex*10 + highest grade completed
sub =  unique(group);               % sorted list of subgroup codes
S = size(sub,1);                    % number of subgroups

% Rows that enter the DI comparison: survey waves after wave 2 and children
% from "poor" households. This selection and the subgroup membership do not
% depend on the treatment assignment, so they are computed once here instead
% of once per permutation.
eligible = data(:,1)>2 & data(:,10)==1;
grpRows = cell(S,1);                % row indices of each subgroup
grpY    = cell(S,1);                % outcomes of each subgroup, same order
for g = 1:S
    grpRows{g} = find(eligible & group==sub(g,1));
    grpY{g}    = Y(grpRows{g});
end

% compute treatment effects based on the actual data
tr = data(:,5);
diffvalue = zeros(S,1);             % treated mean minus control mean
teststats = zeros(S,1);             % absolute difference, used as test statistic
for g = 1:S
    trg = tr(grpRows{g});
    diffvalue(g,1) = mean(grpY{g}(trg==1)) - mean(grpY{g}(trg==0));
    teststats(g,1) = abs(diffvalue(g,1));
end

% compute treatment effects for each permutation
% (a subgroup with no treated or no control rows under a permutation yields
% NaN, because the mean of an empty selection is NaN)
permstats = zeros(S,B);
for iter = 1:B
    tr = obsperm(:,iter);
    for g = 1:S
        trg = tr(grpRows{g});
        permstats(g,iter) = abs(mean(grpY{g}(trg==1)) - mean(grpY{g}(trg==0)));
    end
end

%------------------------------------------------------------------------------
% Part B2: Turn the treatment-effect statistics into p-values for single
% hypothesis testing
%------------------------------------------------------------------------------

% Column 1 holds the statistic from the actual data, columns 2..B+1 hold the
% statistics from the permutations.
permstats1 = [teststats permstats];

% pvals(i) is one minus the share of the B+1 statistics in row i that are at
% least as large as the observed statistic of subgroup i.
pvals = 1 - sum(bsxfun(@ge, permstats1, teststats), 2)/(B+1);

% Same transformation, but treating each column j in turn as if it were the
% observed statistic.
permpvals = zeros(S,B+1);
for j = 1:B+1
    permpvals(:,j) = 1 - sum(bsxfun(@ge, permstats1, permstats1(:,j)), 2)/(B+1);
end

% For each subgroup, locate the observed value within its own row of
% permpvals sorted from largest to smallest; the position of the first entry
% that does not exceed it, divided by B+1, is the single-test p-value.
sig_single = zeros(S,1);
for i = 1:S
    ranked = sort(permpvals(i,:)','descend');
    firstHit = find(pvals(i,1) >= ranked, 1);
    sig_single(i,1) = 1 - firstHit/(B+1);
end

sim = 1-sig_single;

%------------------------------------------------------------------------------
%Part B3: Conduct multiple hypothesis testing based on results from Part B2
%------------------------------------------------------------------------------

% Order the hypotheses by their single-test p-value, smallest first.
% Column 1: subgroup index, column 2: single-test p-value.
tmp = [(1:S)' sim];
tmp = sortrows(tmp, 2);

for i = 1:S

    id = tmp(i,1);
    ti = pvals(id,1);

    % Hypotheses that come earlier in the ordering are dropped from the
    % maximum; the current one and all later ones stay in.
    select = true(S,1);
    if i>1
        select(tmp(1:i-1,1)) = false;
    end

    % Largest value over the remaining hypotheses, separately for each of the
    % B+1 columns (all entries of permpvals are non-negative).
    maxpermpvals = max(permpvals(select,:),[],1)';
    maxpermpvals = sort(maxpermpvals,'descend');

    cut = find(ti >= maxpermpvals);
    alpha = cut(1,1)/(B+1);
    tmp(i,3) = 1-alpha;
end

% Back to the original subgroup order; column 3 becomes the multiplicity-
% adjusted p-value.
test = sortrows(tmp,1);
test(:,3) = 1-test(:,3);

% Columns of A2: effect estimate, single-test p-value, adjusted p-value.
A2 = [diffvalue sim test(:,3)];

% Bonferroni: single-test p-value times the number of hypotheses.
bon = sim*S;

% Holm: the k-th smallest p-value is multiplied by S-k+1.
% NOTE(review): no running maximum is taken afterwards, so the Holm column
% need not be monotone in the single-test p-values - confirm this is intended.
holmtmp = [(1:S)', sim];
holmtmp = sortrows(holmtmp,2);
holmtmp = [holmtmp, (S:-1:1)'];
holmtmp = [holmtmp,holmtmp(:,2).*holmtmp(:,3)];
holmtmp = sortrows(holmtmp,1);

% Cap Bonferroni and Holm values at 1 and assemble the table. The second
% half of the subgroups is listed first; this indexing requires S to be even.
iota = ones(S,1);
tabletmp = [ A2, min(bon, iota), min(holmtmp(:,4),iota)];
AtableDI = [tabletmp(1+S/2:S,:); tabletmp(1:S/2,:)];

%==============================================================================
% Part C: Empirical analysis based on DID (diff in diff)
%==============================================================================

%------------------------------------------------------------------------------
% Part C1: Compute the outcomes based on subgroups and treatment status
% A subgroup is defined by sex and highest grade completed at the baseline
% survey (2 sexes times 0~9 HGC =20 groups)
%------------------------------------------------------------------------------

group = data(:,7)*10+data(:,9);     % subgroup code: sex*10 + highest grade completed
sub =  unique(group);               % sorted list of subgroup codes
S = size(sub,1);                    % number of subgroups

% Row selections that do not depend on the treatment assignment are built
% once: children from "poor" households, split into waves 1-2 ("before")
% and later waves ("after"), per subgroup.
poorBefore = data(:,1)<=2 & data(:,10)==1;
poorAfter  = data(:,1)>2  & data(:,10)==1;
rowsBefore = cell(S,1);  yBefore = cell(S,1);
rowsAfter  = cell(S,1);  yAfter  = cell(S,1);
for g = 1:S
    inGroup = group==sub(g,1);
    rowsBefore{g} = find(poorBefore & inGroup);
    rowsAfter{g}  = find(poorAfter  & inGroup);
    yBefore{g} = Y(rowsBefore{g});
    yAfter{g}  = Y(rowsAfter{g});
end

% difference-in-differences based on the actual data
tr = data(:,5);
teststats  = zeros(S,1);            % absolute DID, used as test statistic
diffvalue1 = zeros(S,1);            % signed DID estimate
for g = 1:S
    trB = tr(rowsBefore{g});
    trA = tr(rowsAfter{g});
    meantreat_diff   = mean(yAfter{g}(trA==1)) - mean(yBefore{g}(trB==1));
    meannotreat_diff = mean(yAfter{g}(trA==0)) - mean(yBefore{g}(trB==0));
    diffvalue1(g,1) = meantreat_diff - meannotreat_diff;
    teststats(g,1)  = abs(diffvalue1(g,1));
end

% difference-in-differences for each permutation
% (the per-permutation value is kept in its own variable so that the
% diffvalue vector produced in Part B is not overwritten)
permstats = zeros(S,B);
for iter = 1:B
    tr = obsperm(:,iter);
    for g = 1:S
        trB = tr(rowsBefore{g});
        trA = tr(rowsAfter{g});
        meantreat_diff   = mean(yAfter{g}(trA==1)) - mean(yBefore{g}(trB==1));
        meannotreat_diff = mean(yAfter{g}(trA==0)) - mean(yBefore{g}(trB==0));
        permdiff = meantreat_diff - meannotreat_diff;
        permstats(g,iter) = abs(permdiff);
    end
end

%------------------------------------------------------------------------------
% Part C2: Turn the treatment-effect statistics into p-values for single
% hypothesis testing
%------------------------------------------------------------------------------

% Column 1 holds the statistic from the actual data, columns 2..B+1 hold the
% statistics from the permutations.
permstats1 = [teststats permstats];

% pvals(i) is one minus the share of the B+1 statistics in row i that are at
% least as large as the observed statistic of subgroup i.
pvals = 1 - sum(bsxfun(@ge, permstats1, teststats), 2)/(B+1);

% Same transformation, but treating each column j in turn as if it were the
% observed statistic.
permpvals = zeros(S,B+1);
for j = 1:B+1
    permpvals(:,j) = 1 - sum(bsxfun(@ge, permstats1, permstats1(:,j)), 2)/(B+1);
end

% For each subgroup, locate the observed value within its own row of
% permpvals sorted from largest to smallest; the position of the first entry
% that does not exceed it, divided by B+1, is the single-test p-value.
for i = 1:S
    ranked = sort(permpvals(i,:)','descend');
    firstHit = find(pvals(i,1) >= ranked, 1);
    sig_single(i,1) = 1 - firstHit/(B+1);
end

sim = 1-sig_single;

%------------------------------------------------------------------------------
%Part C3: Conduct multiple hypothesis testing based on results from Part C2
%------------------------------------------------------------------------------

% Order the hypotheses by their single-test p-value, smallest first.
% Column 1: subgroup index, column 2: single-test p-value.
tmp = [(1:S)' sim];
tmp = sortrows(tmp, 2);

for i = 1:S

    id = tmp(i,1);
    ti = pvals(id,1);

    % Hypotheses that come earlier in the ordering are dropped from the
    % maximum; the current one and all later ones stay in.
    select = true(S,1);
    if i>1
        select(tmp(1:i-1,1)) = false;
    end

    % Largest value over the remaining hypotheses, separately for each of the
    % B+1 columns (all entries of permpvals are non-negative).
    maxpermpvals = max(permpvals(select,:),[],1)';
    maxpermpvals = sort(maxpermpvals,'descend');

    cut = find(ti >= maxpermpvals);
    alpha = cut(1,1)/(B+1);
    tmp(i,3) = 1-alpha;
end

% Back to the original subgroup order; column 3 becomes the multiplicity-
% adjusted p-value.
test = sortrows(tmp,1);
test(:,3) = 1-test(:,3);

% Columns of A4: DID estimate, single-test p-value, adjusted p-value.
A4 = [diffvalue1 sim test(:,3)];

% Bonferroni: single-test p-value times the number of hypotheses.
bon = sim*S;

% Holm: the k-th smallest p-value is multiplied by S-k+1.
% NOTE(review): no running maximum is taken afterwards, so the Holm column
% need not be monotone in the single-test p-values - confirm this is intended.
holmtmp = [(1:S)', sim];
holmtmp = sortrows(holmtmp,2);
holmtmp = [holmtmp, (S:-1:1)'];
holmtmp = [holmtmp,holmtmp(:,2).*holmtmp(:,3)];
holmtmp = sortrows(holmtmp,1);

% Cap Bonferroni and Holm values at 1 and assemble the table. The second
% half of the subgroups is listed first; this indexing requires S to be even.
iota = ones(S,1);
tabletmp = [ A4, min(bon, iota), min(holmtmp(:,4),iota)];
AtableDID = [tabletmp(1+S/2:S,:); tabletmp(1:S/2,:)];

