
% Root folder of the project. Set this before running the script; when it
% is left empty, the folders below are resolved relative to the current
% folder.
mainfold = '';

% Put the helper-function folder and the nlsy97 folder on the search path.
% 'nlsy97' is listed first so that it ends up ahead of the helper folder,
% which is the order obtained by adding the two folders one after the other.
addpath(fullfile(mainfold,'nlsy97'), fullfile(mainfold,'progs','matlab functions'))

% Work from progs/data: the relative file names used by the rest of the
% script are resolved against this folder.
cd(fullfile(mainfold,'progs','data'))

%% Person information
% Read the person-level file ('.' is treated as an empty value), order it
% by id and rename the wt_R1 column to pwt.
pers = readtable('pop.csv', ...
    'Delimiter',',', ...
    'ReadVariableNames',true, ...
    'TreatAsEmpty',{'.'});
pers = sortrows(pers,'id');
pers.Properties.VariableNames{'wt_R1'} = 'pwt';
N = height(pers);

% Group code (1-6) built from race / race_eth crossed with gender. Persons
% matching none of the rules keep NaN.
g1 = (pers.gender==1);
g2 = (pers.gender==2);

grp = nan(N,1);

% Codes 1-2 come from race and are only a fallback, so they are written
% first and overwritten below wherever a race_eth rule applies.
grp((pers.race==1) & g1) = 1;
grp((pers.race==1) & g2) = 2;

% Codes 3-6 come from race_eth; these four rules are mutually exclusive.
grp((pers.race_eth==1) & g1) = 3;
grp((pers.race_eth==1) & g2) = 4;
grp((pers.race_eth==2) & g1) = 5;
grp((pers.race_eth==2) & g2) = 6;

pers.grp = grp;

% The raw classification columns are not needed once grp exists.
pers.race = [];
pers.race_eth = [];
pers.gender = [];

%% Measurement information
% Read the measurement file ('.' is treated as an empty value) and order it
% by id, the same key used to order pers.
meas = readtable('meas.csv','Delimiter',',','ReadVariableNames',true,'TreatAsEmpty',{'.'});
meas = sortrows(meas,{'id'});

% Measurements are attached to persons by row position further down, so
% row i of meas must belong to row i of pers. A mismatch would pair people
% with somebody else's measurements without any visible symptom, hence the
% check is enforced rather than merely stored.
chk_measid = isequal(size(meas.id),size(pers.id)) && all(meas.id==pers.id);
if ~chk_measid
    error('GenderGapData:measIdMismatch', ...
        'The ids in meas.csv do not match the ids in pop.csv row by row.');
end

% Drop the id column and every column whose name contains 'arrest'.
meas(:,{'id'}) = [];
hasArrestInName = ~cellfun('isempty',strfind(meas.Properties.VariableNames,'arrest'));
meas(:,hasArrestInName) = [];

% measSimp is the column name with a trailing _6, _7 or _8 removed.
measName = meas.Properties.VariableNames;
measSimp = regexprep(measName,'_(6|7|8)$','');

% Classify every measurement column:
%   * columns with exactly two distinct non-missing values, and the columns
%     named in the list below, are recoded in place to 1..K (position of
%     the value among the sorted distinct values); measType holds K;
%   * every other column is left untouched and gets measType = Inf.
measType = zeros(size(measName));
for j = 1:numel(measType)
    I = ~isnan(meas{:,j});
    uniw = unique(meas{I,j});
    if numel(uniw)==2 || any(strcmp(measSimp(j),{'mom_parent_style','MomDegree','DadDegree','family_income_group'}))
        [~,w] = ismember(meas{:,j},uniw);
        meas{I,j} = w(I);          % missing entries stay NaN
        measType(j) = numel(uniw);
    else
        measType(j) = inf;
    end
end

% 1-by-(number of measurements) struct array with the name and type of each
% measurement column.
measInfo = struct('name',measName,...
            'type',num2cell(measType));

%% Choice part
% Read the panel file ('.' is treated as an empty value) and order it by id
% and, within id, by age.
panel = readtable('panel.csv','Delimiter',',','ReadVariableNames',true,'TreatAsEmpty',{'.'});
panel = sortrows(panel,{'id','age'});

% Panel rows are later selected person by person through their id, so rows
% whose id does not occur in pers are never used. Report them instead of
% letting them disappear unnoticed.
chk_panelid = all(ismember(panel.id,pers.id));
if ~chk_panelid
    warning('GenderGapData:panelIdUnmatched', ...
        '%d row(s) of panel.csv have an id that is not in pop.csv; they will be ignored.', ...
        nnz(~ismember(panel.id,pers.id)));
end

%% DATA
% One struct per person, seeded with the person-level columns of pers. The
% loop adds the person's measurement row and the panel histories.
DATA = table2struct(pers);

for i = 1:N

    % Measurement row of person i (row i of meas).
    measRow = meas{i,:};
    DATA(i).meas = measRow;

    % Panel rows of person i, in the order they have in panel.
    P = panel(panel.id==pers.id(i),:);
    nRows = size(P,1);

    DATA(i).T = nRows;

    % Period index: rounded age distance from the person's lowest age, plus 1.
    DATA(i).t = round(P.age-min(P.age))+1;

    DATA(i).hgc = P.hgc;
    DATA(i).EdProm = P.EdPromotion;
    DATA(i).Arrest = P.Arrest;

    % For hgc values 9 to 15: running count (1,2,...) of the rows observed
    % at that value. Rows with any other hgc keep NaN.
    attempt = nan(nRows,1);
    for h = 9:15
        atH = (P.hgc==h);
        attempt(atH) = (1:nnz(atH));
    end
    DATA(i).Attempt = attempt;

    % Outcome of every available measurement: the value itself when the
    % type is Inf, otherwise a 1-by-K logical indicator of the recoded
    % category. Cells of missing measurements stay empty.
    has = ~isnan(measRow);
    yCell = cell(1,numel(measName));
    for j = find(has)
        if isinf(measType(j))
            yCell{j} = measRow(j);
        else
            yCell{j} = (measRow(j) == (1:measType(j)));
        end
    end
    DATA(i).measY = yCell;
    DATA(i).measHas = has;

end

%% Add X's
% Attach the regressor matrices returned by the helper functions promX,
% arrestX and typeX, together with the matching outcome indicators. The
% helpers receive the person's struct as it stands at the time of the call,
% so the order of the statements below is kept deliberately.

for i = 1:numel(DATA)

    % Called before any prom* field of this person is written.
    X = promX(DATA(i));

    % Cells / counts indexed by hgc value; only values 9..15 that occur for
    % this person are filled.
    g = DATA(i).hgc;
    pX = cell(1,15);
    pY = cell(1,15);
    pN = zeros(1,15);
    for h = unique(g(g>=9 & g<=15))'
        rowsH = (g==h);
        pX{h} = X(rowsH,:);
        pY{h} = bsxfun(@eq,DATA(i).EdProm(rowsH),(0:1));   % columns: EdProm==0, EdProm==1
        pN(h) = nnz(rowsH);
    end
    DATA(i).promX = pX;
    DATA(i).promY = pY;
    DATA(i).promN = pN;
    DATA(i).promHas = find(pN>0);

    % Arrest block: regressors, [Arrest==0, Arrest==1] indicators and the
    % number of regressor rows.
    aX = arrestX(DATA(i));
    DATA(i).arrestX = aX;
    DATA(i).arrestY = bsxfun(@eq,DATA(i).Arrest,(0:1));
    DATA(i).arrestN = size(aX,1);

    DATA(i).typeX = typeX(DATA(i));

end

%% Drop observations
% Number of non-missing measurements per person. It is computed before
% anyone is dropped, so entry k refers to the k-th person of the full
% sample. (Taking the size of measHas along dimension 2 would return the
% total number of measurement columns for every person.)
num_meas = cellfun(@nnz,{DATA.measHas});

% Remove the persons that were not assigned to any group.
DATA(isnan(vertcat(DATA.grp))) = [];

% Rescale pwt so that it sums to the number of remaining persons, then
% write the rescaled values back, one per struct.
pwt = vertcat(DATA.pwt);
pwt = numel(DATA)*pwt./sum(pwt);
pwt = num2cell(pwt);
[DATA(:).pwt] = pwt{:};

%% save data
% Store the person structs and the measurement descriptions in a MAT-file
% in the current folder.
outFile = 'Gender Gap Data.mat';
save(outFile,'DATA','measInfo')







