% Simulations based on DTUS for:
% Cheng Chou, Ruoyao Shi, "What Time Use Surveys Can (And Cannot) Tell Us
% About Labor Supply", Journal of Applied Economics, 2021, forthcoming
% Sub-sample from DTUS to illustrate properties of the estimators
% Vary sample size, and correlation b/w X & Z
% Randomly pick one out of seven days & compare with one week (Table 1)
% Then randomly pick one out of five & compare with five day total (Table A1)
% Set the working directory to contain data "mtus.txt", "est.m", and "est5.m".
% Ruoyao Shi

clear; clc;

%% Load DTUS data & sample selection
% Keep respondents aged 25-54 who report positive work time over the week,
% both in the diary total and in the CPS-type weekly measure.
raw         = readtable('mtus.txt');
% Column 15 - age; column 23 - CPS type hours; column 27 - ATUS type minutes
% (divided by 60 to express the diary entries in hours).
dat         = [table2array(raw(:,15)), table2array(raw(:,23)), table2array(raw(:,27))/60];
in_age      = dat(:,1) > 24 & dat(:,1) < 55;
dat         = dat(in_age,:);
% Rows are laid out as 7 consecutive diary days per person: fold the diary
% hours into a person-by-day matrix and read the CPS hours off each
% person's first row.
n_person    = size(dat,1) / 7;
hour        = reshape(dat(:,3), [7, n_person]).';
cps_week    = dat(1:7:end, 2);
worked      = sum(hour,2) > 0 & cps_week > 0;
% 7 day DTUS
hour        = hour(worked,:);
cps_week    = cps_week(worked);
N           = numel(cps_week);
% 5 day DTUS (first day is Sunday, so drop the first and last columns)
hour5       = hour(:,2:end-1);
% Drop the scratch variables so that only hour, cps_week, N and hour5 remain.
clear raw dat in_age n_person worked;

%% Generate H & X & Z
rng(1951)

% Scale factors applied to the weekly noise that is added to X (one column
% of X per entry). The same noise enters U through week, so larger values
% tie X more strongly to U; the first entry (0) leaves X equal to Z.
rho_grid    = [0,.385,.92,2.55];

% 7 day DTUS
coeff       = pca(hour);                 % columns of coeff - coef. of PCs
z           = hour * coeff(:,1);         % built from the diary hours before noise is added
en          = normrnd(0,2,size(hour));   % day-level noise, mean 0 and s.d. 2
noise_week  = sum(en,2);                 % each person's noise summed over the 7 days
hour        = hour + en;
week        = sum(hour,2);
cps_week    = cps_week + noise_week;     % CPS-type hours receive the same weekly noise
u           = week - sum(coeff(:,1)) * z;
x           = repmat(z,1,numel(rho_grid)) + noise_week * rho_grid;

% 5 day DTUS (reuses the noise drawn for the five middle days)
coeff5      = pca(hour5);
z5          = hour5 * coeff5(:,1);
en5         = en(:,2:(end-1));
noise_week5 = sum(en5,2);
hour5       = hour5 + en5;
week5       = sum(hour5,2);
u5          = week5 - sum(coeff5(:,1)) * z5;
x5          = repmat(z5,1,numel(rho_grid)) + noise_week5 * rho_grid;
% Remove the scratch sums so the workspace holds only the named quantities.
clear noise_week noise_week5;
%% Resampling scheme
% Every column of para is one simulation design: row 1 holds the resample
% size and row 2 the column index into rho_grid. Sample size varies slowest,
% so all rho values are visited for a given n before moving to the next n.
n_grid      = [250,500,1000,2500];
n_rho       = numel(rho_grid);
n_size      = numel(n_grid);
para        = [reshape(repmat(n_grid,n_rho,1),1,[]); repmat(1:n_rho,1,n_size)];
K           = size(para,2);      % number of designs
n_values    = para(1,:);
rho_indices = para(2,:);
R = 10000;                       % Monte Carlo replications per design
clear n_rho n_size;

%% Estimation
% Row k / column r of each matrix stores the second element of the
% coefficient vector returned by est (7 day) or est5 (5 day) for design k in
% replication r. The last argument of est/est5 selects the estimator
% (1, 2, 3 or 4 below); est.m and est5.m are external files.
B0  = zeros(K,R); % CPS estimator
B1  = zeros(K,R); B15 = zeros(K,R); % week estimator (full week & 5 day)
B2  = zeros(K,R); B25 = zeros(K,R); % impute estimator (full week & 5 day)
B3  = zeros(K,R); B35 = zeros(K,R); % pool estimator (full week & 5 day)
B4  = zeros(K,R); B45 = zeros(K,R); % day estimator (full week & 5 day)

% NOTE(review): the rng(...) call earlier in this script seeds the client
% session; draws made inside parfor normally come from the workers' own
% random streams, so results may not be reproducible run-to-run - confirm
% if exact replication is required.
parfor r = 1:R
    for k = 1:K
        n           = n_values(k); rho_index = rho_indices(k);
        sample_id   = unidrnd(N,[n,1]); % obs. ID no. of a resampled sample (drawn with replacement)
        t           = unifrnd(0,1,[n,1]);
        % Randomly draw one day with prob. (.25,.1,.1,.1,.1,.1,.25): each
        % threshold that t exceeds moves the day index up by one, and
        % (t>0) supplies the base index 1.
        D           = (t>0) + (t>.25) + (t>.35) + (t>.45) + (t>.55) + (t>.65) + (t>.75);
        % Same individuals and same t, but equal-probability thresholds over
        % five days, so the selected day may differ from D.
        D5          = (t>0) + (t>.2) + (t>.4) + (t>.6) + (t>.8);
        CPS_week    = cps_week(sample_id);
        Week        = week(sample_id); Week5 = week5(sample_id);
        % Pick entry (sample_id(i), D(i)) for every i through linear
        % indexing. This returns the same n-by-1 vector as
        % diag(hour(sample_id,D)) without building the n-by-n intermediate.
        ATUS_day    = hour(sub2ind(size(hour), sample_id, D));
        ATUS_day5   = hour5(sub2ind(size(hour5), sample_id, D5));
        X           = x(sample_id,rho_index); X5 = x5(sample_id,rho_index);
        Z           = z(sample_id); Z5 = z5(sample_id);
        b0          = est(CPS_week,[],X,Z,1);   B0(k,r)  = b0(2);
        b1          = est(Week,[],X,Z,1);       B1(k,r)  = b1(2);
        b15         = est5(Week5,[],X5,Z5,1);   B15(k,r) = b15(2);
        b2          = est(ATUS_day,D,X,Z,2);    B2(k,r)  = b2(2);
        b25         = est5(ATUS_day5,D5,X5,Z5,2);B25(k,r) = b25(2);
        b3          = est(ATUS_day,D,X,Z,3);    B3(k,r)  = b3(2);
        b35         = est5(ATUS_day5,D5,X5,Z5,3);B35(k,r) = b35(2);
        b4          = est(ATUS_day,D,X,Z,4);    B4(k,r)  = b4(2);
        b45         = est5(ATUS_day5,D5,X5,Z5,4);B45(k,r) = b45(2);
    end
end

%% Summary statistics of estimator performance
% True beta value (7 day and 5 day designs)
b = sum(coeff(:,1)); b5 = sum(coeff5(:,1));

% One entry per design for every estimator. The Bias* vectors hold the
% SQUARED bias, and the variance divides by R, so MSE = Bias + Var.
[MSE0,  Bias0,  Var0 ] = deal(zeros(K,1));
[MSE1,  Bias1,  Var1 ] = deal(zeros(K,1));
[MSE2,  Bias2,  Var2 ] = deal(zeros(K,1));
[MSE3,  Bias3,  Var3 ] = deal(zeros(K,1));
[MSE4,  Bias4,  Var4 ] = deal(zeros(K,1));
[MSE15, Bias15, Var15] = deal(zeros(K,1));
[MSE25, Bias25, Var25] = deal(zeros(K,1));
[MSE35, Bias35, Var35] = deal(zeros(K,1));
[MSE45, Bias45, Var45] = deal(zeros(K,1));

% Average squared deviation of a row of draws from a centre, and the squared
% gap between the mean of the draws and a target value.
mean_sq_dev = @(draws, centre) (draws - centre) * (draws - centre)' / R;
sq_bias     = @(draws, target) (mean(draws) - target)^2;

for k = 1:K
    % MSE: deviations measured from the true beta
    MSE0(k)   = mean_sq_dev(B0(k,:),  b);
    MSE1(k)   = mean_sq_dev(B1(k,:),  b);
    MSE2(k)   = mean_sq_dev(B2(k,:),  b);
    MSE3(k)   = mean_sq_dev(B3(k,:),  b);
    MSE4(k)   = mean_sq_dev(B4(k,:),  b);
    MSE15(k)  = mean_sq_dev(B15(k,:), b5);
    MSE25(k)  = mean_sq_dev(B25(k,:), b5);
    MSE35(k)  = mean_sq_dev(B35(k,:), b5);
    MSE45(k)  = mean_sq_dev(B45(k,:), b5);
    % Bias^2
    Bias0(k)  = sq_bias(B0(k,:),  b);
    Bias1(k)  = sq_bias(B1(k,:),  b);
    Bias2(k)  = sq_bias(B2(k,:),  b);
    Bias3(k)  = sq_bias(B3(k,:),  b);
    Bias4(k)  = sq_bias(B4(k,:),  b);
    Bias15(k) = sq_bias(B15(k,:), b5);
    Bias25(k) = sq_bias(B25(k,:), b5);
    Bias35(k) = sq_bias(B35(k,:), b5);
    Bias45(k) = sq_bias(B45(k,:), b5);
    % Variance: deviations measured from the Monte Carlo mean
    Var0(k)   = mean_sq_dev(B0(k,:),  mean(B0(k,:)));
    Var1(k)   = mean_sq_dev(B1(k,:),  mean(B1(k,:)));
    Var2(k)   = mean_sq_dev(B2(k,:),  mean(B2(k,:)));
    Var3(k)   = mean_sq_dev(B3(k,:),  mean(B3(k,:)));
    Var4(k)   = mean_sq_dev(B4(k,:),  mean(B4(k,:)));
    Var15(k)  = mean_sq_dev(B15(k,:), mean(B15(k,:)));
    Var25(k)  = mean_sq_dev(B25(k,:), mean(B25(k,:)));
    Var35(k)  = mean_sq_dev(B35(k,:), mean(B35(k,:)));
    Var45(k)  = mean_sq_dev(B45(k,:), mean(B45(k,:)));
end
% The helper handles are scratch only; remove them before the workspace is saved.
clear mean_sq_dev sq_bias;

% Rows follow the design order; columns are the estimators in the order
% listed (7 day tables first, then the 5 day tables).
disp([MSE0 MSE1 MSE2 MSE3 MSE4]);
disp([MSE15 MSE25 MSE35 MSE45]);
disp([Bias0 Bias1 Bias2 Bias3 Bias4]);
disp([Bias15 Bias25 Bias35 Bias45]);
disp([Var0 Var1 Var2 Var3 Var4]);
disp([Var15 Var25 Var35 Var45]);

%% Save simulation results to file
% Writes every variable currently in the workspace.
save('simulations.mat');
    
