%%%%% Empirical application in Han and Lee(2019)%%%%%
%%%%% Estimation of Parametric and Semiparametric Models %%%%%
%%%%% Bootstrap for Semiparametric Estimators is running %%%%%
%%%%% Normalizations: No constant terms and beta1 = alpha1 = 0 %%%%%
%%%%% To designate the variable for scale normalization, see line 77 %%%%%

clear; 
clc;

%% Data Loading 
data= xlsread('011719_dataset.xlsx');
n = size(data,1) ;

bb = 400 ; % Number of bootstrappings 

kne = floor(3* n^(1/7)) ; % Number of Polynomials for epsilon 
knv = floor(3* n^(1/7)) ;

cop_opt = 0 ;   % Choose a copula 

y = data(:,12) ;
d = data(:,10) ;

z_num = data(:,8) ; 
z_more = data(:,9) ;

x_fs = data(:,1);
x_msa = data(:,2) ;
x_age = data(:,3) ;
x_male = data(:,4) ;
x_educ = data(:,5) ;
x_region1 = data(:,13) ;
x_region2 = data(:,14) ;
x_region3 = data(:,15) ;
x_region4 = data(:,16) ; % Base dummy
x_white = data(:,17) ;
x_black = data(:,18) ;
x_asian = data(:,20) ;
x_race_min = data(:,19) ;  % Minority, base dummy
x_married = data(:,21) ;

x_phealth1 = data(:,33) ;
x_phealth2 = data(:,34) ;
x_phealth3 = data(:,35) ;
x_phealth4 = data(:,36) ;
x_phealth5 = data(:,37) ;

x_mhealth1 = data(:,38) ;
x_mhealth2 = data(:,39) ;
x_mhealth3 = data(:,40) ;
x_mhealth4 = data(:,41) ;
x_mhealth5 = data(:,42) ;

x_income = data(:,24) ;

z_std_num = data(:,28) ;

x_std_fs = data(:,25) ;
x_std_age = data(:,26) ;
x_std_income = data(:,27) ;
x_std_educ = data(:,29) ;

x_sick32 = data(:,31) ;
x_sick34 = data(:,32) ;

x_mat = [x_std_age, x_std_educ, x_std_fs, x_std_income, x_male, x_msa, x_region1, x_region2, x_region3, x_white, x_black, x_asian, x_married, x_phealth2, x_phealth3, x_phealth4, x_phealth5, x_mhealth2, x_mhealth3, x_mhealth4, x_mhealth5, x_sick32, x_sick34];
z_mat = [z_std_num, z_more] ;

x_par_mat = [ones(n,1), x_mat] ;


dx = size(x_mat, 2) ;
dx_par = size(x_par_mat, 2) ;
dz = size(z_mat, 2) ;

fix = dx; % Normalized coefficient in semiparametric models 

dataset = [y, d, x_mat, z_mat];  % used in fmincon 
par_dataset = [y, d, x_par_mat, z_mat];

%% Specifications : Copula

% Normal copula
if cop_opt == 0
    rho_mid = 0.5 ;
    rho_L = -0.9;
    rho_H = 0.9;
end
% Example 3.2: The Frank family
if cop_opt == 2
    rho_mid = 3.446;    % sp_rho = 0.5
    rho_L = rho_mid - 2;   % lower bound is infinity
    rho_H = rho_mid + 2;    % upper bound is infinity
    %EXCEPT rho = 0;
end    
% Example 3.3: The Clayton family (The Kimeldorf and Sampson family, in Joe)
if cop_opt == 3
    rho_mid = 1.076;    % sp_rho = 0.5
    rho_L = 0 + 0.01;   % lower bound is -1, but somehow error occurs...
    rho_H = rho_mid + 2;    % upper bound is infinity
    %EXCEPT rho = 0;
end% Example 3.5: The Gumbel family
if cop_opt == 5
    rho_mid = 1.541;    % sp_rho = 0.5
    rho_L = 1 + 0.01;   % lower bound is 1
    rho_H = rho_mid + 2;    % upper bound is infinity
end



%% Options

options = optimset('LargeScale','off', ...
               'HessUpdate','bfgs', ...
               'Algorithm', 'active-set',...
               'Hessian','off', ...
               'GradObj','off', ...
               'DerivativeCheck','off',...
               'Display', 'off', ...
               'MaxFunEvals', 1e8);

%% Estimation
kk = 2*dx + dz + 2 ; % alpha, gamma, beta, delta, rho 
kk_par = 2*dx_par + dz + 2; % For parametric models, we have a constant term

if cop_opt == 0
    copula_hat = @(u1,u2,rho)copulacdf('Gaussian',[u1,u2],[1,rho;rho,1]);
    %copula = @(u1,u2,rho)normcop(u1,u2,rho);
elseif cop_opt == 2
    copula_hat = @(u1,u2,rho)copulacdf('Frank',[u1,u2],rho);
elseif cop_opt == 3
    copula_hat = @(u1,u2,rho)copulacdf('Clayton',[u1,u2],rho);
elseif cop_opt == 5
    copula_hat = @(u1,u2,rho)copulacdf('Gumbel',[u1,u2],rho);
end


x_semi_bar = mean(x_mat) ;
x_semi_q50 = quantile(x_mat, 0.5) ;
x_semi_q25 = quantile(x_mat, 0.25) ;
x_semi_q75 = quantile(x_mat, 0.75) ;

x_par_bar = mean(x_par_mat) ;
x_par_q50 = quantile(x_par_mat, 0.5) ;
x_par_q25 = quantile(x_par_mat, 0.25) ;
x_par_q75 = quantile(x_par_mat, 0.75) ;

%% Parametric Model Estimation: for initial value of semiparametric estimation 


first_mat = [ones(n,1), x_mat,z_mat] ;

first_est = inv(first_mat' * first_mat) * first_mat' * d ;
d_hat = first_mat * first_est ;

alpha_tsls_init = first_est(2:dx+1) ;
gamma_tsls_init = first_est(dx+2:dx+1+dz) ;

second_mat_hat = [ones(n,1), x_mat, d_hat];
tsls = inv(second_mat_hat' * second_mat_hat) * second_mat_hat' * y;

beta_tsls_init = tsls(2:dx+1) ;
delta_tsls_init = tsls(dx+2) ;

norm_par_0 = [first_est; tsls; -rho_mid];

[norm_par_est, norm_logL_val] = fmincon(@(par)logL(cop_opt, par_dataset, par, dx_par, dz),norm_par_0, [], [], [], [], [], [], @(par)logL_constraint(par, dx_par, dz, cop_opt),options);

par_est_alpha = norm_par_est(2:dx_par) ; %without constant
par_est_alpha_cons = norm_par_est(1); %alpha coefficient on the constant 
par_est_gamma = norm_par_est(dx_par + 1: dx_par + dz) ;
par_est_beta_cons = norm_par_est(dx_par + dz + 1) ; % beta coefficient on the constant 
par_est_beta = norm_par_est(dx_par + dz + 2: dx_par*2 + dz) ; %without constant
par_est_delta = norm_par_est(dx_par*2 + dz + 1) ;
par_est_rho = norm_par_est(kk_par) ;

par_est_theta = [par_est_alpha; par_est_gamma; par_est_beta; par_est_delta; par_est_rho] ;

par_est_ate_mean = normcdf(x_par_bar*[par_est_beta_cons; par_est_beta] + par_est_delta, 0, 1) - normcdf(x_par_bar*[par_est_beta_cons; par_est_beta], 0, 1) ;
par_est_ate_q50 = normcdf(x_par_q50*[par_est_beta_cons; par_est_beta] + par_est_delta, 0, 1) - normcdf(x_par_q50*[par_est_beta_cons; par_est_beta], 0, 1) ;
par_est_ate_q25 = normcdf(x_par_q25*[par_est_beta_cons; par_est_beta] + par_est_delta, 0, 1) - normcdf(x_par_q25*[par_est_beta_cons; par_est_beta], 0, 1) ;
par_est_ate_q75 = normcdf(x_par_q75*[par_est_beta_cons; par_est_beta] + par_est_delta, 0, 1) - normcdf(x_par_q75*[par_est_beta_cons; par_est_beta], 0, 1) ;

table_par_ate = [par_est_ate_mean; par_est_ate_q50; par_est_ate_q25; par_est_ate_q75] ;


par_logL_val = -norm_logL_val ;

xlswrite('norm_par_est_init.xlsx', par_est_theta) ;

%% Semiparametric Model Estimation

init_alpha = par_est_alpha ;
init_gamma = par_est_gamma ;
init_beta = par_est_beta ;
init_delta = par_est_delta ;
init_rho = par_est_rho ;

xie_0 = zeros(kne,1) ;
xiv_0 = zeros(knv,1) ;
cons_0 = 1 ;
    
semi_par_0 = [init_alpha; init_gamma; init_beta; init_delta; init_rho; xie_0; cons_0; xiv_0; cons_0];


% Here the base-coefficients are set (Scale normalization) %

alpha_base = init_alpha(fix) ;   
beta_base = init_beta(fix) ;

[semi_par_est, semi_logL_val] = fmincon(@(par)semi_logl(dataset, par, cop_opt, dx, dz, kne, knv),semi_par_0, [], [], [], [], [], [], @(par)semi_constraint_emp(par, cop_opt, dx, dz, beta_base, alpha_base, fix) ,options);


semi_est_alpha = semi_par_est(1:dx) ;
semi_est_gamma = semi_par_est(dx+1:dx+dz) ;
semi_est_beta = semi_par_est(dx+dz+1:2*dx+dz) ;
semi_est_delta = semi_par_est(2*dx+dz+1);
semi_est_rho = semi_par_est(kk);

semi_est_ate_mean = dist(normcdf(x_semi_bar*semi_est_beta + semi_est_delta,0,1), semi_par_est(kk+1:kk+kne+1), kne) - dist(normcdf(x_semi_bar*semi_est_beta,0,1), semi_par_est(kk+1:kk+kne+1), kne);
semi_est_ate_q50 = dist(normcdf(x_semi_q50*semi_est_beta + semi_est_delta,0,1), semi_par_est(kk+1:kk+kne+1), kne) - dist(normcdf(x_semi_q50*semi_est_beta,0,1), semi_par_est(kk+1:kk+kne+1), kne);
semi_est_ate_q25 = dist(normcdf(x_semi_q25*semi_est_beta + semi_est_delta,0,1), semi_par_est(kk+1:kk+kne+1), kne) - dist(normcdf(x_semi_q25*semi_est_beta,0,1), semi_par_est(kk+1:kk+kne+1), kne);
semi_est_ate_q75 = dist(normcdf(x_semi_q75*semi_est_beta + semi_est_delta,0,1), semi_par_est(kk+1:kk+kne+1), kne) - dist(normcdf(x_semi_q75*semi_est_beta,0,1), semi_par_est(kk+1:kk+kne+1), kne);

table_semi_ate = [semi_est_ate_mean; semi_est_ate_q50; semi_est_ate_q25; semi_est_ate_q75] ;

sieve_logL_val = -semi_logL_val ;

xlswrite('semi_par_est.xlsx',semi_par_est) ;

if cop_opt == 0 
    par_est_sp = copulastat('Gaussian',par_est_rho,'type','Spearman');
    semi_est_sp = copulastat('Gaussian',semi_est_rho,'type','Spearman');

elseif cop_opt == 2 
    par_est_sp = copulastat('Frank',par_est_rho,'type','Spearman');
    semi_est_sp = copulastat('Frank',semi_est_rho,'type','Spearman');
end




%% Results 

alpha_table = [par_est_alpha, semi_est_alpha] ;
gamma_table = [par_est_gamma, semi_est_gamma] ;
beta_table = [par_est_beta, semi_est_beta];
delta_table = [par_est_delta, semi_est_delta];
rho_table = [par_est_rho, semi_est_rho];
sp_table = [par_est_sp, semi_est_sp] ;
ates_table = [table_par_ate, table_semi_ate] ;


table = [alpha_table; gamma_table; beta_table; delta_table; rho_table; sp_table; ates_table] ;

table 

save(sprintf('semi_empirical_estimation_%d_%d_%d.mat',cop_opt, kne, knv),'norm_par_est', 'semi_par_est', 'par_logL_val', 'sieve_logL_val', 'table', 'alpha_table', 'gamma_table', 'beta_table', 'delta_table', 'rho_table', 'sp_table', 'ates_table', 'cop_opt', 'kne', 'knv', 'dx', 'dz', 'kk', 'kk_par');



%% Bootstrap 

boot_semi_est_theta_store = zeros(bb, kk) ; 
boot_semi_sp_store = zeros(bb,1) ;
boot_semi_ate_bar_store = zeros(bb, 1) ;
boot_semi_ate_q50_store = zeros(bb, 1) ;
boot_semi_ate_q25_store = zeros(bb, 1) ; 
boot_semi_ate_q75_store = zeros(bb, 1) ;
boot_semi_logL_val_store = zeros(bb,1) ;

text_inform = 'Bootstrap starts' ; 
disp(text_inform) ; 

weight_mu = 1; 



parfor j = 1 : bb 
    
    weight = exprnd(weight_mu,n,1) ; 
    
    [boot_semi_par_est, boot_semi_logL_val] = fmincon(@(par)boot_semi_logl(dataset, par, cop_opt, dx, dz, kne, knv, weight),semi_par_0, [], [], [], [], [], [], @(par)semi_constraint_emp(par, cop_opt, dx, dz, beta_base, alpha_base, fix) ,options);

       
    boot_semi_est_theta_store(j,:) = boot_semi_par_est(1:kk)' ;
    boot_semi_logL_val_store(j,1) = -boot_semi_logL_val ; 
        
    boot_semi_est_beta = boot_semi_par_est(dx+dz+1:dx+dz+dx);
    boot_semi_est_delta = boot_semi_par_est(dx+dz+dx+1) ;
    
    
    if cop_opt == 0  
        boot_semi_sp_store(j,1) = copulastat('Gaussian',boot_semi_par_est(kk),'type','Spearman'); 
    elseif cop_opt == 2 
        boot_semi_sp_store(j,1) = copulastat('Frank',boot_semi_par_est(kk),'type','Spearman');    
    end
       
    boot_semi_ate_bar_store(j,1) = dist(normcdf(x_semi_bar*boot_semi_est_beta + boot_semi_est_delta,0,1), boot_semi_par_est(kk+1:kk+kne+1), kne) - dist(normcdf(x_semi_bar*boot_semi_est_beta,0,1), boot_semi_par_est(kk+1:kk+kne+1), kne) ;
    boot_semi_ate_q50_store(j,1) = dist(normcdf(x_semi_q50*boot_semi_est_beta + boot_semi_est_delta, 0, 1), boot_semi_par_est(kk+1:kk+kne+1), kne) - dist(normcdf(x_semi_q50*boot_semi_est_beta, 0,1), boot_semi_par_est(kk+1:kk+kne+1), kne) ;
    boot_semi_ate_q25_store(j,1) = dist(normcdf(x_semi_q25*boot_semi_est_beta + boot_semi_est_delta, 0, 1), boot_semi_par_est(kk+1:kk+kne+1), kne) - dist(normcdf(x_semi_q25*boot_semi_est_beta, 0,1), boot_semi_par_est(kk+1:kk+kne+1), kne) ;
    boot_semi_ate_q75_store(j,1) = dist(normcdf(x_semi_q75*boot_semi_est_beta + boot_semi_est_delta, 0, 1), boot_semi_par_est(kk+1:kk+kne+1), kne) - dist(normcdf(x_semi_q75*boot_semi_est_beta, 0,1), boot_semi_par_est(kk+1:kk+kne+1), kne) ;
      
    
end



boot_semi_est_alpha_store = boot_semi_est_theta_store(:,1:dx) ;
boot_semi_est_gamma_store = boot_semi_est_theta_store(:,dx+1:dx+dz) ;
boot_semi_est_beta_store = boot_semi_est_theta_store(:,dx+dz+1:2*dx+dz) ;
boot_semi_est_delta_store = boot_semi_est_theta_store(:,2*dx+dz+1) ;
boot_semi_est_rho_store = boot_semi_est_theta_store(:,kk) ;


boot_semi_ate_store = [boot_semi_ate_bar_store, boot_semi_ate_q50_store, boot_semi_ate_q25_store, boot_semi_ate_q75_store] ;

mean_boot_theta = mean(boot_semi_est_theta_store) ;
sd_boot_theta= std(boot_semi_est_theta_store) ;
mean_boot_ate = mean(boot_semi_ate_store) ;
sd_boot_ate = std(boot_semi_ate_store) ;


save(sprintf('semi_empirical_bootstrap_%d_%d_%d_%d.mat',cop_opt, bb, kne, knv),'semi_par_est', 'boot_semi_est_theta_store', 'boot_semi_ate_store', 'mean_boot_theta', 'sd_boot_theta', 'mean_boot_ate', 'sd_boot_ate', 'bb', 'dx', 'dz', 'kk', 'semi_par_0', 'boot_semi_logL_val_store', 'weight_mu') ;



