% VERSION XVI
% This is the main file. It loads the data, defines the parameters and
% calls the likelihood minimization routine.
% Individuals choose between Education, Work and Unemployment
% where schooling is a 0,1,2,3 variable. It takes into account the distinction
% between participating in education and actually gaining the qualification
% needed to access the following education stage.
% It is an optimal stopping problem.
% Choices are modelled between age 16-41.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% [ This is an unbalanced panel! Individuals are kept in the survey as long as
%     we can define their status. ]
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Start from a clean slate: drop every workspace variable, every global
% and the contents of the command window before anything is declared.
clear all
clear global
clc

% ---- Globals declared by this script ---------------------------------
% (presumably read by the likelihood / simulation helpers - they are not
%  visible from this file)

% Grids of the state space and the number of points on each grid
global K2 K0 Sc Yp Xc Tc Uc
global dimK2 dimK0 dimSc dimEc dimXc dimYp dimTc dimUc

% Scaling constant and structural constants
global scale
global beta tut rho

% Per-period tabulations of the two skill measures (filled before simulating)
global Ab2Mat Ab0Mat

% Row indices of each transition type, plus rows with an observed wage
global ind1 ind2 ind3 ind4 ind5 ind6 ind7 ind8 ind9 ind10
global ind11 ind12 ind13 ind14 ind15 ind16 ind17 ind18 ind19 ind20
global indW

% Data columns and qualification dummies
global id Ab2 Ab0 Prd Sch Qua Wag Exp Q1 Q2 Q3 Ypr

% Sample sizes: simulated individuals, periods, individuals, observations
global nn np ni no

% Optimiser settings and parameter-block bookkeeping
global tollerance fhundle THETAF tWAG tLAM tDTA tPHI tSCH
global dTHETA Estimate Sequential

% Unobserved heterogeneity: location index and iteration cap
global idh niter

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% ROUTINE DATA
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Each switch below is a 0/1 flag.

% 1: estimate the model, 0: import estimated parameters
Estimate   = 1;

% 1: sequential likelihood, 0: FIML
Sequential = 1;

% 1: discount factor 0.95, 0: discount factor 0
Dynamic    = 1;

% 1: run the mini routine that refines the starting guess for Rs, 0: skip it
Guess      = 1;

% 1: compute and display simulated choices
Simulation = 1;

% 1: use saved coefficients as the starting guess
Saved      = 0;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% DATA
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Read the sample. The .mat file must provide the matrix `data`, whose
% columns are split into named series below (one row per observation).
load('versionXVI.mat');

id  = data(:, 1);          % individual identifier
Ab2 = data(:, 2);          % skill measure (age 16 according to the grid comments)
Ab0 = data(:, 3);          % skill measure (age 7 according to the grid comments)
Ypr = data(:, 4);          % parental income
Prd = data(:, 5);          % period
Sta = data(:, 6);          % status code (1 is treated as enrolled, 2 as working later on)
Sch = data(:, 7);          % schooling
Qua = data(:, 8);          % qualification level
Typ = data(:, 9);          % transition type code
Wag = data(:, 10);         % wage
Exp = data(:, 11);         % experience
Obs = (1:size(data, 1))';  % running row number

clear('data');

%%% Data coding

% Express money amounts in units of `scale`.
scale = 1e3;
Ypr   = Ypr / scale;
Wag   = Wag / scale;

% Discretise the two skill measures and parental income with MyDiscretize,
% passing kp as its second argument.
kp  = 8;
Ab2 = MyDiscretize(Ab2, kp);
Ab0 = MyDiscretize(Ab0, kp);
Ypr = MyDiscretize(Ypr, kp);
Ypr=MyDiscretize(Ypr,kp);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% INDICATORS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Sample sizes
idTab = tabulate(id);
ni = sum(idTab(:, 2) ~= 0);   % N = number of individuals (ids with a non-zero count)
no = length(id);              % number of observations
np = max(Prd);                % number of periods

% Row indices of every transition type coded in Typ
ind1  = find(Typ == 1);    % W -> W
ind2  = find(Typ == 2);    % U -> W
ind3  = find(Typ == 3);    % U -> U
ind4  = find(Typ == 4);    % W -> U
ind5  = find(Typ == 5);    % S0 -> S1
ind6  = find(Typ == 6);    % S0 -> U
ind7  = find(Typ == 7);    % S1 -> S2 | Q1=1
ind8  = find(Typ == 8);    % S1 -> W  | Q1=1
ind9  = find(Typ == 9);    % S1 -> W  | Q1=0
ind10 = find(Typ == 10);   % S1 -> U  | Q1=1
ind11 = find(Typ == 11);   % S1 -> U  | Q1=0

ind13 = find(Typ == 13);   % S2 -> S3 | Q2=1
ind14 = find(Typ == 14);   % S2 -> W  | Q2=1
ind15 = find(Typ == 15);   % S2 -> W  | Q2=0
ind16 = find(Typ == 16);   % S2 -> U  | Q2=1
ind17 = find(Typ == 17);   % S2 -> U  | Q2=0

ind19 = find(Typ == 19);   % S3 -> W  | Q3=1
ind20 = find(Typ == 20);   % S3 -> W  | Q3=1
                           % NOTE(review): same label as type 19 - confirm what type 20 is

% Rows with status 2 and a non-zero wage:  -> W observed
indW = find((Sta == 2) & (Wag ~= 0));

% Schooling dummies, one per qualification level
Q1 = (Qua == 1);
Q2 = (Qua == 2);
Q3 = (Qua == 3);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% MODEL
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%% State variables
% Frequency tables of the two discretised skill measures; their first
% column lists the values that occur in the data.
A2 = tabulate(Ab2);
A0 = tabulate(Ab0);

% Grids
K2 = A2(:, 1);            % skill age 16 grid (alternative: MyGrid(Ab2,[0 33 50 66 100]))
K0 = A0(:, 1);            % skill age 7 grid  (alternative: MyGrid(Ab0,[0 33 50 66 100]))
Sc = (0:max(Sch))';       % schooling grid
Yp = MyGrid(Ypr, 0);      % parental income grid
Xc = (0:max(Exp))';       % experience grid
Tc = (1:(65 - 16))';      % time grid
Uc = (0:2)';              % unobserved heterogeneity grid

% Dimensions
dimEc = 4;                % shock
dimK2 = numel(K2);        % skill type age 16
dimK0 = numel(K0);        % skill type age 7
dimSc = numel(Sc);        % schooling: No Qual, O Level, A Level, H.E.
dimXc = numel(Xc);        % experience
dimYp = numel(Yp);        % parental income
dimTc = numel(Tc);        % time
dimUc = numel(Uc);        % unobserved heterogeneity

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% PARAMETERS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Discount factor selected by the Dynamic flag; any other flag value
% leaves beta untouched.
switch Dynamic
    case 1
        beta = 0.95;
    case 0
        beta = 0;
end

% Rs param: tuition fees (3-by-1, all zero)
tut = [0; 0; 0];

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Reduced-form regressions that provide starting values for the structural
% parameters. Several of them pair a row with its successor (row+1), so the
% very last row of the sample must never be selected as a "current" row:
% id(row+1) would index past the end of the data.
lastObs=max(Obs);

% OLS to find good guess for initial values in wage equation
t1=size(indW,1);
X=[ones(t1,1) Ab2(indW) Ab0(indW) Q1(indW) Q2(indW) Q3(indW)...
    Exp(indW) Exp(indW).*Q1(indW) Exp(indW).*Q2(indW) Exp(indW).*Q3(indW)];

Y=log(Wag(indW));
[bW]=ols(Y,X);

% OLS to find good guess for initial values in Rs (wage in levels)
Y=Wag(indW);
[bS]=ols(Y,X);

% Probit to find good guess for probability of success in school
indL1=find(Prd==1&Sta==1&Obs<lastObs); % find individuals enrolled O Levels
indL1m=(id(indL1)~=id(indL1+1)); % check they are observed period after
indL1(indL1m==1)=[];
[bL1]=probit(Q1(indL1+1),[ones(size(indL1,1),1),Ab2(indL1),Ab0(indL1)]); % coeff in P(Q1=1|S1)

indL2=find(Prd==3&Sta==1&Obs<lastObs); % find individuals enrolled A Levels
indL2m=(id(indL2)~=id(indL2+1)); % check they are observed period after
indL2(indL2m==1)=[];
[bL2]=probit(Q2(indL2+1),[ones(size(indL2,1),1),Ab2(indL2),Ab0(indL2)]); % coeff in P(Q2=1|S2)

% Probit to find good guess for probability of having a job offer
indD=find(Sta~=2&Obs<lastObs); % not working -> receive a job offer
% drop rows whose successor belongs to another individual or has status 1
indDm=((id(indD)~=id(indD+1))|(Sta(indD+1)==1));
indD(indDm==1)=[];
% regressors are taken from the following row (indD+1)
[bD]=probit(Sta(indD+1)==2,[ones(size(indD,1),1),Ab2(indD+1),Ab0(indD+1),...
    Q1(indD+1),Q2(indD+1),Q3(indD+1),Exp(indD+1)...
    (Prd(indD+1)==2),(Prd(indD+1)==2).*Q1(indD+1) ]); %

% Probit to find good guess for probability of being fired
indP=find(Sta==2&Obs<lastObs); % working -> might be fired
indPm=(id(indP)~=id(indP+1)); % check they are observed period after
indP(indPm==1)=[];
[bP]=probit(Sta(indP+1)==3,[ones(size(indP,1),1),Ab2(indP+1),Ab0(indP+1),...
    Q1(indP+1),Q2(indP+1),Q3(indP+1),Exp(indP+1)]); %

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% UNOBSERVED HETEROGENEITY
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Cluster analysis to find good guess for initial values in wage equation:
% the dimUc cluster centres of log wages become the type-specific intercepts.
[IDX,C]=kmeans(log(Wag(indW)),dimUc);

% The Statistics Toolbox kmeans returns the per-observation cluster labels
% first and the centres second, whereas some user-supplied kmeans routines
% return the centres first. Use whichever output actually holds dimUc
% centres, so that labels are never mistaken for centres.
% NOTE(review): confirm which kmeans is on the path.
if numel(IDX)==dimUc
    centres=IDX(:);
else
    centres=C(:);
end

pk=repmat(1/dimUc,[dimUc 1]); % Get a guess of proportions
srt=sortrows([centres pk]); IDX=srt(:,1); pk=srt(:,2); % Sort in ascending order
IDX1=[IDX(1);diff(IDX)]; % Define as deviation from previous type (any dimUc)

% bD.beta=[bD.beta(1)+0.5;-0.5;bD.beta(2:end)];
% bP.beta=[bP.beta(1)-0.5;+0.5;bP.beta(2:end)];
% bL1.beta=[bL1.beta(1)+0.5;-0.5;bL1.beta(2:end)];

idk=[1; cumsum(id(1:end-1)~=id(2:end))+1]; % id ordered 1,2,...,N
idh =(idk-1)*max(Prd)+Prd; % het location to be used in computing likelihood

niter=10; % number of max iteration in ESM algorithm

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% INITIALIZING THETA
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% ---- Starting values, block by block --------------------------------
sig_0 = (bW.sige^0.5)';               % S.D.: Rs shock, measurement error
sig_E = repmat(1e7 / scale, [3, 1]);  % S.D.: Rs taste shock
al_0  = [IDX1; bW.beta(2:end)];       % Rw coeff: wage equation
gm_0  = repmat(bS.beta(1), [3, 1]);   % Rs coeff: one constant per stage (O Lev, A Lev, HE)
la_0  = [bL1.beta; bL2.beta];         % Lambda coeff's
dt_0  = bD.beta;                      % Delta coeff's
ph_0  = bP.beta;                      % Phi coeff's

% ---- Lower bounds ----------------------------------------------------
% NOTE(review): sig_E - 1e6 is negative for the current scale - confirm
% that the optimiser tolerates a negative lower bound on a S.D.
sigL_0 = sig_0 * 1e-10;
sigL_E = sig_E - 1e6;
alL_0  = al_0 - 3e1;
laL_0  = la_0 - 3e1;
dtL_0  = dt_0 - 3e1;
phL_0  = ph_0 - 3e1;
gmL_0  = gm_0 - 1e3;

% ---- Upper bounds ----------------------------------------------------
sigU_0 = sig_0 * 1e2;
sigU_E = sig_E + 1e6;
alU_0  = al_0 + 3e1;
laU_0  = la_0 + 3e1;
dtU_0  = dt_0 + 3e1;
phU_0  = ph_0 + 3e1;
gmU_0  = gm_0 + 1e3;

% ---- Parameter blocks, their bounds and their lengths ----------------
ThetaWAG = [sig_0; al_0];
blWAG    = [sigL_0; alL_0];
buWAG    = [sigU_0; alU_0];
tWAG     = length(ThetaWAG);

ThetaLAM = la_0;
blLAM    = laL_0;
buLAM    = laU_0;
tLAM     = length(ThetaLAM);

ThetaDTA = dt_0;
blDTA    = dtL_0;
buDTA    = dtU_0;
tDTA     = length(ThetaDTA);

ThetaPHI = ph_0;
blPHI    = phL_0;
buPHI    = phU_0;
tPHI     = length(ThetaPHI);

ThetaSCH = [sig_E; gm_0];
blSCH    = [sigL_E; gmL_0];
buSCH    = [sigU_E; gmU_0];
tSCH     = length(ThetaSCH);

% Stacked bounds, in the same block order as the parameter vector
bl = [blWAG; blLAM; blDTA; blPHI; blSCH];
bu = [buWAG; buLAM; buDTA; buPHI; buSCH];

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Assemble the starting parameter vector THETA = [WAG; LAM; DTA; PHI; SCH],
% optionally seeded from a previous run (Saved) and/or refined by the
% f_guessSCH mini routine (Guess); the type proportions pk are appended last.
THETAF=[ThetaWAG; ThetaLAM; ThetaDTA; ThetaPHI];

if Estimate==1 && Saved==0
    if Guess==1 % improve on guess
        [ThetaSCH]=f_guessSCH(ThetaSCH,pk);
    end
    % Built in every case: previously THETA was left undefined when
    % Guess==0, which made the concatenation with pk below fail.
    THETA=[THETAF;ThetaSCH];
elseif Estimate==1 && Saved==1 % use previous version estimates
    load FresultsXVIsq.mat
    THETA=THETA2;  clear THETA2;
    ThetaWAG=THETA(1                           :tWAG);
    ThetaLAM=THETA(tWAG+1                      :tWAG+tLAM);
    ThetaDTA=THETA(tWAG+tLAM+1                 :tWAG+tLAM+tDTA);
    ThetaPHI=THETA(tWAG+tLAM+tDTA+1            :tWAG+tLAM+tDTA+tPHI);
    ThetaSCH=THETA(tWAG+tLAM+tDTA+tPHI+1       :tWAG+tLAM+tDTA+tPHI+tSCH);
    THETAF=[ThetaWAG;ThetaLAM;ThetaDTA;ThetaPHI];
    if Guess==1 % improve on guess
        % same two-argument call as in the branch above
        [ThetaSCH]=f_guessSCH(ThetaSCH,pk);
    end
    % Rebuild from the blocks in both cases so that any trailing entries
    % of the saved vector (e.g. saved proportions) are not kept in front
    % of the pk appended below.
    THETA=[THETAF;ThetaSCH];
end

% Any other flag combination (e.g. Estimate==0) builds no starting vector
% here; fail with a clear message instead of an undefined-variable error.
if ~exist('THETA','var')
    error('main:thetaUndefined', ...
        'THETA was not initialised (Estimate=%d, Saved=%d, Guess=%d).', ...
        Estimate,Saved,Guess);
end

%%% Final THETA
THETA=[THETA;pk];
THETAg=THETA; % save final guess vector

logL=f_logLk(THETA); % log likelihood at the starting guess

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% SIMULATION
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

nn = 3e3; % number of individuals to simulate

% Skill age 16 and skill age 7 data distributions, one cell per period.
% NOTE(review): every period is tabulated from the Prd==1 rows (the loop
% index only selects the cell) - confirm this is intended.
isFirstPeriod = (Prd == 1);
for t = 1:max(Prd)
    Ab2Mat{t} = MyTabulate(Ab2(isFirstPeriod));
    Ab0Mat{t} = MyTabulate(Ab0(isFirstPeriod));
end

% Remove the data series from this workspace (without the `global`
% keyword, clear does not delete the global copies).
clear id Ab0 Ab2 Ypr Prd Sch Qua Wag Exp;

% Simulate at the starting guess and keep the tables for the later
% comparison with the estimated model.
[Smat, Qmat, Tmat, Wmat, Kmat, TmatH] = f_simulation(THETA);
Tmat1  = Tmat;
TmatH1 = TmatH;
[repmat((1:21)', [1 1 np]), Tmat1, TmatH1]

disp('10 seconds pause before routine starts');
disp('press CTRL+C to break program');
pause(10)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% RE-ESTIMATE
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

dTHETA = 0.2;
clear global THETAF;
tic;   % start timing the estimation

% Run the estimation routine from the starting vector, within [bl, bu].
[THETA2, logL, logL1, logLE, THETAg, logLSCH, iter] = ...
    TJMP_XVIII(THETA, bl, bu);

%%% Standard Errors
% Numerical gradient of f_se at the estimates, then the inverse of its
% cross-product (presumably an outer-product-of-gradients covariance).
S      = gradp(@f_se, THETA2);
H      = S' * S;
I      = inv(H);
se     = sqrt(diag(I));
zratio = THETA2 ./ se;

aa = toc / 60;   % minutes to complete routine

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% RESULTS
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

diary SresultsXVI.txt

% Row labels for the results table, one per element of the parameter
% vector and in the same order: wage block (13), lambda (6), delta (9),
% phi (7), schooling block (3 S.D. + 3 constants) and type proportions (3),
% i.e. 44 labels. The schooling block holds a single constant per stage,
% so the g3 skill labels are disabled like those of g1 and g2; leaving them
% in produced 46 labels and a dimension mismatch when the table is built.
coeff={
'sigU   =  ';
'al_01  =  ';'al_02  =  ';'al_03  =  ';
'al_K2  =  ';'al_K0  =  ';
'al_S1  =  ';'al_S2  =  ';'al_S3  =  ';
'al_X0  =  ';'al_X1  =  ';'al_X2  =  ';'al_X3  =  ';
'l1_01  =  ';%'l1_02  =  ';
'l1_K2  =  ';'l1_K0  =  ';
'l2_01  =  ';%'l2_02  =  ';
'l2_K2  =  ';'l2_K0  =  ';
'dt_01  =  ';%'dt_02  =  ';
'dt_K2  =  ';'dt_K0  =  ';
'dt_S1  =  ';'dt_S2  =  ';'dt_S3  =  ';
'dt_X1  =  ';  
'dt_01  =  ';'dt_11  =  ';
'ph_01  =  ';%'ph_02  =  ';
'ph_K2  =  ';'ph_K0  =  ';
'ph_S1  =  ';'ph_S2  =  ';'ph_S3  =  ';
'ph_X1  =  ';
'sigE1  =  ';'sigE2  =  ';'sigE3  =  ';
'g1_00  =  ';%'g1_K2  =  ';'g1_K0  =  ';
'g2_00  =  ';%'g2_K2  =  ';'g2_K0  =  ';
'g3_00  =  ';%'g3_K2  =  ';'g3_K0  =  ';
'pk_01  =  ';'pk_02  =  ';'pk_03  =  ';
};

% Results table: index | label | start, guess, estimate, change, s.e., z-ratio
Res     = num2cell([THETA, THETAg, THETA2, (THETA - THETA2), se, zratio]);
index   = num2cell((1:length(THETA2))');
results = {index, coeff, Res};

% One summary line = fixed-width label followed by the value as text.
show = @(label, value) disp([label num2str(value)]);

disp('**********************************************************');
disp('**********************************************************');
datestr(now)
disp(cd);
show('Sequential Likelihood        ', Sequential);
disp(['routine completed in         ' num2str(aa) ' minutes']);
show('tolerance:                   ', tollerance);
show('discount factor:             ', beta);
show('Log Likelihood guess:        ', logL);
show('Log Likelihood start:        ', logL1);
show('Log Likelihood value:        ', logLE);
show('Log Likelihood SCH:          ', logLSCH);
show('number of individuals:       ', ni);
show('number of observations:      ', no);
show('simulation sample size:      ', nn);
show('number of periods:           ', np);
show('number of wage points:       ', length(indW));
show('wage scale:                  ', scale);
show('K2 K0 tabulate:              ', kp);
show('grid points K2 Sc K0 Ec Xc : ', [dimK2 dimSc dimK0 dimEc dimXc]);
show('unbalance panel:             ', (ni*np > no));
show('missing wages:               ', (ni - length(indW)));
show('number of ESM iterations:    ', [niter iter]);
show('eigenv. smaller than one:    ', find(eig(H) < 1)');

% Echo the full table.
[results{1} results{2} results{3}]

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% RE-SIMULATE
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Optionally simulate again at the estimates and print the tables next to
% the ones obtained at the starting guess; then stop logging to the diary.
if Simulation == 1

    % simulate data from THETA2
    [Smat, Qmat, Tmat, Wmat, Kmat, TmatH] = f_simulation(THETA2);
    Tmat2  = Tmat;
    TmatH2 = TmatH;

    % compare fit: guess | estimate | difference
    table  = [Tmat1, Tmat2, Tmat1 - Tmat2]
    tableH = [TmatH1, TmatH2, TmatH1 - TmatH2]
end

diary off;
      
