%This is the main file for the implementation of the EM Algorithm
% in the estimation of VMA processes in Metaxoglou and Smith, Journal
% of Applied Econometrics, 2007
% Questions related to the code should be e-mailed to:
% konstantinos.metaxoglou@bateswhite.com
% Reset the session: wipe the workspace, close open figures, clear the console.
% (The global declaration must stay after "clear all".)
clc
close all
clear all

% Variables placed in the global workspace (each name declared once).
% NOTE(review): presumably read by the helper functions called later in this
% script (e.g. ss_smooth) -- confirm against those files.
global y x sample missing non_missing
global hours mycols appropj approphour approphour_non
global F P P10 omega

% astype 1: rd, 2: ru, 3:sp, 4:ns
% period 1: pre-crisis, 2:crisis, 3:post-crisis
%**************************************************************************
% Define Code, Input and Output Paths
%**************************************************************************
code_path=   'C:\VMA\MS 5531 Matlab\Code\';
input_path=  'C:\VMA\MS 5531 Matlab\Input\';
output_path= 'c:\VMA\MS 5531 MAtlab\Output\';
input_file=  'AS_data.xls';
output_file= 'estimation results.xls';  %store parameter estimates + s.e.
output_file2='estimation log file.xls'; %store a log-file for the EM iters
%**************************************************************************
% Run settings. astype and period are overwritten by the for-loops that
% follow; the values set here only act as defaults.
astype=1;
period=1;
robust=0;    % 1: treat spreads outside the 1st-99th percentile band as
             %    missing (see the percentile block after the data are read)
maxiters=5;  % number of EM iterations performed by the main while-loop

for astype=1:1
    for period=1:1
    start_time=clock;   % start of this pass; written to the log sheet later
%**************************************************************************    
% read the data
%**************************************************************************            
    % The worksheet name is '<astype tag>_<period tag>'. A lookup replaces the
    % former chain of twelve if-statements and fails early, with a clear
    % message, when astype/period fall outside the supported table (previously
    % input_sheet was simply left undefined in that case).
    in_astype_tags={'rd','ru','sp','ns'};
    in_period_tags={'precrisis','crisis','postcrisis'};
    if ~ismember(astype,1:numel(in_astype_tags)) || ...
            ~ismember(period,1:numel(in_period_tags))
        error('Unsupported astype/period combination: astype=%g, period=%g', ...
            astype,period);
    end
    input_sheet=[in_astype_tags{astype},'_',in_period_tags{period}];

    % var_names (text cells of the sheet) is not used further in this script.
    [data, var_names] = xlsread([input_path,input_file],input_sheet);

    % Column layout of the numeric block: 1 date, 2 hour, 3 hprice, 4 price,
    % 5 missing flag.
    % NOTE(review): "date" and "hour" shadow MATLAB functions of the same name;
    % the names are kept because later code reads "hour".
    date=data(:,1); 
    hour=data(:,2);
    hprice=data(:,3);
    price=data(:,4);
    missing=data(:,5);
    non_missing=missing==0;     % logical: true where the flag is zero

    y=hprice-price;             % dependent variable: price spread
    x=ones(size(y,1),1);        % regressor matrix: a constant only
    sample=size(y,1);           % number of observations (rows)

%**************************************************************************    
% treat spreads below 1st and above 99th percentiles as missing
%**************************************************************************
    % 1st and 99th percentiles of the spread, computed over every row of y.
    % NOTE(review): rows already flagged as missing are included in these
    % percentiles -- confirm that this is intended.
    low=prctile(y,1);
    high=prctile(y,99);
    if robust==1
        % row numbers whose spread lies inside [low, high]
        tmp=find(y>=low & y<=high);
        % robust2 is 1 for rows inside the band and 0 for rows outside it
        robust2=zeros(size(y,1),1);
        robust2(tmp)=1;
        % rows outside the band are treated as missing from here on
        non_missing(robust2==0)=0;
    end
%**************************************************************************
    mycrit=1e-4;  % not referenced anywhere else in this script
    omfactor=2;   %determines the amount of "noise" introduced
    hours=24;     %determines the dimension of the VMA process
    mycols=34;    % number of columns of H and order of the square matrix F
%**************************************************************************
% Provide some initial values for the parameters to be estimated
%**************************************************************************
    % appropj(i) is the last column of H that is filled for hour i
    appropj=(mycols-hours+1:1:mycols)';
    approphour=hour;

    % F: ones on the first sub-diagonal, zeros elsewhere
    F=zeros(mycols,mycols);
    F(2:mycols,1:mycols-1)=eye(mycols-1);

    A=zeros(hours,size(x,2));
    H=zeros(hours,mycols);
    stddev=zeros(hours,1);

    for i=1:hours
        rows_i=(approphour==i);     % observations belonging to hour i
        y_i=y(rows_i,:);
        x_i=x(rows_i,:);
        last_col=appropj(i);

        % least-squares coefficients of y on x for this hour
        A(i,:)=(inv(x_i'*x_i)*(x_i'*y_i))';
        % starting value i/100 for columns 2..appropj(i) of row i
        H(i,2:last_col)=i/100;
        % residual standard deviation of that regression
        stddev(i)=std(y_i-x_i*A(i,:)');
    end
    H(:,1)=1;   % first column of H is fixed at one

    % NOTE(review): the starting Q holds standard deviations on its diagonal,
    % while the M step later stores averages of squared terms in Q(j,j) --
    % confirm that this starting value is intended.
    Q=diag(stddev);
    omega=Q./omfactor;
%**************************************************************************         
    P10=0;
    % one row per observation, mycols*(mycols+1)/2 columns (= sum(1:mycols))
    P=zeros(sample,mycols*(mycols+1)/2);

    % keep a copy of the starting values
    Q0=Q; A0=A; H0=H;
    estim=[diag(Q);vecr(A);vecr(H)];

    % non-zero entries of H, excluding its first column
    vecr_H=vecr(H(:,2:end));
    ind=vecr_H~=0;
    vecr_H=vecr_H(ind,:);

    % convcrit(:,1) = previous parameter vector, convcrit(:,2) = current one;
    % filled with the dummy values 1000 and 2000 before the first iteration
    n_par=size([diag(Q);vecr(A);vecr_H],1);
    convcrit=[1000*ones(n_par,1),2000*ones(n_par,1)];
    convind=norm(convcrit(:,2)-convcrit(:,1))/norm(convcrit(:,1));

    convind2=10^6;
    logl=-10^12;
%**************************************************************************    
%**************************************************************************                 
% Iterate until the convergence criteria have been met  
%**************************************************************************
%**************************************************************************
    iters=1;
    log_info=[];    % one row per iteration: [iters, logl, logl gain, convind]
    % The loop performs a fixed number of passes (maxiters). The commented-out
    % alternative below would stop on the change in the log-likelihood instead.
    while iters<=maxiters
    % while convind2>0.01
        logl_old=logl;

        % ss_smooth is defined outside this file. The global P is read right
        % after this call -- presumably ss_smooth fills it; confirm there.
        [logl,K2,ksaismo2,yvar2]=ss_smooth(estim); % E step 

        % restrict everything to the rows flagged as non-missing
        y_non=y(non_missing==1,:);
        x_non=x(non_missing==1,:);

        ksaismo2_non=ksaismo2(non_missing==1,:);
        psmo2_non=P(non_missing==1,:);
        approphour_non=approphour(non_missing==1,:);
        
        non_miss_hour=zeros(hours,1);   % non-missing observation count per hour
%**************************************************************************
% Beginning of M Step
%**************************************************************************
        for j=1:hours
            ind=approphour_non==j;      % non-missing rows of hour j
            jj=appropj(j);              % last column of H used for hour j
       
            y1=y_non(ind==1,:);       
            x1=x_non(ind==1,:);
         
            ksaismo3=ksaismo2_non(ind==1,2:jj);
            ksaismo3b=ksaismo2_non(ind==1,1);
        
            non_miss_hour(j)=size(ksaismo3,1);
        
            % Cross-product sums for the regression of (y - first column of
            % ksaismo2) on [columns 2..jj of ksaismo2, x]. The explicit loop is
            % kept so the floating-point summation order is unchanged.
            sum1=0; sum2=0; sum3=0;
            sum4=0; sum5=0; sum6=0;        
        
            for t=1:size(y1,1)
                sum1=sum1+ksaismo3(t,:)'*ksaismo3(t,:);
                sum2=sum2+ksaismo3(t,:)'*x1(t,:);
                sum3=sum3+x1(t,:)'*(ksaismo3(t,:));
                sum4=sum4+x1(t,:)'*x1(t,:);
                sum5=sum5+ksaismo3(t,:)'*(y1(t)-ksaismo3b(t));
                sum6=sum6+x1(t,:)'*(y1(t)-ksaismo3b(t));                
            end        
            
            % Sum, over the same rows, of the symmetric matrices stored row-wise
            % in P. The loop bound is now size(psmo3,1); the former
            % 1:size(psmo3) passed a size vector to the colon operator and only
            % worked because MATLAB uses its first element.
            psmo3=psmo2_non(ind==1,:);
            sum_psmo=0;
            for t=1:size(psmo3,1)
                tmp_psmo=vec2symmat(psmo3(t,:)');
                sum_psmo=sum_psmo+tmp_psmo;       
            end

            % correct the cross-products with the summed P blocks
            sum5=sum5-sum_psmo(2:jj,1);
            sum_psmo=sum_psmo(2:jj,2:jj);
            sum1=sum1+sum_psmo;
        
            % NOTE(review): inv(M)*b is kept to reproduce earlier results
            % exactly; M\b would be the numerically preferable form.
            beta=inv([[sum1 sum2]; [sum3 sum4]])*[sum5; sum6];         
        
            % first block of beta -> row j of H (leading 1 fixed), rest -> A
            H(j,1:jj)=[1 beta(1:size(ksaismo3,2))'];
            A(j,1:size(x1,2))=beta(size(ksaismo3,2)+1:size(beta,1))';                
        
            % --- update of Q(j,j): uses ALL rows of hour j, missing or not ---
            ind=approphour==j;
            psmo3=P(ind==1,:);
            ksaismo3b=ksaismo2(ind==1,1);
        
            % Entries 2..mycols of the first row of ksaismo2 (and the matching
            % diagonal entries of the first P matrix), reversed, then filtered
            % by the hour-j mask taken at positions ii-mycols+2 .. ii.
            ii=hours*ceil(mycols/hours);            
            ksaismo_init=flipud(ksaismo2(1,2:mycols)');
            ksaismo_init=ksaismo_init(ind(ii-mycols+2:ii)==1,:);
        
            psmo_init=reshape(vec2symmat(P(1,:)'),mycols,mycols);
            psmo_init=flipud(diag(psmo_init(2:mycols,2:mycols)));
            psmo_init=psmo_init(ind(ii-mycols+2:ii)==1);
        
            test1=isempty(psmo_init);
            test2=isempty(ksaismo_init);
            if test1 && test2
                % no initial-state terms for this hour
                tmp_Q=(ksaismo3b'*ksaismo3b+sum(psmo3(:,1)))/size(psmo3,1);
            elseif ~test1 && ~test2
                % include the initial-state terms in numerator and denominator
                tmp_Q=(ksaismo_init'*ksaismo_init+sum(psmo_init)+...
                    ksaismo3b'*ksaismo3b+sum(psmo3(:,1)) )/...
                    (size(psmo_init,1)+size(psmo3,1));
            else
                % Both vectors are filtered with the same mask, so this should
                % be unreachable; previously tmp_Q from the preceding hour
                % would have been reused silently.
                error('Inconsistent initial-state selection for hour %d',j);
            end
            Q(j,j)=tmp_Q;            
        end
%**************************************************************************
% End of M Step
%**************************************************************************
        % stack the updated parameters for the next E step
        estim=[diag(Q);vecr(A);vecr(H)];        
        ind=vecr(H(:,2:size(H,2)))~=0;
        vecr_H=vecr(H(:,2:size(H,2)));
        vecr_H=vecr_H(ind==1,:);

        % relative change of the parameter vector and absolute change of logl
        convcrit(:,1)=convcrit(:,2);
        convcrit(:,2)=[diag(Q);vecr(A);vecr_H];
        convind=norm(convcrit(:,2)-convcrit(:,1))./norm(convcrit(:,1));
        convind2=abs(logl-logl_old);

        % the first pass is not printed (its gain is measured against the
        % dummy starting value of logl)
        if iters>=2
            fprintf('  iter.: %5i', iters);
            fprintf('  obs.log lik: %12.4f\t', logl);
            fprintf('  obs.log gain: %12.4f\n', logl-logl_old);
        end

        log_info(iters,:)=[iters,logl,logl-logl_old, convind];
    
        iters=iters+1;
    end   
%**************************************************************************
% Retrieve MA coefficients and std. errors
%**************************************************************************
    % Post-estimation quantities; all four helpers are defined outside this
    % file and are called with the final parameter values.
    [ss_kalman ss_yvar]=ss_gain_yvar(estim);

    ma=ma_coeffs(H,F,ss_kalman,appropj,mycols);

    ma_serrs=bj_serrs(ma,ss_yvar,appropj,hours,non_miss_hour);

    QA_serrs=serrs(estim,ksaismo2);

    end_time=clock;
%**************************************************************************
% Prepare Output files
%**************************************************************************
    % one row per hour: [diag(Q), A, H]
    estimates=[diag(Q),A,H];
    estimates_xls=num2cell(estimates);

    std_errors=[QA_serrs(:,1),QA_serrs(:,2),ma_serrs];
    std_errors_xls=num2cell(std_errors);

    % Sheet layout: title row, estimates, title row, standard errors.
    % Row positions are derived from the matrix sizes instead of the former
    % literals 2:25 / 26 / 27:50, which were only valid for hours==24.
    n_est=size(estimates,1);
    n_se=size(std_errors,1);
    output_xls=cell(n_est+n_se+2,size(estimates,2));
    output_xls(1,1)={'Estimates'};
    output_xls(2:n_est+1,1:size(estimates,2))=estimates_xls;
    output_xls(n_est+2,1)={'Std.Errors'};
    output_xls(n_est+3:n_est+2+n_se,1:size(std_errors,2))=std_errors_xls;

    % Log sheet: header row, one row per EM iteration, then start/end times
    % (elements 4:6 of the clock vectors: hour, minute, second).
    n_log=size(log_info,1);
    log_xls=cell(n_log+3,size(log_info,2));
    log_xls(1,:)={'iteration','obs. log-likelihood',...
              'obs. log-likelihood gain',...
              'X-convergence'};
    log_xls(2:n_log+1,1:4)=num2cell(log_info);
    log_xls(n_log+2,1)={'start time'};
    log_xls(n_log+2,2:4)=num2cell(start_time(:,4:6));
    log_xls(n_log+3,1)={'end time'};
    log_xls(n_log+3,2:4)=num2cell(end_time(:,4:6));

    % Output worksheet name: '<astype tag>_<period tag>' plus '_robust' when
    % the percentile trimming was applied. The trimming itself is switched on
    % only by robust==1, so the suffix follows the same test; previously any
    % robust value other than 0 or 1 left sheetname undefined and the script
    % failed here, after the whole estimation had already run.
    out_astype_tags={'rd','ru','sp','ns'};
    out_period_tags={'pre_crisis','crisis','post_crisis'};
    if ~ismember(astype,1:numel(out_astype_tags)) || ...
            ~ismember(period,1:numel(out_period_tags))
        error('Unsupported astype/period combination: astype=%g, period=%g', ...
            astype,period);
    end
    sheetname=[out_astype_tags{astype},'_',out_period_tags{period}];
    if robust==1
        sheetname=[sheetname,'_robust'];
    end

    % Write with full paths (same convention as the xlsread call for the
    % input file) rather than changing into the output folder first, so a
    % failed write cannot leave the session stranded in that folder.
    xlswrite([output_path,output_file],output_xls,sheetname);
    xlswrite([output_path,output_file2],log_xls,sheetname);
    % kept so that the working directory after each pass is the code folder,
    % exactly as before
    cd(code_path);
%**************************************************************************                      
    end %period
end %astype

