% Start from a clean session: empty workspace, cleared command window,
% no open figures, warnings silenced, and every subfolder of the current
% directory added to the search path.
clear; clc; close all;
warning off;
addpath(genpath(pwd));

%% Output directory
% Created on first run if it does not exist yet.
DirOut = [pwd, filesep, 'output_forecast', filesep];
if ~exist(DirOut, 'dir')
    mkdir(DirOut);
end

%% Settings of the exercise
% Kernel bandwidth grid (single point here; full grid kept for reference):
% H_grid        = 0.5:.1:1;
H_grid        = 0.7;
kernshape     = 'gauss';   % kernel used to build the observation weights

% Prior tightness grid, stored on the standard-deviation scale
% (single point here; full grid kept for reference):
%phi_grid      = [10^(-10) 10^(-5) 10^(-4) 10^(-3) 10^(-2):.03:1].^.5;
phi_grid      = (10^(-5)).^.5;
optimize_step = 1; % determines how frequently you want to optimize tightness

% Number of points on each grid
H_gridsize    = numel(H_grid);
phi_gridsize  = numel(phi_grid);

%% Data
% prior_LAR, data_LAR and T are not defined in this script; presumably they
% are provided by this MAT-file -- verify against its contents.
load('data_20_78');
lags         = 4;                  % VAR lag order
% Full-sample defaults; both are re-assigned per model size in the loop below.
gamma_mean   = prior_LAR;
dataselect   = data_LAR(1:T,:);

%% Timing comparison across VAR sizes
% For each model size this loop
%   (i)  estimates the kernel-weighted, prior-shrunk least-squares VAR over
%        the (phi, H) grid and stores the elapsed time in KernelModel, and
%   (ii) runs the forgetting-factor Kalman filter (var_KKInsample) over a
%        grid of the same size and stores the elapsed time in KK.
% Each struct collects, per pass, the elapsed time and the problem dimensions.
% NOTE: the statements inside the two timed regions are intentionally left
% un-hoisted and un-vectorised so that the measured times stay comparable.
pass = 0;
for ModelSize = [5 10 20]

    pass = pass+1;

    disp('Estimating Model Size')
    disp(ModelSize)
    gamma_mean   = prior_LAR(1:ModelSize,:);
    dataselect   = data_LAR(1:T,1:ModelSize);

    % m*lags slope coefficients plus one intercept in each of the m equations
    NCoefficients = ModelSize*ModelSize*lags+ModelSize;

    tt        = size(dataselect,1);
    m         = size(dataselect,2); % number of variables in the VAR
    % -- estimation window -- %
    YY_tt        = dataselect(1:tt,:);
    T_est        = size(YY_tt,1)-lags;   % usable observations after losing 'lags' rows
    constvec     = ones(T_est,1);

    %% ESTIMATE WITH KERNEL BASED METHOD
    % Everything up to the 'tic' below is set-up and is NOT part of the
    % recorded time.
    % First get lags of the variables: these are the same for any model
    % so it is more efficient to compute them outside of the routines
    [y_l,x_l] = matrix_var(YY_tt,lags,T_est+lags,m);
    % Fit single equation models to each equation to get an estimate of the
    % residual variances. This also remains the same across models.
    sigu_ar = zeros(1,m);
    for ll=1:m
        y_l_ar = y_l(:,ll);
        % presumably the columns of x_l holding the own lags of variable ll
        % (depends on seqa, defined elsewhere) -- TODO confirm
        pos_ar = seqa(ll,m,lags);
        x_l_ar = [x_l(:,pos_ar') constvec];
        % least-squares fitted values and residual variance (divisor T_est)
        Proj_x = x_l_ar*((x_l_ar'*x_l_ar)\x_l_ar')*y_l_ar;
        sigu_ar(1,ll)=(y_l_ar-Proj_x)'*(y_l_ar-Proj_x)/T_est;
    end

    % compute matrices for VAR estimation that are constant for all models
    % 1. matrix Omega: block-diagonal scaling (lag index times residual
    %    standard deviation) for the slopes, plus one entry for the constant
    sigmas=sigu_ar.^.5;
    Omega=[kron(diag(1:lags),diag(sigmas)) zeros(m*lags,1);        zeros(1,m*lags+1)];
    Omega(end,end)=1/(1e+20);% very loose prior on the constant like Banbura et al.
    OmegaprimeOmega = Omega'*Omega;

    % containers for the estimates and the prior mean d1
    % (first-lag block carries diag(gamma_mean); all remaining rows are zero)
    slopes           = cell(length(phi_grid),length(H_grid));
    slopes_nokernel  = cell(1,length(H_grid));
    d1               = [diag(gamma_mean);zeros(m*(lags-1)+1,m)];
    close all;
    % start Timing Kernel Based VAR
    tic
    for posgrid_phi=1:phi_gridsize
        for posgrid_H=1:H_gridsize
            % compute the kernel weights
            if strcmp('rect', kernshape)
                Wt   = w_const(T_est,H_grid(posgrid_H));
            elseif strcmp('gauss', kernshape)
                H = T_est.^H_grid(posgrid_H);
                Wt = w_gauss(T_est,T_est,H,1);
            elseif strcmp('ewma', kernshape)
                Wt = w_ewma(T_est,H_grid(posgrid_H));
            else
                % fail loudly instead of continuing with an undefined or
                % stale weight matrix
                error('Unknown kernel shape ''%s'': expected ''rect'', ''gauss'' or ''ewma''.', kernshape)
            end
            % approximate the number of observations to further speed up
            % computation: throw away weights lower than 1/10000.
            % T_approx counts the diagonal weights below the threshold and
            % that many leading observations are dropped; this assumes the
            % small weights sit at the start of the sample -- TODO confirm
            % against the w_* helpers.
            T_approx = sum((diag(Wt))<(1/10^4));
            y_l_appr=y_l(T_approx+1:end,:);
            x_l_appr=x_l(T_approx+1:end,:);
            T_est_appr = T_est-T_approx;
            temp = diag(Wt);
            Wt_appr = diag(temp(T_approx+1:end));
            % weighted cross-product matrices X'WX and X'WY
            big_X_appr=x_l_appr'*Wt_appr*x_l_appr;
            big_Y_appr=x_l_appr'*Wt_appr*y_l_appr;
            phi = phi_grid(posgrid_phi);
            % weighted least squares shrunk towards d1 with penalty Omega'Omega/phi^2
            b  = (big_X_appr+OmegaprimeOmega/(phi^2))\(big_Y_appr+(OmegaprimeOmega/(phi^2))*d1);
            slopes{posgrid_phi,posgrid_H}=b;
        end
    end
    KernelModel.Time{pass}   = toc;
    KernelModel.Coeffs{pass} = NCoefficients;
    KernelModel.SizeY{pass}  = m;
    KernelModel.Nlags{pass}  = lags;
    KernelModel.Tobs{pass}   = size(YY_tt,1);

    %% ESTIMATE USING THE KALMAN FILTER WITH FORGETTING FACTORS
    % Some parameters for the exercise
    % (prior mean is identically zero; only its dimensions are used below)
    priormean       = [0*diag(gamma_mean(1:ModelSize));zeros(m*(lags-1)+1,m)];
    m               = ModelSize; % number of variables in the VAR
    % first differences of the data, passed through normaliz
    YY_ttStaz       = normaliz(YY_tt(2:end,:)-YY_tt(1:end-1,:));

    % priors for estimation of KK model; the prior variance is a function of phi
    kappa            = .96;
    K                = size(priormean(:),1);% dimension of the state vector
    % index in each equation which are the own lags (remember constant vector is at the end)
    ind = zeros(m,lags);
    for i=1:m
        ind(i,:) = i:m:K/m-1;
    end

%    ForgetFactorGrid = 0.94:.01:.99;
    ForgetFactorGrid = .99;

    % the forgetting-factor grid is indexed with the H-grid position below,
    % so both grids must have the same number of points
    if length(ForgetFactorGrid)~=H_gridsize
        error('sizes of the grids are not the same between Kernel and Kalman Filter estimator')
    end
    % start Timing KK method
    tic
    for posgrid_phi=1:phi_gridsize
        for posgrid_H=1:H_gridsize
            % This is used as forgetting factor
            % The main idea is to use the size of the grid unchanged in the two case,
            % the actual values for the forgetting factors would need to be changed in
            % forecast exercises
            lambda           = ForgetFactorGrid(posgrid_H);
            phi              = phi_grid(posgrid_phi).^2; %take square because in the kernel VAR you have standard deviations
            % form prior variance, which depends on phi
            V_i   = zeros(K/m,m);
            for i = 1:m  % for each i-th equation
                for j = 1:K/m   % for each j-th RHS variable
                    if j==K/m   % the last coefficient is that of the constant, use diffuse prior here
                        V_i(j,i) = 1000; % variance on intercept
                    elseif any(j==ind(i,:)) % this means you are at an "own lag"
                        p_j = ceil(j/m);   % lag number associated with regressor j
                        V_i(j,i) = phi./(p_j.^2); % variance on own lags
                    else
                        p_j = ceil(j/m);   % lag number associated with regressor j
                        % NOTE(review): same value as for own lags -- no
                        % cross-variable rescaling is applied; confirm this
                        % is intended (the data go through normaliz).
                        V_i(j,i) = phi/(p_j.^2);
                    end
                end
            end
            % Now V (MINNESOTA VARIANCE) is a diagonal matrix with diagonal elements the V_i
            V_prior          = single(diag(V_i(:)));
            prior_theta_mean = 0*priormean(:); % I am using stationary data here
            prior_theta_V    = V_prior;
            [yy_ll,xx_ll]    = matrix_var(YY_ttStaz(1:T-1,:),lags,T-1,m);
            Sigma_0          = single(cov(yy_ll));
            [a_tt,P_tt] = var_KKInsample(prior_theta_mean,prior_theta_V,YY_ttStaz,m,Sigma_0,kappa,lambda,lags,1);
        end
    end

    KK.Time{pass}   = toc; % time is linear in the grids size, so estimate once and multiply
    KK.Coeffs{pass} = size(a_tt,1);
    KK.SizeY{pass}  = size(YY_ttStaz,2);
    KK.Nlags{pass}  = lags;
    KK.Tobs{pass}   = size(YY_ttStaz,1);
end
% Write every workspace variable to CompGainresults.mat in the current folder.
save('CompGainresults');

% Print both result structs (timings and problem dimensions per model size)
% to the command window; the missing semicolons are deliberate.
KernelModel

KK