% Forecasting I(1) variables
% Last checked: 11/16/2019

% ---- Reset the workspace and read the input data ----
clear all

% Each ASCII file is read into a variable named after the file.
DATA          = load('DATA.txt');           % I(1) predictors (T by N1 matrix)
DDATA         = load('DDATA.txt');          % I(0) predictors (T-1 by N matrix) (N=N1+N0)
dateyearmonth = load('dateyearmonth.txt');  % date year/month
keyvar_inf    = load('keyvar_inf.txt');     % key variables (for forecasting inflation rates)
keyvar        = load('keyvar.txt');         % key variables (for forecasting other 68 variables)

% ---- Tuning constants ----
rmax = 10;  % prespecified maximum number of factors
kmax = 20;  % prespecified maximum lag length
h    = 1;   % forecasting horizon

% Number of factors chosen by IPC_1 (Bai, 2004) on the full-sample level data.
r_0 = IPC_1(DATA, rmax);

% ---- Panel dimensions ----
N1 = size(DATA, 2);         % # of I(1) predictors (columns of DATA)
[Ttotal, N] = size(DDATA);  % Ttotal = rows of DDATA, N = columns of DDATA
N0 = N - N1;                % # of I(0) predictors
% NOTE(review): Ttotal is the row count of DDATA, not of DATA. The load comment
% describes DDATA as having T-1 rows -- confirm this is the intended value for
% the total number of time series observations.

% Variable specification (inflation forecasting)
% Columns 1-3 of keyvar_inf feed the level specification, columns 4-6 the
% differenced/stationary one.
Y = keyvar_inf(:, 1);  % monthly inflation rates
y = keyvar_inf(:, 4);  % differenced monthly inflation rates

% Regressors for the level model: constant, unemployment rate and term spread.
Z = horzcat(ones(length(keyvar_inf), 1), keyvar_inf(:, [2 3]));

% Regressors for the differenced model: constant, unemployment rate and term
% spread (treated to be stationary).
z = horzcat(ones(length(keyvar_inf), 1), keyvar_inf(:, [5 6]));

% Variable specification
%var_num = 1;% Variable numbers
%Y = keyvar(:,var_num);% Target variable (level)
%y = keyvar(:,var_num+68);% Target variable (differenced)
%Z = [ones(length(keyvar),1) [1:length(keyvar)]']; % constant and time trend
%Z=[ones(length(keyvar),1)]; % constant
%z = ones(length(keyvar),1); % constant

% Three forecasting periods: Pre Great Moderation/Great Moderation/Crisis and aftermath
% TxC is the index of the last observation of period x.
T1C = 288;
T2C = 570;
T3C = Ttotal;
% Lengths of the initial estimation windows: Tini_sample1 is used for
% periods 1 and 2, Tini_sample2 for period 3.
Tini_sample1 = 13*12;
Tini_sample2 = 6*12;

% Period 1: Pre Great Moderation (1960 - 1983), Tsample=288

% Period-1 slices of the data. They are not used by the loop below (which
% indexes the full arrays directly) but remain in the workspace that is saved.
XDATA_1  = DATA(1:T1C,:);
ZDATA_1  = Z(1:T1C,:);
YDATA_1  = Y(1:T1C+h);
DXDATA_1 = DDATA(2:T1C,:);
zDATA_1  = z(2:T1C,:);
yDATA_1  = y(2:T1C+h);

% Initial sampling period.
XDATA_1sample  = XDATA_1(1:Tini_sample1,:);
DXDATA_1sample = DXDATA_1(1:Tini_sample1,:);

% Preallocate one slot per forecast origin: forecasts (pred), and the
% second/third outputs of cross_validation (r_f_optimal, k_f_optimal).
% Suffix _L = run on the level data (DATA, Z); _D = run on the differenced
% data (DDATA, z).
pred_L_1        = zeros(T1C-Tini_sample1-h+1, 1);
r_f_optimal_L_1 = zeros(size(pred_L_1));
k_f_optimal_L_1 = zeros(size(pred_L_1));
pred_D_1        = zeros(T1C-Tini_sample1-h+1, 1);
r_f_optimal_D_1 = zeros(size(pred_D_1));
k_f_optimal_D_1 = zeros(size(pred_D_1));

% Expanding-window loop: at step tt the predictors run through observation
% Tini_sample1-1+tt and the target series through Tini_sample1-1+tt+h.
% NOTE(review): the trailing 0/1 argument presumably switches cross_validation
% between the level and differenced specification -- confirm in that function.
for tt = 1:T1C-Tini_sample1-h+1
    [pred_L_1(tt), r_f_optimal_L_1(tt), k_f_optimal_L_1(tt)] = cross_validation( ...
        DATA(1:Tini_sample1-1+tt,:), ...
        Z(1:Tini_sample1-1+tt,:), ...
        Y(1:Tini_sample1-1+tt+h), ...
        y(2:Tini_sample1-1+tt+h), ...
        h, r_0, kmax, 0);
    [pred_D_1(tt), r_f_optimal_D_1(tt), k_f_optimal_D_1(tt)] = cross_validation( ...
        DDATA(2:Tini_sample1-1+tt,:), ...
        z(2:Tini_sample1-1+tt,:), ...
        Y(1:Tini_sample1-1+tt+h), ...
        y(2:Tini_sample1-1+tt+h), ...
        h, r_0, kmax, 1);
    tt  % no semicolon on purpose: echoes the loop counter as progress output
end

% Forecasting errors for the first period (forecast minus realised level).
ferr_L_1 = pred_L_1 - Y(Tini_sample1+h:T1C);
ferr_D_1 = pred_D_1 - Y(Tini_sample1+h:T1C);

% Root mean squared forecasting errors for the first period.
RMSE_L_1 = sqrt(mean(ferr_L_1.^2));
RMSE_D_1 = sqrt(mean(ferr_D_1.^2));

% Relative efficiency for the first period (level RMSE over differenced RMSE).
LD_1 = RMSE_L_1/RMSE_D_1;

% Period 2: Great Moderation (1984 - 2007:06), Tsample=282

% Period-2 slices of the data. They are not used by the loop below (which
% indexes the full arrays directly) but remain in the workspace that is saved.
XDATA_2  = DATA(T1C+1:T2C,:);
DXDATA_2 = DDATA(T1C+2:T2C,:);

% Preallocate one slot per forecast origin: forecasts (pred), and the
% second/third outputs of cross_validation (r_f_optimal, k_f_optimal).
% Suffix _L = run on the level data (DATA, Z); _D = run on the differenced
% data (DDATA, z).
pred_L_2        = zeros((T2C-T1C)-Tini_sample1-h+1, 1);
r_f_optimal_L_2 = zeros(size(pred_L_2));
k_f_optimal_L_2 = zeros(size(pred_L_2));
pred_D_2        = zeros((T2C-T1C)-Tini_sample1-h+1, 1);
r_f_optimal_D_2 = zeros(size(pred_D_2));
k_f_optimal_D_2 = zeros(size(pred_D_2));

% Expanding-window loop restarted at observation T1C+1: at step tt the
% predictors run through T1C+Tini_sample1-1+tt and the target series through
% T1C+Tini_sample1-1+tt+h.
% NOTE(review): the trailing 0/1 argument presumably switches cross_validation
% between the level and differenced specification -- confirm in that function.
for tt = 1:(T2C-T1C)-Tini_sample1-h+1
    [pred_L_2(tt), r_f_optimal_L_2(tt), k_f_optimal_L_2(tt)] = cross_validation( ...
        DATA(T1C+1:T1C+Tini_sample1-1+tt,:), ...
        Z(T1C+1:T1C+Tini_sample1-1+tt,:), ...
        Y(T1C+1:T1C+Tini_sample1-1+tt+h), ...
        y(T1C+2:T1C+Tini_sample1-1+tt+h), ...
        h, r_0, kmax, 0);
    [pred_D_2(tt), r_f_optimal_D_2(tt), k_f_optimal_D_2(tt)] = cross_validation( ...
        DDATA(T1C+2:T1C+Tini_sample1-1+tt,:), ...
        z(T1C+2:T1C+Tini_sample1-1+tt,:), ...
        Y(T1C+1:T1C+Tini_sample1-1+tt+h), ...
        y(T1C+2:T1C+Tini_sample1-1+tt+h), ...
        h, r_0, kmax, 1);
    tt  % no semicolon on purpose: echoes the loop counter as progress output
end

% Forecasting errors for the second period (forecast minus realised level).
ferr_L_2 = pred_L_2 - Y(T1C+Tini_sample1+h:T2C);
ferr_D_2 = pred_D_2 - Y(T1C+Tini_sample1+h:T2C);

% Root mean squared forecasting errors for the second period.
RMSE_L_2 = sqrt(mean(ferr_L_2.^2));
RMSE_D_2 = sqrt(mean(ferr_D_2.^2));

% Relative efficiency for the second period (level RMSE over differenced RMSE).
LD_2 = RMSE_L_2/RMSE_D_2;

% Period 3: Crisis and aftermath (2007:06 - 2018:10), Tsample=136

% Period-3 slices of the data. They are not used by the loop below (which
% indexes the full arrays directly) but remain in the workspace that is saved.
% NOTE(review): DXDATA_3 starts at row T2C+1, whereas the loop below slices
% DDATA from row T2C+2 -- confirm which start row is intended.
XDATA_3  = DATA(T2C+1:T3C,:);
DXDATA_3 = DDATA(T2C+1:T3C,:);

% Preallocate one slot per forecast origin: forecasts (pred), and the
% second/third outputs of cross_validation (r_f_optimal, k_f_optimal).
% Suffix _L = run on the level data (DATA, Z); _D = run on the differenced
% data (DDATA, z).
pred_L_3        = zeros((T3C-T2C)-Tini_sample2-h+1, 1);
r_f_optimal_L_3 = zeros(size(pred_L_3));
k_f_optimal_L_3 = zeros(size(pred_L_3));
pred_D_3        = zeros((T3C-T2C)-Tini_sample2-h+1, 1);
r_f_optimal_D_3 = zeros(size(pred_D_3));
k_f_optimal_D_3 = zeros(size(pred_D_3));

% For the 24-step horizon the maximum lag length is lowered to 11 before the
% period-3 loop; kmax keeps this value for the rest of the script.
if h == 24
    kmax = 11;
end

% Expanding-window loop restarted at observation T2C+1, using the shorter
% initial window Tini_sample2: at step tt the predictors run through
% T2C+Tini_sample2-1+tt and the target series through T2C+Tini_sample2-1+tt+h.
% NOTE(review): the trailing 0/1 argument presumably switches cross_validation
% between the level and differenced specification -- confirm in that function.
for tt = 1:(T3C-T2C)-Tini_sample2-h+1
    [pred_L_3(tt), r_f_optimal_L_3(tt), k_f_optimal_L_3(tt)] = cross_validation( ...
        DATA(T2C+1:T2C+Tini_sample2-1+tt,:), ...
        Z(T2C+1:T2C+Tini_sample2-1+tt,:), ...
        Y(T2C+1:T2C+Tini_sample2-1+tt+h), ...
        y(T2C+2:T2C+Tini_sample2-1+tt+h), ...
        h, r_0, kmax, 0);
    [pred_D_3(tt), r_f_optimal_D_3(tt), k_f_optimal_D_3(tt)] = cross_validation( ...
        DDATA(T2C+2:T2C+Tini_sample2-1+tt,:), ...
        z(T2C+2:T2C+Tini_sample2-1+tt,:), ...
        Y(T2C+1:T2C+Tini_sample2-1+tt+h), ...
        y(T2C+2:T2C+Tini_sample2-1+tt+h), ...
        h, r_0, kmax, 1);
    tt  % no semicolon on purpose: echoes the loop counter as progress output
end

% Forecasting errors for the third period (forecast minus realised level).
ferr_L_3 = pred_L_3 - Y(T2C+Tini_sample2+h:T3C);
ferr_D_3 = pred_D_3 - Y(T2C+Tini_sample2+h:T3C);

% Root mean squared forecasting errors for the third period.
RMSE_L_3 = sqrt(mean(ferr_L_3.^2));
RMSE_D_3 = sqrt(mean(ferr_D_3.^2));

% Relative efficiency for the third period (level RMSE over differenced RMSE).
LD_3 = RMSE_L_3/RMSE_D_3;

% DM tests comparing the level-model and differenced-model forecast errors,
% one test per period.
% NOTE(review): the third argument is hard-coded to 1; if dbtest expects the
% forecast horizon there, it should track h -- confirm against dbtest.
[dm1, pv1] = dbtest(ferr_L_1, ferr_D_1, 1);  % first period
[dm2, pv2] = dbtest(ferr_L_2, ferr_D_2, 1);  % second period
[dm3, pv3] = dbtest(ferr_L_3, ferr_D_3, 1);  % third period

% Store every workspace variable (data, forecasts, errors, test results).
% NOTE(review): the file name is fixed and mentions "h24", while h is assigned
% at the top of this script -- check that the name matches the run being saved
% so that results from another horizon/variable are not overwritten.
save('Variable_52_h24.mat')