% Load the large dataset, read the per-series settings, and apply the log
% transformation to the series that request it.
clear;
clc

% Numeric panel (newdataraw_l) and text cells (tt_l) of the 'Monthly' sheet.
[newdataraw_l, tt_l] = xlsread('largedataset_1.xls', 'Monthly');
[T1, NVAR1]          = size(newdataraw_l);

% Per-series settings, read from cells E3:G80 of the 'description' sheet.
info = xlsread('largedataset_1.xls', 'description', 'E3:G80');

% Runs a separate script that creates the mnemo and description variables.
GetCodesAndDescription;

% Split the settings matrix into its three columns.
pos_20 = info(:,1);
tolog  = info(:,2);   % a value of 1 means the series is taken in logs
priors = info(:,3);

% Take logs where appropriate; all other columns are left untouched.
datamat = newdataraw_l;
for kk = 1:NVAR1
    if tolog(kk) == 1
        datamat(:,kk) = log(newdataraw_l(:,kk));
    end
end

% Split the panel into the medium (N_20) block and the remaining series.
% pos_20 is NaN for series outside the medium block; for the others it
% gives the target position of the series inside that block.
temp             = find(~isnan(pos_20));
if isempty(temp)
    error('data_20_78:noMediumSeries', ...
          'No series is flagged for the medium block: pos_20 contains only NaN.');
end
pos_others       = (setdiff(1:size(newdataraw_l,2),temp))';
data_MED_temp    = datamat(:,temp);
prior_med_temp   = priors(temp);
codes_MED_temp   = mnemo(temp);
description_MED_temp   = description(temp);
codes_other        = mnemo(pos_others);
descr_other        = description(pos_others);

% Reshuffle to get CPI, FFRATES, EMP as the first 3.
pos_temp         = pos_20(temp,1); % target positions, expected to be 1..numel(temp)

% The re-ordering below needs every position 1..n to appear exactly once.
% Check this up front so a gap or a duplicate in the spreadsheet yields a
% clear message instead of an assignment-size error inside the loop.
if ~isequal(sort(pos_temp(:)), (1:numel(temp))')
    error('data_20_78:badMediumOrder', ...
          'The medium-block positions must be a permutation of 1..%d (no gaps, no duplicates).', ...
          numel(temp));
end

% pos_MED(ll) is the index, within the medium subset, of the series that
% must end up in position ll.
pos_MED = zeros(1,numel(temp));
for ll=1:numel(temp)
    pos_MED(ll) = find(pos_temp==ll);
end

% Apply the re-ordering to the priors, data, codes and descriptions.
prior_MED       = prior_med_temp(pos_MED,1);
data_MED        = data_MED_temp(:,pos_MED);
codes_MED       = codes_MED_temp(pos_MED);
descr_MED       = description_MED_temp(pos_MED);
close all;

% Medium-block labels first, then the labels of all remaining series.
% NOTE(review): horizontal concatenation presumably relies on mnemo and
% description being row cell arrays -- confirm in GetCodesAndDescription.
codes_ALL       = [codes_MED codes_other];
descr_ALL       = [descr_MED descr_other];
% plot(data_MED(:,1:3));

% Build the large block: the re-ordered medium series come first, followed
% by every column of the panel whose index is not in temp (the medium set).
pos_large = setdiff(1:NVAR1, temp);
data_LAR  = horzcat(data_MED, datamat(:, pos_large));
prior_LAR = vertcat(prior_MED, priors(pos_large));

% Wrap the raw panel in a tsmat object; it is used here only to obtain the
% dates for the forecast exercise.
% NOTE(review): the arguments presumably mean "monthly data starting in
% 1959:1", then a restriction to 1959:1-2013:7 -- confirm against tsmat.
data  = tsmat(1959,1,12,newdataraw_l);
data  = data(1959,1,2013,7,:);
[T,N] = size(data.matdata);

% ---- Setup of the forecast exercise ----
hmin      = 1;
hmax      = 24;
lags      = 13;
presample = 120;  % use a rolling window of 10 years

% Observation indices at which the first and the last estimation sample end,
% and the resulting number of forecast origins.
esti_start = presample + lags;
esti_end   = T - hmax;
nfore      = esti_end - esti_start + 1;

% Date labels (one row per observation) used to report those two indices.
dates      = datestr(data.dates,'mmmmyy');
date_start = dates(esti_start,:);
date_end   = dates(esti_end,:);

% Report the size and the span of the exercise.
disp('Tot forecasts is');
disp(nfore);
disp(' ');
disp(['First estimation period ends in ', date_start]);
disp(' ');
disp(['Last estimation period ends in ', date_end]);
disp(' ');

% Store the whole workspace in data_20_78.mat.
save('data_20_78');