%% Replication code for generating correlations and RMSEs of different WEI revisions
%  Constructs the output necessary for Table 3 ("Relationship between WEI
%  updates")

% LICENSE FOR CODE:
% Copyright Federal Reserve Bank of New York and Federal Reserve Bank of Dallas.
% You may reproduce, use, modify, make derivative works of, and distribute this code in whole or in part 
% so long as you keep this notice in the documentation associated with any distributed works. 
% Neither the names of the Federal Reserve Bank of New York and Federal Reserve Bank of Dallas nor the names 
% of any of the authors may be used to endorse or promote works derived from this 
% code without prior written permission. Portions of the code attributed to third 
% parties are subject to applicable third party licenses and rights. By your 
% use of this code you accept this license and any applicable third party license.
% THIS CODE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT ANY WARRANTIES OR CONDITIONS 
% OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY WARRANTIES
% OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A 
% PARTICULAR PURPOSE, EXCEPT TO THE EXTENT THAT THESE DISCLAIMERS ARE HELD TO 
% BE LEGALLY INVALID. THE FEDERAL RESERVE BANK OF NEW YORK AND THE FEDERAL 
% RESERVE BANK OF DALLAS ARE NOT, UNDER ANY CIRCUMSTANCES, LIABLE TO YOU FOR
% DAMAGES OF ANY KIND ARISING OUT OF OR IN CONNECTION WITH USE OF OR INABILITY
% TO USE THE CODE, INCLUDING, BUT NOT LIMITED TO DIRECT, INDIRECT, INCIDENTAL,
% CONSEQUENTIAL, PUNITIVE, SPECIAL OR EXEMPLARY DAMAGES, WHETHER BASED ON BREACH
% OF CONTRACT, BREACH OF WARRANTY, TORT OR OTHER LEGAL OR EQUITABLE THEORY, EVEN
% IF THE FEDERAL RESERVE BANK OF NEW YORK OR THE FEDERAL RESERVE BANK OF DALLAS
% HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES OR LOSS AND REGARDLESS
% OF WHETHER SUCH DAMAGES OR LOSS IS FORESEEABLE.
%% Preliminaries

% Start from a clean session: empty workspace, no open figures, clear console
clear;
close all;
clc;

% Put the ../Functions folder and all of its subfolders on the MATLAB path
addpath(genpath(fullfile('..','Functions')));


%% Loading in and transforming input series

% Reading in the input series from Data_WEI_Alt_Spec.csv
rawData = readtable(fullfile('..','Data','Data_WEI_Alt_Spec.csv'));

% Converting dates to MATLAB datenums. Depending on the MATLAB release and
% on the date format used in the file, readtable may return the Date column
% either as a cell array of character vectors or as a datetime array, so
% both representations are handled here.
if iscell(rawData.Date)
    rawData.Date = cellfun(@datenum, rawData.Date);
else
    rawData.Date = datenum(rawData.Date);
end

% Reading in series information (series name, id, transformation)
seriesInfo = readtable(fullfile('..','Data','series_info.csv'));

% Subsetting seriesInfo to only include variables flagged as in the model
% (Model == 1), and keeping the Date column plus the matching data columns.
% NOTE(review): the column subset is positional -- it relies on column
% k + 1 of the data file corresponding to row k of series_info.csv.
modelIndex  = find(seriesInfo.Model == 1);
seriesInfo  = seriesInfo(modelIndex, :);
rawData     = rawData(:, [1; modelIndex + 1]);

% Initializing the table of transformed data with the raw data
transfData = rawData;

% Looping through each model series and applying the transformation listed
% for it in seriesInfo (transformData is provided on the path added above)
for iSeries = 1:size(seriesInfo, 1)

    seriesID = seriesInfo.SeriesID{iSeries};
    transf   = seriesInfo.Transformation{iSeries};

    transfData.(seriesID) = transformData(rawData.(seriesID), transf);
end

%% Constructing an unscaled version of WEI

% Date window over which the WEI is generated
weiStart = datenum(2007, 12, 30);
weiEnd   = datenum(2020, 2, 29);

% Date window over which the weights are estimated
estStart = datenum(2008, 1, 1);
estEnd   = datenum(2020, 2, 29);

% Estimating weights for WEI (Note: weights not identified to sign)
estKeepIndex = (transfData.Date >= estStart) & (transfData.Date <= estEnd);
XEst         = transfData{estKeepIndex, 2:end};   % all series columns, estimation rows only
meanEst      = nanmean(XEst, 1);                  % column means, ignoring NaNs
stdEst       = nanstd(XEst, 0, 1);                % column standard deviations, ignoring NaNs
XNormEst     = (XEst - meanEst) ./ stdEst;        % standardising each column (implicit expansion)
loadings     = pca(XNormEst);                     % PCA loadings (factors -> series)
weights      = inv(loadings);                     % inverse maps series -> factors
weights      = weights(1, :)';                    % keeping only the first principal component

% Generating WEI (unscaled) for the full sample period. The data are
% standardised with the estimation-sample means and standard deviations.
fullKeepIndex = (transfData.Date >= weiStart) & (transfData.Date <= weiEnd);
XFull         = transfData{fullKeepIndex, 2:end};
datesFull     = transfData.Date(fullKeepIndex);   % dates matching the rows of weiFactor
XNormFull     = (XFull - meanEst) ./ stdEst;
weiFactor     = XNormFull * weights;

%% Estimating Regressions for Initial, First, and Second Revisions

%  Restricting the transformed input data to the regression sample
startSample = datenum(2007, 12, 30);
endSample   = datenum(2020, 2, 23);
keepIndex   = (transfData.Date >= startSample) & (transfData.Date <= endSample);
regData     = transfData(keepIndex, :);

%  Adding the WEI and its first two lags as columns of regData
regData.WEIL1 = lagmatrix(weiFactor, 1);
regData.WEIL2 = lagmatrix(weiFactor, 2);
regData.WEI   = weiFactor;

%  Excluding leading and trailing NaNs: keep the span running from the first
%  to the last row in which no data column is NaN (a row sum is NaN as soon
%  as any of its entries is NaN)
sumCol  = sum(regData{:, 2:end}, 2);
hasData = ~isnan(sumCol);
sIndex  = find(hasData, 1, 'first');
eIndex  = find(hasData, 1, 'last');
regData = regData(sIndex:eIndex, :);

% Regression Specifications: regressor column names used for each revision
specStrs = { ...
    {'JRRSX', 'PSTL', 'RAS', 'WEIL1', 'WEIL2'}; ...                                                      % Initial
    {'JRRSX', 'PSTL', 'RAS', 'LICN', 'RSTOTL', 'PELOUS', 'FUEL', 'WITH', 'WEIL1', 'WEIL2'}; ...          % First
    {'JRRSX', 'PSTL', 'RAS', 'LICN', 'RSTOTL', 'PELOUS', 'FUEL', 'WITH', 'ASASI', 'WEIL1', 'WEIL2'} ...  % Second
    };

% Looping through the specifications and storing the regression
% coefficients from each estimation
coeffCell = cell(size(specStrs));
for iRev = 1:numel(specStrs)

    % Design matrix: the listed regressors followed by a constant (last column)
    spec = specStrs{iRev};
    X    = [regData{:, spec}, ones(height(regData), 1)];
    Y    = regData.WEI;

    % Regressing the WEI on the regressors of this specification
    coeff           = regress(Y, X);
    coeffCell{iRev} = coeff;

end

% Generating Fitted Values
% The loop runs from the second revision down to the initial estimate
% because the initial estimate uses the fitted SECOND column as a regressor.
colNames = {'INIT','FIRST','SECOND'};
for iRev = 3:-1:1

    spec      = specStrs{iRev,1};
    isInitial = (iRev == 1);

    % For the initial estimate, the first lag of the final WEI is replaced
    % by the estimate for the second revision (mimics the real-time update)
    if isInitial
        spec{end-1} = 'SECOND';
    end

    % Design matrix: the listed regressors followed by a constant (last column)
    X = [regData{:, spec}, ones(height(regData), 1)];

    % The substituted SECOND column sits third from the end; lag it by one
    % period so that it takes the place of a first lag
    if isInitial
        X(:, end - 2) = lagmatrix(X(:, end - 2), 1);
    end

    coeff = coeffCell{iRev, 1};
    regData.(colNames{iRev}) = X * coeff;

end

%% Rescaling WEI/Revisions

% First and last quarters of the window used to rescale WEI to GDP
rescaleStart = datenum(2008, 1, 1);
rescaleEnd   = datenum(2019, 10, 1);

% Reading in GDP data and converting its dates to MATLAB datenums
gdpData      = readtable(fullfile('..', 'Data', 'gdp_vintage_data_full.csv'));
gdpData.DATE = datenum(gdpData.DATE);

% Transforming GDP to a 4 quarter log difference (times 100)
logGdp          = log(gdpData.GDPC1_20201222);
gdpData.GDP_d4l = (logGdp - lagmatrix(logGdp, 4)) * 100;

% Keeping only the GDP observations inside the rescaling window
gdpKeepIndex = (gdpData.DATE >= rescaleStart) & (gdpData.DATE <= rescaleEnd);
gdpRescale   = gdpData.GDP_d4l(gdpKeepIndex);

% Dating GDP at the end of the quarter for plotting (last day of the month
% two months after each GDP date)
gdpDatesEOQ = eomdate(year(gdpData.DATE), month(gdpData.DATE) + 2);

% Converting unscaled WEI to a quarterly average: carry each observation
% forward to a daily series, then average the daily values within quarters
weiTimes = datetime(datesFull, 'ConvertFrom', 'datenum');
weiTT    = array2timetable(weiFactor, 'RowTimes', weiTimes);
weiTT    = retime(retime(weiTT, 'daily', 'previous'), 'quarterly', 'mean');

% Subsetting quarterly WEI to the period used for rescaling
rescaleStartDT = datetime(rescaleStart, 'ConvertFrom', 'datenum');
rescaleEndDT   = datetime(rescaleEnd, 'ConvertFrom', 'datenum');
inRescale      = (weiTT.Time >= rescaleStartDT) & (weiTT.Time <= rescaleEndDT);
weiRescale     = weiTT.weiFactor(inRescale);

% Generating rescaling coefficients by regressing GDP growth on a constant
% and the quarterly WEI: rescaleCoeff = [intercept; slope]
rescaleCoeff = regress(gdpRescale, [ones(size(weiRescale)), weiRescale]);

% Applying the intercept and slope to the WEI and to each revision
colNames = {"WEI", "SECOND", "FIRST", "INIT"};
for iVersion = 1:numel(colNames)

    versionName = colNames{iVersion};
    weiVal      = regData.(versionName);
    regData.(versionName) = [ones(size(weiVal)), weiVal] * rescaleCoeff;

end

%% Computing RMSE's and correlations for 1/5/2008 - 2/29/2020

% Revision history as a numeric matrix, columns: INIT, FIRST, SECOND, WEI
revMat = regData{:, {'INIT', 'FIRST', 'SECOND', 'WEI'}};

% Labels shared by both output tables
estLabels  = {'INIT'; 'FIRST'; 'SECOND'};     % row labels (estimate being evaluated)
laterNames = {'FIRST', 'SECOND', 'FINAL'};    % column labels (later estimate compared against)

% Computing RMSEs: row i holds the RMSE between estimate i and each later
% estimate; entries below the diagonal stay NaN
rmseMat = NaN(3, 3);
for iEst = 1:3

    sqDiff   = (revMat - revMat(:, iEst)) .^ 2;     % squared gap to every column
    rmseVals = nanmean(sqDiff, 1) .^ (1/2);         % NaN rows are ignored

    rmseMat(iEst, iEst:end) = rmseVals(iEst + 1:end);

end

% Creating a table to output, with the estimate label as the first column
rmseHistTable = [table(estLabels, 'VariableNames', {'Estimate'}), ...
                 array2table(rmseMat, 'VariableNames', laterNames)];

% Computing Correlations on the rows with no NaN values
keepIndex   = ~any(isnan(revMat), 2);
corrAll     = corr(revMat(keepIndex, :));
corrHistMat = corrAll(1:end-1, 2:end);        % rows: INIT..SECOND, columns: FIRST..WEI

% Blanking the entries below the diagonal so the layout matches rmseMat
corrHistMat(tril(true(size(corrHistMat)), -1)) = NaN;

corrHistTable = [table(estLabels, 'VariableNames', {'Estimate'}), ...
                 array2table(corrHistMat, 'VariableNames', laterNames)];



%% Computing RMSE's and correlations for 3/28/2020 to 1/2/2021 (Published Values)

%  Loading in published WEI history; every column after the first is
%  treated as one estimate.
%  NOTE(review): the code below assumes those columns are ordered
%  INIT, FIRST, SECOND, FINAL -- confirm against the CSV header.
weiPublish    = readtable(fullfile('..','Data', 'wei_published_revision.csv'));
weiPublishMat = weiPublish{:, 2:end};

% Computing RMSEs: row i holds the RMSE between estimate i and each later
% estimate; entries below the diagonal stay NaN
rmseMat = NaN(3, 3);
for iEst = 1:3

    % The comparison matrix is sized from weiPublishMat itself, so this
    % section does not depend on the width of the historical matrix
    est      = weiPublishMat(:, iEst);
    estMat   = repmat(est, 1, size(weiPublishMat, 2));
    rmseVals = (weiPublishMat - estMat) .^ 2;
    rmseVals = nanmean(rmseVals, 1) .^ (1/2);   % NaN rows are ignored

    rmseMat(iEst, iEst:end) = rmseVals(iEst + 1:end);

end

% Creating a table to output, with the estimate label moved to the first column
rmsePublishTable              = array2table(rmseMat, 'VariableNames', {'FIRST', 'SECOND', 'FINAL'});
rmsePublishTable.('Estimate') = {'INIT', 'FIRST', 'SECOND'}';
rmsePublishTable              = rmsePublishTable(:, [end, 1:end-1]);

% Computing Correlations on the rows with no NaN values
keepIndex      = sum(isnan(weiPublishMat), 2) == 0; % Dropping NaN values
corrPublishMat = corr(weiPublishMat(keepIndex, :));
corrPublishMat = corrPublishMat(1:end-1, :);        % rows: all estimates except the last
corrPublishMat = corrPublishMat(:, 2:end);          % columns: all estimates except the first
corrPublishMat(2:end, 1) = NaN;                     % blanking entries below the diagonal
corrPublishMat(end, 2)   = NaN;

corrPublishTable              = array2table(corrPublishMat, 'VariableNames', {'FIRST', 'SECOND', 'FINAL'});
corrPublishTable.('Estimate') = {'INIT', 'FIRST', 'SECOND'}';
corrPublishTable              = corrPublishTable(:, [end, 1:end-1]);

%% Writing Outputs to CSV

% Output folder; created if missing, since writetable does not create folders
outDir = fullfile('..','Data', 'Outdata');
if ~exist(outDir, 'dir')
    mkdir(outDir);
end

% Historical (model-based) and published revision statistics
writetable(rmseHistTable,    fullfile(outDir, 'wei_revision_rmse_hist.csv'));
writetable(corrHistTable,    fullfile(outDir, 'wei_revision_corr_hist.csv'));
writetable(rmsePublishTable, fullfile(outDir, 'wei_revision_rmse_publish.csv'));
writetable(corrPublishTable, fullfile(outDir, 'wei_revision_corr_publish.csv'));