main_path <- getwd()
# The code in "data_preparation_wide.R" creates the data object "data", "data_wide_97Q4" and "data_wide_97Q4_PYNONG"
# it also writes a copy of the clean data in "data/data_clean_wide.csv"
source("data_preparation_wide.R")
source("functions.R")
data <- data_wide
rm(data_wide)
# load packages -----------------------------------------------------------

library(ARDL)
library(ggplot2)
library(lmtest) # for bgtest, resettest
library(parallel) # is uses forking and so it works only for Linux. Use another parallel method for windows or run everything on one core.
library(xtable)

# define the economic models
formula_n = w ~ Prod + UR + Wedge + Union -1 | D7475 + D7579
formula_c = w ~ Prod + UR + Wedge + Union | D7475 + D7579
formula_ct = w ~ Prod + UR + Wedge + Union + trend(w, scale = FALSE) | D7475 + D7579

# Sensitivity Analysis ----------------------------------------------------

# Stability of Coefficients -----------------------------------------------

# data_wide_97Q4: same as the data_wide dataset, but it just covers the narrow sense replication years
# data_wide_97Q4_PYNONG: same as the data_wide_97Q4 dataset, but uses PYNONG instead of PGVA as a deflator in variables W and Wedge

# Define ARDL order -------------------------------------------------------

# calculate unrestricted ECMs from the underlying ARDL(p) models with p = 1, 2, ..., 7.
uecm_n_p_97Q4 <- lapply(1:7, function(p) uecm(ardl(formula_n, order = c(p, p, p, p, p), data = data_wide_97Q4, start = c(1972, 4))))
uecm_c_p_97Q4 <- lapply(1:7, function(p) uecm(ardl(formula_c, order = c(p, p, p, p, p), data = data_wide_97Q4, start = c(1972, 4))))
uecm_ct_p_97Q4 <- lapply(1:7, function(p) uecm(ardl(formula_ct, order = c(p, p, p, p, p), data = data_wide_97Q4, start = c(1972, 4))))

Table_I_n_97Q4 <- Table_I(uecm_n_p_97Q4, LM_orders = 1:5, latex_type = FALSE)
Table_I_c_97Q4 <- Table_I(uecm_c_p_97Q4, LM_orders = 1:5, latex_type = FALSE)
Table_I_ct_97Q4 <- Table_I(uecm_ct_p_97Q4, LM_orders = 1:5, latex_type = FALSE)
Table_I_n_97Q4
Table_I_c_97Q4
Table_I_ct_97Q4

# check with restrictions on Prod and Union
uecm_n_p_restr_97Q4 <- lapply(1:7, function(p) uecm(ardl(formula_n, order = c(p, 1, p, p, 0), data = data_wide_97Q4, start = c(1972, 4))))
uecm_c_p_restr_97Q4 <- lapply(1:7, function(p) uecm(ardl(formula_c, order = c(p, 1, p, p, 0), data = data_wide_97Q4, start = c(1972, 4))))
uecm_ct_p_restr_97Q4 <- lapply(1:7, function(p) uecm(ardl(formula_ct, order = c(p, 1, p, p, 0), data = data_wide_97Q4, start = c(1972, 4))))
Table_I_n_restr_97Q4 <- Table_I(uecm_n_p_restr_97Q4, LM_orders = 1:5, latex_type = FALSE)
Table_I_c_restr_97Q4 <- Table_I(uecm_c_p_restr_97Q4, LM_orders = 1:5, latex_type = FALSE)
Table_I_ct_restr_97Q4 <- Table_I(uecm_ct_p_restr_97Q4, LM_orders = 1:5, latex_type = FALSE)
Table_I_n_restr_97Q4
Table_I_c_restr_97Q4
Table_I_ct_restr_97Q4

# Interestingly, the global search through all possible models, just like in the wide sense replication analysis,
# supports the empirical finding by PSS that the order of Prod is 1 (17 out of the top 20 models support this)
# and also 19 out of 20 top models conclude that the order of Union is 0.
# auto ardl with constant
# almost 30 mins
#   w Prod UR Wedge Union  AIC_pss
#1  1    1  6     5     0 313.7924
#2  1    1  3     5     0 313.6143
#3  1    1  6     4     0 313.5035
#4  3    1  3     5     0 313.2939
#5  3    1  3     4     0 313.2631
#6  3    1  6     4     0 313.2611
#7  3    1  6     5     0 313.1802
#8  1    1  1     5     0 313.1440
#9  1    1  3     4     0 313.1384
#10 1    1  4     5     0 312.8364
#11 1    2  3     5     0 312.8191
#12 1    1  6     5     1 312.8159
#13 2    1  6     5     0 312.8145
#14 1    2  6     5     0 312.8011
#15 1    1  7     5     0 312.7989
#16 1    1  6     6     0 312.7945
#17 3    1  4     5     0 312.7845
#18 3    2  3     5     0 312.7669
#19 3    1  4     4     0 312.7263
#20 4    1  3     5     0 312.7074
#models_top_auto_c_97Q4 <- auto_ardl(formula_c, data = data_wide_97Q4, start = c(1972, 4),
#                                    max_order = 7, grid = TRUE,
#                                    selection = "AIC_pss", selection_minmax = "max")
#models_top_auto_c_97Q4$top_order

# fix the Prod=1 and Union=0
# results practically equal to those of the full wide sense dataset
models_top_auto_c_97Q4 <- auto_ardl(formula_c, data = data_wide_97Q4, start = c(1972, 4),
                                    max_order = 7, fixed_order = c(-1,1,-1,-1,0),
                                    selection = "AIC_pss", selection_minmax = "max")
models_top_auto_c_97Q4$top_orders # ARDL(3,1,3,5,0) 313.2939

# using starting orders
# results practically equal to those of the full wide sense dataset
models_top_auto_c_97Q4 <- auto_ardl(formula_c, data = data_wide_97Q4, start = c(1972, 4),
                                    max_order = 7, starting_order =  c(4,1,4,4,0),
                                    selection = "AIC_pss", selection_minmax = "max", search_type = "ver")
models_top_auto_c_97Q4$top_orders # ARDL(4,1,3,5,0) 0 312.7074


# F and t bound tests -----------------------------------------------------
uecm_model_cases23_97Q4 <- list(uecm(formula_c, data = data_wide_97Q4, start = c(1972, 4), order = c(3,1,3,5,0)),
                                uecm(formula_c, data = data_wide_97Q4, start = c(1972, 4), order = c(3,1,3,4,0)),
                                uecm(formula_c, data = data_wide_97Q4, start = c(1972, 4), order = c(3,1,4,5,0)),
                                uecm(formula_c, data = data_wide_97Q4, start = c(1972, 4), order = c(3,1,4,4,0)),
                                uecm(formula_c, data = data_wide_97Q4, start = c(1972, 4), order = c(4,1,3,5,0)),
                                uecm(formula_c, data = data_wide_97Q4, start = c(1972, 4), order = c(4,1,3,4,0)),
                                uecm(formula_c, data = data_wide_97Q4, start = c(1972, 4), order = c(4,1,4,5,0)),
                                uecm(formula_c, data = data_wide_97Q4, start = c(1972, 4), order = c(4,1,4,4,0)),
                                uecm(formula_c, data = data_wide_97Q4, start = c(1972, 4), order = c(4,1,1,5,0)),
                                uecm(formula_c, data = data_wide_97Q4, start = c(1972, 4), order = c(6,1,6,5,0)),
                                uecm(formula_c, data = data_wide_97Q4, start = c(1972, 4), order = c(6,1,3,5,0)),
                                uecm(formula_c, data = data_wide_97Q4, start = c(1972, 4), order = c(7,1,3,4,0)))
# just to complete the table. We don't actually examine the case with trend as we already know the structure of the data
uecm_model_cases45_97Q4 <- list(uecm(formula_ct, data = data_wide_97Q4, start = c(1972, 4), order = c(3,1,3,5,0)),
                                uecm(formula_ct, data = data_wide_97Q4, start = c(1972, 4), order = c(3,1,3,4,0)),
                                uecm(formula_ct, data = data_wide_97Q4, start = c(1972, 4), order = c(3,1,4,5,0)),
                                uecm(formula_ct, data = data_wide_97Q4, start = c(1972, 4), order = c(3,1,4,4,0)),
                                uecm(formula_ct, data = data_wide_97Q4, start = c(1972, 4), order = c(4,1,3,5,0)),
                                uecm(formula_ct, data = data_wide_97Q4, start = c(1972, 4), order = c(4,1,3,4,0)),
                                uecm(formula_ct, data = data_wide_97Q4, start = c(1972, 4), order = c(4,1,4,5,0)),
                                uecm(formula_ct, data = data_wide_97Q4, start = c(1972, 4), order = c(4,1,4,4,0)),
                                uecm(formula_ct, data = data_wide_97Q4, start = c(1972, 4), order = c(4,1,1,5,0)),
                                uecm(formula_ct, data = data_wide_97Q4, start = c(1972, 4), order = c(6,1,6,5,0)),
                                uecm(formula_ct, data = data_wide_97Q4, start = c(1972, 4), order = c(6,1,3,5,0)),
                                uecm(formula_ct, data = data_wide_97Q4, start = c(1972, 4), order = c(7,1,3,4,0)))

Table_II_97Q4 <- build_Table_II(uecm_model_cases23_97Q4, uecm_model_cases45_97Q4, cases = c(2,3,4,5), replication = "wide", latex_type = FALSE)
Table_II_97Q4
Table_II_97Q4[1:12,c(5:7)] <- "-"
Table_II_97Q4
# The long-run relationship hypothesis is supported by the vast majority of the candidate models
# pass the F_II, F_III and t_III test:
# ARDL(3,1,3,5,0)
# ARDL(3,1,3,4,0
# ARDL(3,1,4,5,0)
# ARDL(3,1,4,4,0)
# ARDL(4,1,3,4,0)
# ARDL(4,1,4,4,0)
# ARDL(4,1,1,5,0)
# ARDL(7,1,3,4,0)

# Combining the results from Table_I_c_97Q4 and Table_II_97Q4
# ARDL(4,1,3,4,0), ARDL(4,1,4,4,0) and ARDL(4,1,1,5,0) stand out as the most suitable models
# which are all very close to those selected in the wide sense replication analysis

# parsimonious unrestricted conditional ECMs
uecm_c_41340_97Q4 <- uecm(formula_c, order = c(4, 1, 3, 4, 0), data = data_wide_97Q4, start = c(1972, 4))
uecm_c_41440_97Q4 <- uecm(formula_c, order = c(4, 1, 4, 4, 0), data = data_wide_97Q4, start = c(1972, 4))
uecm_c_41150_97Q4 <- uecm(formula_c, order = c(4, 1, 1, 5, 0), data = data_wide_97Q4, start = c(1972, 4))

# data preparation for the plot
uecm_c_coint_eq_97Q4 <- data.frame(w = rep(data_wide_97Q4[,'w'], 12),Time = rep(time(data_wide_97Q4),12),
                                   data = c(data_wide_97Q4[,'w'], coint_eq(uecm_c_41340_97Q4, case = 2),
                                            data_wide_97Q4[,'w'], coint_eq(uecm_c_41340_97Q4, case = 3),
                                            data_wide_97Q4[,'w'], coint_eq(uecm_c_41440_97Q4, case = 2),
                                            data_wide_97Q4[,'w'], coint_eq(uecm_c_41440_97Q4, case = 3),
                                            data_wide_97Q4[,'w'], coint_eq(uecm_c_41150_97Q4, case = 2),
                                            data_wide_97Q4[,'w'], coint_eq(uecm_c_41150_97Q4, case = 3)),
                                   Variables = c(rep("w", nrow(data_wide_97Q4)),
                                                 rep("Long-Run Relationship", nrow(data_wide_97Q4)),
                                                 rep("w", nrow(data_wide_97Q4)),
                                                 rep("Long-Run Relationship", nrow(data_wide_97Q4)),
                                                 rep("w", nrow(data_wide_97Q4)),
                                                 rep("Long-Run Relationship", nrow(data_wide_97Q4)),
                                                 rep("w", nrow(data_wide_97Q4)),
                                                 rep("Long-Run Relationship", nrow(data_wide_97Q4)),
                                                 rep("w", nrow(data_wide_97Q4)),
                                                 rep("Long-Run Relationship", nrow(data_wide_97Q4)),
                                                 rep("w", nrow(data_wide_97Q4)),
                                                 rep("Long-Run Relationship", nrow(data_wide_97Q4))),
                                   case = c(rep("Case 2",nrow(data_wide_97Q4)*2),
                                            rep("Case 3",nrow(data_wide_97Q4)*2),
                                            rep("Case 2",nrow(data_wide_97Q4)*2),
                                            rep("Case 3",nrow(data_wide_97Q4)*2),
                                            rep("Case 2",nrow(data_wide_97Q4)*2),
                                            rep("Case 3",nrow(data_wide_97Q4)*2)),
                                   ardl = c(rep("ARDL(4,1,3,4,0)",nrow(data_wide_97Q4)*4),
                                            rep("ARDL(4,1,4,4,0)",nrow(data_wide_97Q4)*4),
                                            rep("ARDL(4,1,1,5,0)",nrow(data_wide_97Q4)*4)))

ggplot(data = uecm_c_coint_eq_97Q4, aes(x = Time, y = data, group = Variables, linetype = Variables)) +
    geom_line() +
    facet_grid(case ~ ardl, scales = "free") +
    labs(y = 'Log Scale') +
    theme(legend.position="bottom")

# Long-Run multipliers
# The wide sense replication long-run multipliers are robust in changes of the regime examined
lr_mult_41340_97Q4 <- multipliers(uecm_c_41340_97Q4)
lr_mult_41340_97Q4
tab_lr_mult_41340_97Q4 <- rbind(lr_mult_41340_97Q4[-1,], lr_mult_41340_97Q4[1,])[,c(1:3,5)]
colnames(tab_lr_mult_41340_97Q4)[1] <- "variable"
print(xtable(tab_lr_mult_41340_97Q4, digits = 3), include.rownames=FALSE)

lr_mult_41440_97Q4 <- multipliers(uecm_c_41440_97Q4)
lr_mult_41440_97Q4
tab_lr_mult_41440_97Q4 <- rbind(lr_mult_41440_97Q4[-1,], lr_mult_41440_97Q4[1,])[,c(1:3,5)]
colnames(tab_lr_mult_41440_97Q4)[1] <- "variable"
print(xtable(tab_lr_mult_41440_97Q4, digits = 3), include.rownames=FALSE)

lr_mult_41150_97Q4 <- multipliers(uecm_c_41150_97Q4)
lr_mult_41150_97Q4
tab_lr_mult_41150_97Q4 <- rbind(lr_mult_41150_97Q4[-1,], lr_mult_41150_97Q4[1,])[,c(1:3,5)]
colnames(tab_lr_mult_41150_97Q4)[1] <- "variable"
print(xtable(tab_lr_mult_41150_97Q4, digits = 3), include.rownames=FALSE)


# Conclusion:
# The wide sense replication results are robust in changes of the regime examined
# in terms of model selection, conclusion about the cointegrating relationship
# and long-run multipliers




##################################################################################################

uecm_n_p_97Q4_PYNONG <- lapply(1:7, function(p) uecm(ardl(formula_n, order = c(p, p, p, p, p), data = data_wide_97Q4_PYNONG, start = c(1972, 4))))
uecm_c_p_97Q4_PYNONG <- lapply(1:7, function(p) uecm(ardl(formula_c, order = c(p, p, p, p, p), data = data_wide_97Q4_PYNONG, start = c(1972, 4))))
uecm_ct_p_97Q4_PYNONG <- lapply(1:7, function(p) uecm(ardl(formula_ct, order = c(p, p, p, p, p), data = data_wide_97Q4_PYNONG, start = c(1972, 4))))

Table_I_n_97Q4_PYNONG <- Table_I(uecm_n_p_97Q4_PYNONG, LM_orders = 1:5, latex_type = FALSE)
Table_I_c_97Q4_PYNONG <- Table_I(uecm_c_p_97Q4_PYNONG, LM_orders = 1:5, latex_type = FALSE)
Table_I_ct_97Q4_PYNONG <- Table_I(uecm_ct_p_97Q4_PYNONG, LM_orders = 1:5, latex_type = FALSE)
Table_I_n_97Q4_PYNONG
Table_I_c_97Q4_PYNONG
Table_I_ct_97Q4_PYNONG

# auto ardl with constant
# almost 30 mins
# We don't notice a strong pattern for the orders
# The restriction about the order of Prod being 1, as in both the narrow and wide replications, is not met here
# The restriction about the order of Union being 0 is also not met.
#   w Prod UR Wedge Union  AIC_pss
#1  7    3  5     5     6 307.7584
#2  6    3  5     7     6 307.6766
#3  4    3  7     4     2 307.5885
#4  7    3  5     7     6 307.5455
#5  7    3  6     5     6 307.5266
#6  7    3  5     6     6 307.3526
#7  4    3  7     5     2 307.3150
#8  7    3  6     5     7 307.3092
#9  7    4  5     5     6 307.1866
#10 6    3  5     5     6 307.1025
#11 4    3  7     4     3 307.0332
#12 7    3  5     5     7 307.0331
#13 6    3  6     5     6 307.0135
#14 6    3  6     5     7 306.9918
#15 6    3  6     7     6 306.9689
#16 6    3  5     7     7 306.9576
#17 7    4  5     6     6 306.9547
#18 6    4  5     7     6 306.9506
#19 7    4  5     7     6 306.9505
#20 6    3  7     5     7 306.9379
#models_top_auto_c_97Q4_PYNONG <- auto_ardl(formula_c, data = data_wide_97Q4_PYNONG, start = c(1972, 4),
#                                           max_order = 7, grid = TRUE, selection = "AIC_pss", selection_minmax = "max")
#models_top_auto_c_97Q4_PYNONG$top_order

# using starting orders
# The results match those of the narrow sense replication more than those of the wide sense replication
models_top_auto_c97Q4_PYNONG <- auto_ardl(formula_c, data = data_wide_97Q4_PYNONG, start = c(1972, 4),
                                          max_order = 7, starting_order =  c(6,3,5,7,6),
                                          selection = "AIC_pss", selection_minmax = "max", search_type = "ver")
models_top_auto_c97Q4_PYNONG$top_orders # ARDL(6,3,5,7,6) 307.6766


# F and t bound tests -----------------------------------------------------
uecm_model_cases23_97Q4_PYNONG <- list(uecm(formula_c, data = data_wide_97Q4_PYNONG, start = c(1972, 4), order = c(7,3,5,5,6)),
                                       uecm(formula_c, data = data_wide_97Q4_PYNONG, start = c(1972, 4), order = c(6,3,5,7,6)),
                                       uecm(formula_c, data = data_wide_97Q4_PYNONG, start = c(1972, 4), order = c(4,3,7,4,2)),
                                       uecm(formula_c, data = data_wide_97Q4_PYNONG, start = c(1972, 4), order = c(7,3,5,7,6)),
                                       uecm(formula_c, data = data_wide_97Q4_PYNONG, start = c(1972, 4), order = c(7,3,6,5,6)),
                                       uecm(formula_c, data = data_wide_97Q4_PYNONG, start = c(1972, 4), order = c(7,3,5,6,6)),
                                       uecm(formula_c, data = data_wide_97Q4_PYNONG, start = c(1972, 4), order = c(4,3,7,5,2)),
                                       uecm(formula_c, data = data_wide_97Q4_PYNONG, start = c(1972, 4), order = c(6,3,5,5,6)),
                                       uecm(formula_c, data = data_wide_97Q4_PYNONG, start = c(1972, 4), order = c(4,3,7,4,3)),
                                       uecm(formula_c, data = data_wide_97Q4_PYNONG, start = c(1972, 4), order = c(6,3,6,5,6)))
# just to complete the table. We don't actually examine the case with trend as we already know the structure of the data
uecm_model_cases45_97Q4_PYNONG <- list(uecm(formula_ct, data = data_wide_97Q4_PYNONG, start = c(1972, 4), order = c(7,3,5,5,6)),
                                       uecm(formula_ct, data = data_wide_97Q4_PYNONG, start = c(1972, 4), order = c(6,3,5,7,6)),
                                       uecm(formula_ct, data = data_wide_97Q4_PYNONG, start = c(1972, 4), order = c(4,3,7,4,2)),
                                       uecm(formula_ct, data = data_wide_97Q4_PYNONG, start = c(1972, 4), order = c(7,3,5,7,6)),
                                       uecm(formula_ct, data = data_wide_97Q4_PYNONG, start = c(1972, 4), order = c(7,3,6,5,6)),
                                       uecm(formula_ct, data = data_wide_97Q4_PYNONG, start = c(1972, 4), order = c(7,3,5,6,6)),
                                       uecm(formula_ct, data = data_wide_97Q4_PYNONG, start = c(1972, 4), order = c(4,3,7,5,2)),
                                       uecm(formula_ct, data = data_wide_97Q4_PYNONG, start = c(1972, 4), order = c(6,3,5,5,6)),
                                       uecm(formula_ct, data = data_wide_97Q4_PYNONG, start = c(1972, 4), order = c(4,3,7,4,3)),
                                       uecm(formula_ct, data = data_wide_97Q4_PYNONG, start = c(1972, 4), order = c(6,3,6,5,6)))

Table_II_97Q4_PYNONG <- build_Table_II(uecm_model_cases23_97Q4_PYNONG, uecm_model_cases45_97Q4_PYNONG, cases = c(2,3,4,5), replication = "wide", latex_type = FALSE)
Table_II_97Q4_PYNONG
Table_II_97Q4_PYNONG[1:10,c(5:7)] <- "-"
Table_II_97Q4_PYNONG
# pass the F_II and F_III tests but not the t_III:
# ARDL(7,3,5,5,6)
# ARDL(6,3,5,7,6)
# ARDL(7,3,5,7,6)
# ARDL(7,3,6,5,6)
# ARDL(7,3,5,6,6)
# ARDL(6,3,5,5,6)
# ARDL(6,3,6,5,6)

# Combining the results from Table_I_c_97Q4_PYNONG and Table_II_97Q4_PYNONG
# ARDL(6,3,5,7,6), ARDL(6,3,5,5,6) and ARDL(6,3,6,5,6) stand out as the most suitable models
# which are all very close to the one selected in the narrow sense replication analysis

# parsimonious unrestricted conditional ECMs
uecm_c_63576_97Q4_PYNONG <- uecm(formula_c, order = c(6,3,5,7,6), data = data_wide_97Q4_PYNONG, start = c(1972, 4))
uecm_c_63556_97Q4_PYNONG <- uecm(formula_c, order = c(6,3,5,5,6), data = data_wide_97Q4_PYNONG, start = c(1972, 4))
uecm_c_63656_97Q4_PYNONG <- uecm(formula_c, order = c(6,3,6,5,6), data = data_wide_97Q4_PYNONG, start = c(1972, 4))

# data preparation for the plot
uecm_c_coint_eq_97Q4_PYNONG <- data.frame(w = rep(data_wide_97Q4_PYNONG[,'w'], 12),Time = rep(time(data_wide_97Q4_PYNONG),12),
                                          data = c(data_wide_97Q4_PYNONG[,'w'], coint_eq(uecm_c_63576_97Q4_PYNONG, case = 2),
                                                   data_wide_97Q4_PYNONG[,'w'], coint_eq(uecm_c_63576_97Q4_PYNONG, case = 3),
                                                   data_wide_97Q4_PYNONG[,'w'], coint_eq(uecm_c_63556_97Q4_PYNONG, case = 2),
                                                   data_wide_97Q4_PYNONG[,'w'], coint_eq(uecm_c_63556_97Q4_PYNONG, case = 3),
                                                   data_wide_97Q4_PYNONG[,'w'], coint_eq(uecm_c_63656_97Q4_PYNONG, case = 2),
                                                   data_wide_97Q4_PYNONG[,'w'], coint_eq(uecm_c_63656_97Q4_PYNONG, case = 3)),
                                          Variables = c(rep("w", nrow(data_wide_97Q4_PYNONG)),
                                                        rep("Long-Run Relationship", nrow(data_wide_97Q4_PYNONG)),
                                                        rep("w", nrow(data_wide_97Q4_PYNONG)),
                                                        rep("Long-Run Relationship", nrow(data_wide_97Q4_PYNONG)),
                                                        rep("w", nrow(data_wide_97Q4_PYNONG)),
                                                        rep("Long-Run Relationship", nrow(data_wide_97Q4_PYNONG)),
                                                        rep("w", nrow(data_wide_97Q4_PYNONG)),
                                                        rep("Long-Run Relationship", nrow(data_wide_97Q4_PYNONG)),
                                                        rep("w", nrow(data_wide_97Q4_PYNONG)),
                                                        rep("Long-Run Relationship", nrow(data_wide_97Q4_PYNONG)),
                                                        rep("w", nrow(data_wide_97Q4_PYNONG)),
                                                        rep("Long-Run Relationship", nrow(data_wide_97Q4_PYNONG))),
                                          case = c(rep("Case 2",nrow(data_wide_97Q4_PYNONG)*2),
                                                   rep("Case 3",nrow(data_wide_97Q4_PYNONG)*2),
                                                   rep("Case 2",nrow(data_wide_97Q4_PYNONG)*2),
                                                   rep("Case 3",nrow(data_wide_97Q4_PYNONG)*2),
                                                   rep("Case 2",nrow(data_wide_97Q4_PYNONG)*2),
                                                   rep("Case 3",nrow(data_wide_97Q4_PYNONG)*2)),
                                          ardl = c(rep("ARDL(6,3,5,7,6)",nrow(data_wide_97Q4_PYNONG)*4),
                                                   rep("ARDL(6,3,5,5,6)",nrow(data_wide_97Q4_PYNONG)*4),
                                                   rep("ARDL(6,3,6,5,6)",nrow(data_wide_97Q4_PYNONG)*4)))

# The long-run relationship is very volatile and unstable across time
ggplot(data = uecm_c_coint_eq_97Q4_PYNONG, aes(x = Time, y = data, group = Variables, linetype = Variables)) +
    geom_line() +
    facet_grid(case ~ ardl, scales = "free") +
    labs(y = 'Log Scale') +
    theme(legend.position="bottom")

# Long-Run multipliers
# All the long-run multipliers except the one for Prod are very unstable even for similar models
lr_mult_63576_97Q4_PYNONG <- multipliers(uecm_c_63576_97Q4_PYNONG)
lr_mult_63576_97Q4_PYNONG
tab_lr_mult_63576_97Q4_PYNONG <- rbind(lr_mult_63576_97Q4_PYNONG[-1,], lr_mult_63576_97Q4_PYNONG[1,])[,c(1:3,5)]
colnames(tab_lr_mult_63576_97Q4_PYNONG)[1] <- "variable"
print(xtable(tab_lr_mult_63576_97Q4_PYNONG, digits = 3), include.rownames=FALSE)

lr_mult_63556_97Q4_PYNONG <- multipliers(uecm_c_63556_97Q4_PYNONG)
lr_mult_63556_97Q4_PYNONG
tab_lr_mult_63556_97Q4_PYNONG <- rbind(lr_mult_63556_97Q4_PYNONG[-1,], lr_mult_63556_97Q4_PYNONG[1,])[,c(1:3,5)]
colnames(tab_lr_mult_63556_97Q4_PYNONG)[1] <- "variable"
print(xtable(tab_lr_mult_63556_97Q4_PYNONG, digits = 3), include.rownames=FALSE)

lr_mult_63656_97Q4_PYNONG <- multipliers(uecm_c_63656_97Q4_PYNONG)
lr_mult_63656_97Q4_PYNONG
tab_lr_mult_63656_97Q4_PYNONG <- rbind(lr_mult_63656_97Q4_PYNONG[-1,], lr_mult_63656_97Q4_PYNONG[1,])[,c(1:3,5)]
colnames(tab_lr_mult_63656_97Q4_PYNONG)[1] <- "variable"
print(xtable(tab_lr_mult_63656_97Q4_PYNONG, digits = 3), include.rownames=FALSE)


# Conclusion:
# Using the wide sense replication data with the deflator used in the narrow sense replication,
# and applying the analysis in the regime examined in the narrow sense replication,
# the results match those of the narrow sense replication more than those of the wide sense replication,
# in terms of model selection and conclusion about the cointegrating relationship.
# All the long-run multipliers except the one for Prod are very unstable even for similar models
# and the long-run relationship in general is very volatile and unstable across time.


# Sensitivity Analysis Conclusion
# Using PYNONG as the deflator makes the long-run multipliers unstable
# and the long-run relationship is very volatile and unstable across time.
# Using PGVA as the deflator, the model selection is more consistent, 
# the long-run relationship hypothesis is fully supported, 
# and the estimation of the long-run multipliers is robust regardless of the regime examined (1970:Q1-1997:Q4 or 1971:Q1-2019:Q4)


# Sensitivity of Coefficients and Adjustment Coefficient ------------------

# AIC_pss optimization
auto_ardl_AIC_pss <- function(i) {
    auto_ardl(formula_c, data = data, start = c(1972, 4), 
              fixed_order = c(i,-1,-1,-1,-1), max_order = 7, grid = TRUE,
              selection = "AIC_pss", selection_minmax = "max")
}
models_full_grid_AIC_pss <- mclapply(1:7, auto_ardl_AIC_pss, mc.cores = 7)

top_orders_full_grid_AIC_pss <- data.frame()
for (i in 1:7) {
    top_orders_full_grid_AIC_pss <- rbind(top_orders_full_grid_AIC_pss, models_full_grid_AIC_pss[[i]]$top_orders)
}
top_orders_full_grid_AIC_pss <- top_orders_full_grid_AIC_pss[order(top_orders_full_grid_AIC_pss$AIC_pss, decreasing = TRUE), ]
top_orders_full_grid_AIC_pss[1:20,]
#w Prod UR Wedge Union  AIC_pss
#1  4    1  0     5     0 590.9790
#2  4    1  3     5     0 590.3261
#3  2    1  0     5     0 590.1678
#4  4    2  0     5     0 590.1261
#5  4    1  0     5     1 590.0044
#6  4    1  0     6     0 589.9855
#7  5    1  0     5     0 589.9828
#8  4    1  1     5     0 589.9822
#9  4    4  0     5     0 589.8598
#10 4    1  0     5     2 589.8535
#11 4    3  0     5     0 589.7704
#12 2    1  3     5     0 589.6904
#13 1    1  3     5     0 589.6516
#14 4    1  3     5     2 589.6343
#15 1    1  0     5     0 589.6070
#16 3    1  0     5     0 589.4113
#17 4    1  4     5     0 589.4024
#18 4    1  3     5     1 589.3952
#19 2    2  0     5     0 589.3864
#20 4    1  2     5     0 589.3393


n_top <- 100
mult_int <- c(); mult_int_p <- c()
mult_prod <- c(); mult_prod_p <- c()
mult_ur <- c(); mult_ur_p <- c()
mult_wedge <- c(); mult_wedge_p <- c()
mult_union <- c(); mult_union_p <- c()
ect_value <- c()
for (i in 1:n_top) {
    ardl_top_models <- ardl(formula_c, data = data, start = c(1972, 4), order = unlist(top_orders_full_grid_AIC_pss[i,-6]))
    mult_table <- multipliers(ardl_top_models)
    
    mult_int <- c(mult_int, mult_table$estimate[1])
    mult_prod <- c(mult_prod, mult_table$estimate[2])
    mult_ur <- c(mult_ur, mult_table$estimate[3])
    mult_wedge <- c(mult_wedge, mult_table$estimate[4])
    mult_union <- c(mult_union, mult_table$estimate[5])
    
    mult_int_p <- c(mult_int_p, mult_table$p.value[1])
    mult_prod_p <- c(mult_prod_p, mult_table$p.value[2])
    mult_ur_p <- c(mult_ur_p, mult_table$p.value[3])
    mult_wedge_p <- c(mult_wedge_p, mult_table$p.value[4])
    mult_union_p <- c(mult_union_p, mult_table$p.value[5])
    
    ect_value <- c(ect_value, 
                   summary(uecm(ardl_top_models))$coeff[rownames(summary(uecm(ardl_top_models))$coeff) == "L(w, 1)",][1])
}
mult_range_table_AIC_pss <- data.frame(term = mult_table$term,
                                       min = c(min(mult_int), min(mult_prod),
                                               min(mult_ur), min(mult_wedge),
                                               min(mult_union)),
                                       max = c(max(mult_int), max(mult_prod),
                                               max(mult_ur), max(mult_wedge),
                                               max(mult_union)),
                                       stat_sig_perc = c(sum(mult_int_p<0.05)/n_top*100,
                                                         sum(mult_prod_p<0.05)/n_top*100,
                                                         sum(mult_ur_p<0.05)/n_top*100,
                                                         sum(mult_wedge_p<0.05)/n_top*100,
                                                         sum(mult_union_p<0.05)/n_top*100))
mult_range_table_AIC_pss
#         term         min         max stat_sig_perc
#1 (Intercept)  1.94683989  2.43369412           100
#2        Prod  0.84683544  0.87867016           100
#3          UR -0.12511515 -0.09043142           100
#4       Wedge  0.05166257  0.20807625             0
#5       Union -0.20240162  0.21381103             0

#print(xtable(rbind(mult_range_table_AIC_pss[-1,], mult_range_table_AIC_pss[1,]), digits = 3), include.rownames = FALSE)

ect_range_table_AIC_pss <- data.frame(term = "ect",
                                      min = min(ect_value),
                                      max = max(ect_value))
ect_range_table_AIC_pss
ect_value_df <- as.data.frame(ect_value)
ect_value_density <- density(ect_value_df$ect_value)
ect_density_plot <- ggplot(ect_value_df, aes(x=ect_value)) + 
    geom_density() +
    geom_vline(aes(xintercept=mean(ect_value), linetype="any"),
               size=0.5) +
    geom_vline(aes(xintercept=mean(ect_value[which(top_orders_full_grid_AIC_pss$UR[1:100] == 0)]), linetype="0"),
               size=0.5) +
    geom_vline(aes(xintercept=mean(ect_value[which(top_orders_full_grid_AIC_pss$UR[1:100] == 3)]), linetype="3"),
               size=0.5) +
    scale_linetype_manual(values = c("any" = "solid", "0" = "dashed", "3" = "dotted")) +
    xlim(range(ect_value_density$x)) +
    labs(linetype=expression(paste("mean of ", pi[y], " when order of UR is:"))) +
    xlab(label = expression(pi[y])) +
    theme(legend.position="bottom")
ect_density_plot

min(ect_value) #-0.1947969
max(ect_value) #-0.1177892
mean(ect_value) #-0.1449666

table_ect_small <- table(top_orders_full_grid_AIC_pss[which(ect_value < mean(ect_value)),"UR"])
table_ect_small
#2  3  4  5  6  7 
#1 29  6  3  4  1 
table_ect_big <- table(top_orders_full_grid_AIC_pss[which(ect_value > mean(ect_value)),"UR"])
table_ect_big
#0  1  2
#44  9  3
all_ect_small <- as.numeric(rep(names(table_ect_small), table_ect_small))
all_ect_big <- as.numeric(rep(names(table_ect_big), table_ect_big))

ect_small_orders_plot <- ggplot(data.frame(all_ect_small), aes(x=all_ect_small)) + 
    geom_density() +
    xlab(label = "Order of UR")
ect_small_orders_plot

ect_big_orders_plot <- ggplot(data.frame(all_ect_big), aes(x=all_ect_big)) + 
    geom_density() +
    xlab(label = "Order of UR") +
    scale_x_continuous(breaks = c(unique(all_ect_big)))
ect_big_orders_plot


#for example test ARDL(4,1,0,5,0) and ARDL(4,1,3,5,0)
ect_test_ur_0 <- ardl(formula_c, data = data, start = c(1972, 4), order = c(4,1,0,5,0))
summary(uecm(ect_test_ur_0))$coeff[rownames(summary(uecm(ect_test_ur_0))$coeff) == "L(w, 1)",][1] #-0.12466

ect_test_ur_3 <- ardl(formula_c, data = data, start = c(1972, 4), order = c(4,1,3,5,0))
summary(uecm(ect_test_ur_3))$coeff[rownames(summary(uecm(ect_test_ur_3))$coeff) == "L(w, 1)",][1] #-0.1582781

multipliers(ect_test_ur_0)
#        term    estimate  std.error t.statistic      p.value
#1 (Intercept)  2.29240443 0.47028696  4.87448004 2.466339e-06
#2        Prod  0.85506003 0.07197622 11.87975711 3.774809e-24
#3          UR -0.11017513 0.03193436 -3.45004936 7.050707e-04
#4       Wedge  0.17361626 0.15293107  1.13525829 2.578466e-01
#5       Union  0.01667649 0.41843949  0.03985401 9.682557e-01
multipliers(ect_test_ur_3)
#        term    estimate  std.error t.statistic      p.value
#1 (Intercept)  2.18957431 0.41792400   5.2391686 4.754566e-07
#2        Prod  0.85825241 0.05856830  14.6538723 6.563274e-32
#3          UR -0.10780458 0.02729022  -3.9503007 1.144411e-04
#4       Wedge  0.12035187 0.12911175   0.9321528 3.525874e-01
#5       Union -0.04118141 0.35605015  -0.1156618 9.080579e-01

#print(xtable(rbind(multipliers(ect_test_ur_3)[-1,], multipliers(ect_test_ur_3)[1,])), include.rownames = FALSE)


# R^2 and ect coefficient behaviour ---------------------------------------

# Why R^2 drop in wide sense replication? ---------------------------------
# Behavior of error correction coefficient depending on inclusion/exclusion lags for the first differences of UR & Union


# selected model for wide replication
modelToTest <- summary(uecm(formula_c, order = c(4, 1, 0, 5, 0), data = data, start = c(1972, 4)))
modelToTest$adj.r.squared #0.3286984
modelToTest$coeff[rownames(modelToTest$coeff) == "L(w, 1)",][1] #-0.12466 
# including lags for the first differences of the UR and Union variables
modelToTest <- summary(uecm(formula_c, order = c(4, 1, 4, 5, 0), data = data, start = c(1972, 4)))
modelToTest$adj.r.squared #0.3301159
modelToTest$coeff[rownames(modelToTest$coeff) == "L(w, 1)",][1] #-0.162593 

modelToTest <- summary(uecm(formula_c, order = c(4, 1, 0, 5, 4), data = data, start = c(1972, 4)))
modelToTest$adj.r.squared #0.3234368
modelToTest$coeff[rownames(modelToTest$coeff) == "L(w, 1)",][1] #-0.1181488 

modelToTest <- summary(uecm(formula_c, order = c(4, 1, 4, 5, 4), data = data, start = c(1972, 4)))
modelToTest$adj.r.squared #0.3290338
modelToTest$coeff[rownames(modelToTest$coeff) == "L(w, 1)",][1] #-0.1569022
# under the period covered in the wide sense replication, including lags of first differences has not an impact on R^2
# but when we increase the order of UR, the magnitude of the error correction coefficient grows (especially between 3-6)

# check if the sample size matters
modelToTest <- summary(uecm(formula_c, order = c(4, 1, 0, 5, 0), data = data_wide_97Q4, start = c(1972, 4)))
modelToTest$adj.r.squared #0.3854668
modelToTest$coeff[rownames(modelToTest$coeff) == "L(w, 1)",][1] #-0.2875988 

modelToTest <- summary(uecm(formula_c, order = c(4, 1, 4, 5, 0), data = data_wide_97Q4, start = c(1972, 4)))
modelToTest$adj.r.squared #0.422776
modelToTest$coeff[rownames(modelToTest$coeff) == "L(w, 1)",][1] #-0.3914479 

modelToTest <- summary(uecm(formula_c, order = c(4, 1, 0, 5, 4), data = data_wide_97Q4, start = c(1972, 4)))
modelToTest$adj.r.squared #0.375932
modelToTest$coeff[rownames(modelToTest$coeff) == "L(w, 1)",][1] #-0.330778 

modelToTest <- summary(uecm(formula_c, order = c(4, 1, 4, 5, 4), data = data_wide_97Q4, start = c(1972, 4)))
modelToTest$adj.r.squared # 0.4135793
modelToTest$coeff[rownames(modelToTest$coeff) == "L(w, 1)",][1] #-0.4006022 
# Indeed, under the sample period covered in the narrow sense replication, using the wide sense data,
# it does increases R^2 and the ect coefficient.
# Including lags of first differences, especially of UR, it also increases the R^2 and the ect coefficient further.
# But it still doesn't reach the narrow sense replication R^2.

# We know that R^2 depends on the scale and variability of the dependent variable
# Check its impact here
# We use the deflator from the narrow sense replication to get close to the narrow sense data,
# especially focusing on the dependent variable (w). The deflator does not affect the focus variables UR and Union
modelToTest <- summary(uecm(formula_c, order = c(4, 1, 0, 5, 0), data = data_wide_97Q4_PYNONG, start = c(1972, 4)))
modelToTest$adj.r.squared #0.3540362
modelToTest$coeff[rownames(modelToTest$coeff) == "L(w, 1)",][1] #-0.1839566 

modelToTest <- summary(uecm(formula_c, order = c(4, 1, 4, 5, 0), data = data_wide_97Q4_PYNONG, start = c(1972, 4)))
modelToTest$adj.r.squared #0.3947719
modelToTest$coeff[rownames(modelToTest$coeff) == "L(w, 1)",][1] #-0.2910388 

modelToTest <- summary(uecm(formula_c, order = c(4, 1, 0, 5, 4), data = data_wide_97Q4_PYNONG, start = c(1972, 4)))
modelToTest$adj.r.squared #0.3864317
modelToTest$coeff[rownames(modelToTest$coeff) == "L(w, 1)",][1] #-0.06237176 

modelToTest <- summary(uecm(formula_c, order = c(4, 1, 4, 5, 4), data = data_wide_97Q4_PYNONG, start = c(1972, 4)))
modelToTest$adj.r.squared #0.4456046
modelToTest$coeff[rownames(modelToTest$coeff) == "L(w, 1)",][1] #-0.1878699 
# the same small increase in R^2 exists here
# the same pattern for the ect coefficient exists here but interestingly its magnitude drops when we increase the order of UNION

# adapt the models on the new dataset
modelToTest <- summary(uecm(formula_c, order = c(6,3,0,7,0), data = data_wide_97Q4_PYNONG, start = c(1972, 4)))
modelToTest$adj.r.squared #0.3636522
modelToTest$coeff[rownames(modelToTest$coeff) == "L(w, 1)",][1] #-0.1343429 

modelToTest <- summary(uecm(formula_c, order = c(6,3,5,7,0), data = data_wide_97Q4_PYNONG, start = c(1972, 4)))
modelToTest$adj.r.squared #0.4261027
modelToTest$coeff[rownames(modelToTest$coeff) == "L(w, 1)",][1] #-0.2933774 

modelToTest <- summary(uecm(formula_c, order = c(6,3,0,7,6), data = data_wide_97Q4_PYNONG, start = c(1972, 4)))
modelToTest$adj.r.squared #0.4290298
modelToTest$coeff[rownames(modelToTest$coeff) == "L(w, 1)",][1] #0.112017

modelToTest <- summary(uecm(formula_c, order = c(6,3,5,7,6), data = data_wide_97Q4_PYNONG, start = c(1972, 4)))
modelToTest$adj.r.squared #0.5245361
modelToTest$coeff[rownames(modelToTest$coeff) == "L(w, 1)",][1] #-0.07440218 


modelToTest <- summary(uecm(formula_c, order = c(6,3,5,5,0), data = data_wide_97Q4_PYNONG, start = c(1972, 4)))
modelToTest$adj.r.squared #0.4059395
modelToTest$coeff[rownames(modelToTest$coeff) == "L(w, 1)",][1] #-0.2721676 

modelToTest <- summary(uecm(formula_c, order = c(6,3,0,5,5), data = data_wide_97Q4_PYNONG, start = c(1972, 4)))
modelToTest$adj.r.squared #0.4141923
modelToTest$coeff[rownames(modelToTest$coeff) == "L(w, 1)",][1] #0.09765529

modelToTest <- summary(uecm(formula_c, order = c(6,3,5,5,5), data = data_wide_97Q4_PYNONG, start = c(1972, 4)))
modelToTest$adj.r.squared #0.5037181
modelToTest$coeff[rownames(modelToTest$coeff) == "L(w, 1)",][1] #-0.05316692
