main_path <- getwd()
fig_path <- paste0(main_path, "/figures")
# The code in "data_preparation_narrow.R" creates the data object "data"
# it also writes a copy of the clean data in "data/data_clean_narrow.csv"
source("data_preparation_narrow.R")
source("functions.R")
data <- data_narrow
rm(data_narrow)
# load packages -----------------------------------------------------------

library(ARDL)
library(lmtest) # for bgtest, resettest
library(tseries) # for jarque.bera.testlmtest
library(olsrr) # for ols_test_breusch_pagan
library(ggplot2)
library(ggpubr)# for ggarrange
library(xtable)


# data plots --------------------------------------------------------------
data0 <- window(data, start = 1972)
data_plot <- data.frame(Time = rep(time(data0), 2),
                        values = c(data0[,"w"], data0[,"Prod"]),
                        variable = c(rep("w", nrow(data0)), rep("Prod", nrow(data0))))
# pdf(paste0(fig_path, "/w_prod_narrow.pdf"), width=5.6, height=4)
plot_w_prod_narrow <- ggplot(data = data_plot, aes(x = Time, y = values, group = variable)) +
    geom_line(aes(linetype = variable)) +
    labs(y = 'Log Scale', linetype = "Variables") +
    theme(legend.position="bottom") +
    scale_x_continuous(breaks = seq(1972, 1997, by = 2))
plot_w_prod_narrow
# dev.off()

data_plot <- data.frame(Time = rep(time(diff(data0)), 2),
                        values = c(diff(data0[,"w"]), diff(data0[,"Prod"])),
                        variable = c(rep("Δw", nrow(diff(data0))), rep("ΔProd", nrow(diff(data0)))))
# cairo_pdf(paste0(fig_path, "/w_prod_diff_narrow.pdf"), width=5.6, height=4)
plot_w_prod_diff_narrow <- ggplot(data = data_plot, aes(x = Time, y = values, group = variable)) +
    geom_line(aes(linetype = variable)) +
    labs(y = 'First Differences', linetype = "Variables") +
    geom_abline(aes(intercept = 0, slope = 0), linetype = 2) +
    theme(legend.position="bottom") +
    scale_x_continuous(breaks = seq(1972, 1997, by = 2))
plot_w_prod_diff_narrow
# dev.off()

data_plot <- data.frame(Time = rep(time(data0), 2),
                        values = c(data0[,"Wedge"], data0[,"Union"]),
                        variable = c(rep("Wedge", nrow(data0)), rep("Union", nrow(data0))))
# pdf(paste0(fig_path, "/wedge_union_narrow.pdf"), width=5.6, height=4)
plot_wedge_union_narrow <- ggplot(data = data_plot, aes(x = Time, y = values, group = variable)) +
    geom_line(aes(linetype = variable)) +
    labs(y = 'Log Scale', linetype = "Variables") +
    theme(legend.position="bottom") +
    scale_x_continuous(breaks = seq(1972, 1997, by = 2))
plot_wedge_union_narrow
# dev.off()

# pdf(paste0(fig_path, "/ur_narrow.pdf"), width=5.6, height=4)
plot_ur_narrow <- ggplot(data = data.frame(data0), aes(x = time(data0), y = UR)) +
    geom_line() +
    labs(x = "Time", y = 'UR') +
    scale_x_continuous(breaks = seq(1972, 1997, by = 2))
plot_ur_narrow
# dev.off()

# cairo_pdf(paste0(fig_path, "/full_narrow.pdf"), width=7, height=5.5)
plot_full_narrow <- ggarrange(plot_w_prod_narrow + scale_x_continuous(breaks = seq(1972, 1997, by = 4)),
                              plot_w_prod_diff_narrow + scale_x_continuous(breaks = seq(1972, 1997, by = 4)),
                    plot_wedge_union_narrow + scale_x_continuous(breaks = seq(1972, 1997, by = 4)),
                    plot_ur_narrow + scale_x_continuous(breaks = seq(1972, 1997, by = 4)),
                    labels = c("(a)", "(b)", "(c)", "(d)"),
                    font.label = list(face = "plain", size = 11),
                    ncol = 2, nrow = 2)
plot_full_narrow
# dev.off()

# Analysis

formula_c = w ~ Prod + UR + Wedge + Union | D7475 + D7579
formula_ct = w ~ Prod + UR + Wedge + Union + trend(w, scale = FALSE) | D7475 + D7579

# calculate unrestricted ECMs from the underlying ARDL(p) models with p = 1, 2, ..., 7 but without lags of differenced Prod. With and without linear trend
# start = c(1972, 01) ->  As mentioned in PSS:
# "To ensure comparability of results for different choices of all p, 
# estimations use the same sample period, 1972q1-1997q4 (T = 104), with the first eight
# observations reserved for the construction of lagged variables".
uecm_c_p <- lapply(1:7, function(p) uecm(ardl(formula_c, order = c(p, 1, p, p, p), data = data, start = c(1972, 01))))
uecm_ct_p <- lapply(1:7, function(p) uecm(ardl(formula_ct, order = c(p, 1, p, p, p), data = data, start = c(1972, 01))))

Table_I_ct <- Table_I(uecm_ct_p, LM_orders = c(1, 4), latex_type = FALSE)
Table_I_ct_ltx <- Table_I(uecm_ct_p, LM_orders = c(1, 4), latex_type = TRUE)
Table_I_c <- Table_I(uecm_c_p, LM_orders = c(1, 4), latex_type = FALSE)
Table_I_c_ltx <- Table_I(uecm_c_p, LM_orders = c(1, 4), latex_type = TRUE)
Table_I_ct
Table_I_c
colnames(Table_I_c_ltx) <- colnames(Table_I_ct_ltx) <- c("p", "AIC", "SBC", "$X^{2}_{SC}(1)$", "$X^{2}_{SC}(4)$")

tabI <- cbind(Table_I_ct_ltx, "", Table_I_c_ltx[,-1])
colnames(tabI)[6] <- ""
table_list <- list(tabI=tabI)

# F and t bound tests ----------------------------------------------------
all_uecm_ct <- list(uecm_ct_p[[4]], uecm_ct_p[[5]], uecm_ct_p[[6]])
all_uecm_c <- list(uecm_c_p[[4]], uecm_c_p[[5]], uecm_c_p[[6]])

Table_II <- build_Table_II(all_uecm_c, all_uecm_ct, cases = c(4,5,3), replication = "narrow", latex_type = FALSE)
Table_II_ltx <- build_Table_II(all_uecm_c, all_uecm_ct, cases = c(4,5,3), replication = "narrow", latex_type = TRUE)
tabII <- cbind(Table_II_ltx[1:4], "", Table_II_ltx[5:6])
colnames(tabII)[5] <- ""
table_list$tabII <- tabII
Table_II

# exact sample bounds F-test
#### Enable (uncomment) the next lines to run. They are comment out because they are time-consuming
#set.seed(2020)
#exact_iv <- bounds_f_test(all_uecm_ct[[1]], case = 4, alpha = 0.05, exact = TRUE, R = 70000)
#set.seed(2020)
#exact_v <- bounds_f_test(all_uecm_ct[[1]], case = 5, alpha = 0.05, exact = TRUE, R = 70000)
#exact_iv$parameters
#exact_v$parameters

# max_order = 6 -> the max order of every p and q is 6
# fixed_order = c(-1,1,-1,-1,-1) -> The order of prod is fixed at q=1. Everything else unrestricted
# starting_order = 5 -> In everty searching iteration, every order q starts from 5 (and can go up to 6 and at minimum 0).
# For the order of w (p), this means that 5 is the minimum order (as the Table I in PSS indicates).
# selection = "AIC_pss" -> We use the same form of AIC as in PSS
# selection_minmax = "max" -> The AIC in PSS is supposed to be maximized
# start = c(1972, 01) ->  As mentioned in PSS:
# "To ensure comparability of results for different choices of all p, 
# estimations use the same sample period, 1972q1-1997q4 (T = 104), with the first eight
# observations reserved for the construction of lagged variables".
all_models <- auto_ardl(formula = formula_c, data = data, start = c(1972, 01), 
                        max_order = 6, fixed_order = c(-1,1,-1,-1,-1), starting_order = 5, 
                        selection = "AIC_pss", selection_minmax = "max")
# Note that the best model is the ARDL(6,1,5,4,5). This is the same model also found in PSS
# In PSS is referred as ARDL(6,0,5,4,5) but is probably a typo,
# as every calculation in PSS is according the ARDL(6,1,5,4,5) model and not the ARDL(6,0,5,4,5).
all_models$top_orders
ardl61545 <- all_models$best_model
# Also, a full search across all possible models also results in the same model.
# Attention! It may take more than 2 minutes to run all the 2058 different models!
# auto_ardl(formula = formula_c, data = data, start = c(1972, 01), max_order = 6, fixed_order = c(-1,1,-1,-1,-1), selection = "AIC_pss", selection_minmax = "max", grid = TRUE)

# Long-Run multipliers
lr_mult <- multipliers(ardl61545)
# Eq. (31)
tab_Eq31 <- rbind(lr_mult[-1,], lr_mult[1,])[,c(1:3,5)]
colnames(tab_Eq31)[1] <- "variable"
table_list$tab_Eq31 <- tab_Eq31
#print(xtable(table_list$tab_Eq31, digits = 3, label = "tab:eq31",
#             caption="Estimates of the levels relationship (Equation 31 in PSS, p.313)"),
#      caption.placement = "top")

# Table III
uecm61545 <- uecm(ardl61545)
recm61545 <- recm(ardl61545, case = 3)
tabIII_uecm <- build_Table_III(uecm61545)
tabIII_recm <- build_Table_III(recm61545)
summary(uecm61545)
summary(recm61545)
table_list$tabIII_uecm <- tabIII_uecm
table_list$tabIII_recm <- tabIII_recm

# diagnostic tests under the table iii ------------------------------------
# Note: They refer to the uecm (actually parts of it), not to the recm in table iii

table_list$uecm_tests <- ecm_tests(uecm61545)
table_list$recm_tests <- ecm_tests(recm61545)


# Long-Run multipliers tests ----------------------------------------------

# test if prod != 1 and wedge != (-1)
# p. 313 in PSS
# "The coefficients of the productivity and the wedge variables 
# are insignificantly different from unity"
prod_t <- (1 - lr_mult[which(lr_mult == "Prod"), "estimate"]) / lr_mult[which(lr_mult == "Prod"), "std.error"]
wedge_t <- (1 + lr_mult[which(lr_mult == "Wedge"), "estimate"]) / lr_mult[which(lr_mult == "Wedge"), "std.error"]
2 * pt(abs(prod_t), df = df.residual(ardl61545), lower.tail = FALSE)
2 * pt(abs(wedge_t), df = df.residual(ardl61545), lower.tail = FALSE)
# or using CIs
lr_mult[which(lr_mult == "Prod"), "estimate"] + c(-2*lr_mult[which(lr_mult == "Prod"), "std.error"], 2*lr_mult[which(lr_mult == "Prod"), "std.error"])
lr_mult[which(lr_mult == "Wedge"), "estimate"] + c(-2*lr_mult[which(lr_mult == "Wedge"), "std.error"], 2*lr_mult[which(lr_mult == "Wedge"), "std.error"])
