library(metRology)
## Read the combined data set; the first row of the file supplies the column names.
data <- read.table(file = "combined_data.txt", header = TRUE)

## Column layout of `data`:
##   data[,1]  BEVE
##   data[,2]  RAWM
##   data[,3]  OIL
##   data[,4]  NICK
##   data[,5]  COP
##   data[,6]  EXCH
##   data[,7]  INDPRO

## Lag/lead selection with p = 1 (currently wired to BEVE, i.e. column 1)
## Dependent variable: log-differences of the chosen series.
y <- diff(log(data[[1]]))
## Regressors: log-differences of columns 7 and 6 (prod & exch).
x <- cbind(
  diff(log(data[[7]])),
  diff(log(data[[6]]))
)

obs <- length(y)
## y is always taken on observations 2..(obs-1); x is aligned with the same
## rows (mod13), shifted one row forward (mod2) or one row back (mod4).
mod13 <- MARX::selection.lag.lead(y[2:(obs - 1)], x[2:(obs - 1), ], 1) ## both logliks are used
mod2  <- MARX::selection.lag.lead(y[2:(obs - 1)], x[3:obs, ], 1)       ## only the second loglik is used
mod4  <- MARX::selection.lag.lead(y[2:(obs - 1)], x[1:(obs - 2), ], 1) ## only the first loglik is used

## Log-likelihoods collected in the order model 1, model 2, model 3, model 4.
LL.models <- c(
  mod13$loglikelihood[1],
  mod2$loglikelihood[2],
  mod13$loglikelihood[2],
  mod4$loglikelihood[1]
)
## Position of the model with the highest log-likelihood.
which.max(LL.models)

## Series are scaled by 100 so that the estimates are expressed in percentages.
## NOTE(review): the trailing 0, 1 are presumably the causal / noncausal orders
## of MARX::mixed -- confirm against the package documentation.
summary(MARX::mixed(100 * y[2:(obs - 1)], 100 * x[2:(obs - 1), ], 0, 1))
summary(MARX::mixed(100 * y, NULL, 0, 1))
## Reminder: adapt the summary(..) calls above to whichever model
## which.max(LL.models) selects!!

## General setup for p = 2 (Generates output for RAWM now!)
## Dependent variable: log-differences of column 2 (RAWM).
y <- diff(log(data[, 2]))
## Regressors: log-differences of columns 7 and 6.
x <- cbind(diff(log(data[, 7])), diff(log(data[, 6]))) ## prod & exch

obs <- length(y)
## y is always taken on observations 3..(obs-2). The x window has the same
## length (obs - 4 rows) and is aligned with y or shifted by one or two rows
## in either direction. The digits in each name give, per returned
## log-likelihood, the model number it corresponds to (0 = not of interest).
mod184 <- MARX::selection.lag.lead(y[3:(obs - 2)], x[3:(obs - 2), ], 2) ## x on the same rows as y
mod750 <- MARX::selection.lag.lead(y[3:(obs - 2)], x[2:(obs - 3), ], 2) ## x one row earlier
mod600 <- MARX::selection.lag.lead(y[3:(obs - 2)], x[1:(obs - 4), ], 2) ## x two rows earlier
mod029 <- MARX::selection.lag.lead(y[3:(obs - 2)], x[4:(obs - 1), ], 2) ## x one row later
mod003 <- MARX::selection.lag.lead(y[3:(obs - 2)], x[5:obs, ], 2)       ## x two rows later

## Log-likelihoods collected in the order model 1, 2, ..., 9.
LL2 <- c(mod184$loglikelihood[1],
         mod029$loglikelihood[2],
         mod003$loglikelihood[3],
         mod184$loglikelihood[3],
         mod750$loglikelihood[2],
         mod600$loglikelihood[1],
         mod750$loglikelihood[1],
         mod184$loglikelihood[2],
         mod029$loglikelihood[3])

## Position (model number) of the highest log-likelihood.
which.max(LL2)

## Both summaries scale the series by 100 so the estimates are in percentages.
## The second call previously passed the unscaled y, which contradicted its
## own comment and the scaling used by every other summary in this script.
## NOTE(review): the trailing integer pairs are presumably the causal /
## noncausal orders of MARX::mixed -- confirm against the package documentation.
summary(MARX::mixed(100 * y[3:(obs - 2)], 100 * x[3:(obs - 2), ], 0, 2)) ## Multiplication by 100 to get percentages
summary(MARX::mixed(100 * y, NULL, 1, 1)) ## Multiplication by 100 to get percentages
## Note: the summary(..) should be changed to the model that is chosen in which.max(LL2)!!


## Notes:
## Following the model selection procedure, p=1 means checking 4 different models
## The notation "mod13" means that this single fit supplies the log-likelihoods
## of the first and the third model in the list below
## For p=1: 
## mod1 is y_t on y_t-1 and x_t (ARDL, pseudo model)
## mod2 is y_t on y_t+1 and x_t+1 (theoretically equivalent to mod1)
## mod3 is y_t on y_t+1 and x_t (noncausal as pseudo model)
## mod4 is y_t on y_t-1 and x_t-1 (theoretically equivalent to mod3)
## For p=2 things are a bit more complicated (see table 1 in article)
## Hence: "mod029" means that the first loglikelihood corresponds to no model of interest (0),
## the second loglikelihood to model 2 in line and the third loglikelihood to model 9 in line.
