## Accompanying materials to the Online Appendix to
## "Private returns to R\&D in the presence of spillovers, revisited"
## Forthcoming, Journal of Applied Econometrics
## Giovanni Millo, September 14th 2018

## Enhanced version of Pesaran's CDtest, adding Frees' rank-based test

pcdtest3 <- function (x, ...)
{
    UseMethod("pcdtest3")
}

pcdtest3.formula <-
function (x, data, index = NULL, model = NULL, test = c("cd",
    "sclm", "lm", "friedman", "frees",
    "rho","absrho"), w = NULL, ...)
{
    data <- pdata.frame(data, index = index)
    mymod <- plm(x, data, model = "pooling")
    if (is.null(model) & min(pdim(mymod)$Tint$Ti) < length(mymod$coefficients) +
        1) {
        warning("Insufficient number of observations in time to estimate heterogeneous model: using within residuals",
            call. = FALSE)
        model <- "within"
    }
    index <- attr(model.frame(mymod), "index")
    tind <- as.numeric(index[[2]])
    ind <- as.numeric(index[[1]])
    if (is.null(model)) {
        X <- model.matrix(mymod)
        y <- model.response(model.frame(mymod))
        unind <- unique(ind)
        n <- length(unind)
        k <- dim(X)[[2]]
        tres <- vector("list", n)
        for (i in 1:n) {
            tX <- X[ind == unind[i], ]
            ty <- y[ind == unind[i]]
            tres[[i]] <- lm.fit(tX, ty)$resid
            names(tres[[i]]) <- tind[ind == unind[i]]
        }
    }
    else {
        mymod <- plm(x, data, model = model, ...)
        myres <- mymod$residuals
        X <- model.matrix(mymod) # needed for nlm*
        unind <- unique(ind)
        n <- length(unind)
        t <- min(pdim(mymod)$Tint$Ti)
        nT <- length(ind)
        k <- length(mymod$coefficients)
        tres <- vector("list", n)
        for (i in 1:n) {
            tres[[i]] <- myres[ind == unind[i]]
            names(tres[[i]]) <- tind[ind == unind[i]]
        }
    }
    return(pcdres3(tres = tres, n = n, k = k, X = X, ind=ind, w = w,
           form = paste(deparse(substitute(x))),
           test = match.arg(test), ...))
}


## this panelmodel method here only for adding "rho" and
## "absrho" arguments

pcdtest3.panelmodel <- function(x,
                         test = c("cd", "sclm", "lm",
                                  "friedman", "frees",
                                  "rho", "absrho"),
                         w = NULL, ...)
{
    myres <- resid(x)
    index <- attr(model.frame(x), "index")
    tind <- as.numeric(index[[2]])
    ind <- as.numeric(index[[1]])
    unind <- unique(ind)
    n <- length(unind)
    t <- pdim(x)$Tint$Ti
    nT <- length(ind)
    k <- length(x$coefficients)
    X <- model.matrix(x)
    tres <- vector("list", n)
    for (i in 1:n) {
        tres[[i]] <- myres[ind == unind[i]]
        names(tres[[i]]) <- tind[ind == unind[i]]
    }
    return(pcdres3(tres = tres, n = n, k = k, X = X, ind=ind, w = w,
                   form = paste(deparse(substitute(formula))),
                   test = match.arg(test), ...))
}

pcdtest3.pseries <- function(x, test=c("cd","sclm","lm",
                                       "friedman", "frees",
                                       "rho","absrho"),
                             w=NULL, ...) {

    ## calculates local or global CD test on a pseries 'x' just as it
    ## would on model residuals

    k <- NULL
    X <- NULL

    ## important difference here: a pseries _can_ have NAs

    ## get indices
    tind <- as.numeric(attr(x, "index")[[2]])
    ind <- as.numeric(attr(x, "index")[[1]])

    ## det. number of groups and df
    unind <- unique(ind)
    n <- length(unind)

    ## "pre-allocate" an empty list of length n
    tres<-vector("list", n)

    ## use model residuals, group by group
    ## list of n:
    ## t_i residuals for each x-sect. 1..n
    for(i in 1:n) {
              xnonna <- !is.na(x[ind==unind[i]])
              tres[[i]]<-x[ind==unind[i]][xnonna]
              ## name resids after the time index
              names(tres[[i]])<-tind[ind==unind[i]][xnonna]
              }

    return(pcdres3(tres=tres, n=n, k=k, X=X, ind=ind, w=w,
                  form=paste(deparse(substitute(formula))),
                  test=match.arg(test), ...))
}

pcdres3<-function(tres, n, k, X, ind, w, form, test, exact=TRUE, ...) {

  ## Take list of model residuals, group by group, and calc. test
  ## (from here on, what's needed for rho_ij is ok)
  ## this function is the modulus calculating the test,
  ## to be called from either pcdtest.formula or
  ## pcdtest.panelmodel or pcdtest.pseries

  ## rho_ij matrix
  rho<-matrix(NA,ncol=n,nrow=n)
  ## T_ij matrix
  t.ij<-matrix(NA,ncol=n,nrow=n)

  for(i in 1:n) {
    for(j in 1:n) {

      ## determination of joint range m_i | m_j
      ## m_ij=m_i|m_j, working on names of the residuals' vectors
      m.ij<-intersect( names(tres[[i]]), names(tres[[j]]) )

      ## for this ij do me_i=mean_t(e_it[m_ij]), idem j
      ## and rho and T_ij as in Pesaran, page 18
      ## (as mean(ei)=0 doesn't necessarily hold any more)

      ei<-tres[[i]][m.ij]
      ej<-tres[[j]][m.ij]
      dei<-ei-mean(ei)
      dej<-ej-mean(ej)
      rho[i,j]<-( dei%*%dej )/( sqrt(dei%*%dei) * sqrt(dej%*%dej) )

      ## put this here inside summations, as for unbalanced panels
      ## "common obs. numerosity" T_ij may vary on i,j
      t.ij[i,j]<-length(m.ij)

      }
    }

## rank test a la Friedman:

  ## rk_ij matrix
  rnk <- matrix(NA,ncol=n,nrow=n)
  ## T_ij matrix
  rt.ij<-matrix(NA,ncol=n,nrow=n)

  for(i in 1:n) {
    for(j in 1:n) {

      ## determination of joint range m_i | m_j
      ## m_ij=m_i|m_j, working on names of the residuals' vectors
      m.ij<-intersect( names(tres[[i]]), names(tres[[j]]) )

      ## put length(m.ij) inside summations, as for unbalanced panels
      ## "common obs. num." T_ij and hence avg. rank may vary on i,j

      ## for this ij do me_i=mean_t(e_it[m_ij]), idem j
      ## and rho and T_ij as in Pesaran, page 18
      ## (as mean(ei)=0 doesn't necessarily hold any more)

      ei<-tres[[i]][m.ij]
      ej<-tres[[j]][m.ij]
      avg.rank <- (length(m.ij)+1)/2
      dei<-rank(ei)-avg.rank
      dej<-rank(ej)-avg.rank
      rnk[i,j]<-( dei%*%dej )/( dei%*%dei )

      rt.ij[i,j] <- length(m.ij)

      }
    }

  ## begin features for local test ####################
  ## higher orders omitted for now, use wlag() explicitly

  ## if global test, set all elements in w to 1
  if(is.null(w)) {w<-matrix(1,ncol=n,nrow=n)
                  dep<-""} else { dep<-"local" }

  ## make (binary) selector matrix based on the contiguity matrix w
  ## and extracting elements corresponding to ones in the lower triangle
  ## excluding the diagonal

  ## transform in logicals (0=FALSE, else=TRUE: no need to worry
  ## about row-std. matrices)
  selector.mat<-matrix(as.logical(w),ncol=n)
  ## set upper tri and diagonal to false
  selector.mat[upper.tri(selector.mat,diag=TRUE)]<-FALSE

  ## number of elements in selector.mat
  elem.num<-sum(selector.mat)

  ## end features for local test ######################

  ## Breusch-Pagan or Pesaran statistic for cross-sectional dependence,
  ## robust vs. unbalanced panels:

  switch(test,
   lm = {
    CDstat<-sum((t.ij*rho^2)[selector.mat])
    pCD<-pchisq(CDstat,df=elem.num,lower.tail=F)
    names(CDstat)<-"chisq"
    parm<-elem.num
    names(parm)<-"df"
    testname<-"Breusch-Pagan LM test"
   },
   sclm = {
    CDstat<-sqrt(1/(2*elem.num))*sum((t.ij*rho^2-1)[selector.mat])
    pCD<-2*pnorm(abs(CDstat),lower.tail=F) # Kevin's fix
    names(CDstat)<-"z"
    parm<-NULL
    testname<-"Scaled LM test"
   },
   cd = {
    CDstat<-sqrt(1/elem.num)*sum((sqrt(t.ij)*rho)[selector.mat])
    pCD<-2*pnorm(abs(CDstat),lower.tail=F)
    names(CDstat)<-"z"
    parm<-NULL
    testname<-"Pesaran CD test"
   },
   friedman = {
    r.avg <- 2/(n*(n-1))*sum(rnk[upper.tri(rnk,diag=FALSE)])
    t <- min(rt.ij)
    CDstat <- (t-1)*((n-1)*r.avg +1)
    pCD <- pchisq(CDstat, df=t-1, lower.tail=F)
    names(CDstat) <- "chisq"
    parm <- t-1
    names(parm)<-"df"
    testname <- "Friedman rank test"
   },
   frees = {
    r2.avg <- 2/(n*(n-1))*sum(rnk[upper.tri(rnk,diag=FALSE)]^2)
    t <- min(rt.ij)
    a.t <- 4*(t+2) / (5*(t-1)^2*(t+1))
    b.t <- 2*(5*t + 6) / (5*t*(t-1)*(t+1))
    CDstat <- n * (r2.avg  - 1 / (t - 1))
    if(exact) {
      ## exact
      df1 <- t-1
      df2 <- t*(t-3)/2
#      require(CompQuadForm)
#      pCD <- 1 - davies(x, lambda=c(a.t, b.t), h=c(df1, df2),
#                        delta=c(df1, df2), sigma=0)$Qq
      ## simulate Frees' Q distribution
      #require(EnvStats)
      mydraws <- a.t*(rchisq(10000, df=df1)-df1) +
                 b.t*(rchisq(10000, df=df2)-df2)
      pCD <- 1 - EnvStats::pemp(CDstat, mydraws)
      } else {
      ## approximation
      ## variance of normal approximation to Q as in De Hoyos
      ## and Sarafidis::
      # vQ <- 32/25 * (t+2)^2 / ((t-1)^3*(t+1)^2) +
      #       4/5 * ((5*t+6)^2 * (t-3)) / (t*(t-1)^2*(t+1)^2)
       ## corrected from Frees' paper:
       vQ <- 4*(t-2)*(25*t^2-7*t-54)/(25*t*(t-1)^3*(t+1))
      pCD <- 2 * pnorm(abs(CDstat)/sqrt(vQ), lower.tail=FALSE)
      warning("Using normal approx. to Q", call.=FALSE)
     }

    names(CDstat) <- "Q"
    parm <- t-1
    names(parm)<-"df"
    testname <- "Frees rank test"

   },
   rho = {
    CDstat<-sum(rho[selector.mat])/elem.num
    pCD<-NULL
    names(CDstat)<-"rho"
    parm<-NULL
    testname<-"Average correlation coefficient"
   },
   absrho = {
    CDstat<-sum(abs(rho)[selector.mat])/elem.num
    pCD<-NULL
    names(CDstat)<-"|rho|"
    parm<-NULL
    testname<-"Average absolute correlation coefficient"
   })

  ##(insert usual htest features)
  dname <- paste(deparse(substitute(formula)))
  RVAL <- list(statistic = CDstat, parameter = parm,
               method = paste(testname, "for", dep,
                "cross-sectional dependence in panels"),
               alternative = "cross-sectional dependence",
               p.value = pCD,
               data.name =   dname)
  class(RVAL) <- "htest"
  return(RVAL)
  }

