## Accompanying materials to the Online Appendix to
## "Private returns to R\&D in the presence of spillovers, revisited"
## Forthcoming, Journal of Applied Econometrics
## Giovanni Millo, September 14th 2018

## Version of pcce() using a generalized inverse, for ill-conditioned
## situations

pcce2 <-
function (formula, data, subset, na.action, model = c("mg", "p"),
    residuals = c("defactored", "standard"), index = NULL, trend = FALSE,
    ...)
{
    effect <- "individual"
    model.name <- paste("cce", match.arg(model), sep = "")
    data.name <- paste(deparse(substitute(data)))
    cl <- match.call()
    plm.model <- match.call(expand.dots = FALSE)
    m <- match(c("formula", "data", "subset", "na.action", "effect",
        "model", "index"), names(plm.model), 0)
    plm.model <- plm.model[c(1, m)]
    plm.model[[1]] <- as.name("plm")
    plm.model$model <- "pooling"
    plm.model <- eval(plm.model, parent.frame())
    index <- attr(model.frame(plm.model), "index")
    ind <- index[[1]]
    tind <- index[[2]]
    pdim <- pdim(plm.model)
    balanced <- pdim$balanced
    nt <- pdim$Tint$nt
    Ti <- pdim$Tint$Ti
    T. <- pdim$nT$T
    n <- pdim$nT$n
    N <- pdim$nT$N
    time.names <- pdim$panel.names$time.names
    id.names <- pdim$panel.names$id.names
    coef.names <- names(coef(plm.model))
    k <- length(coef.names)
    X <- model.matrix(plm.model)
    y <- model.response(model.frame(plm.model))
    t <- min(tapply(X[, 1], ind, length))
 #   if (t < (k + 1))
 #       stop("Insufficient number of time periods")
    if (attr(terms(plm.model), "intercept")) {
        k <- k - 1
        coef.names <- coef.names[-1]
    }
    tcoef <- matrix(NA, nrow = k, ncol = n)
    cceres <- vector("list", n)
    stdres <- vector("list", n)
    if (attr(terms(plm.model), "intercept")) {
        X <- X[, -1, drop = FALSE]
    }
    be <- function(x, index, na.rm = T) tapply(x, index, mean,
        na.rm = na.rm)
    Xm <- apply(X, 2, FUN = be, index = tind)[tind, , drop = FALSE]
    ym <- apply(as.matrix(as.numeric(y)), 2, FUN = be, index = tind)[tind]
    if (attr(terms(plm.model), "intercept")) {
        Hhat <- cbind(ym, Xm, 1)
    }
    else {
        Hhat <- cbind(ym, Xm)
    }
    XMX <- array(dim = c(k, k, n))
    XMy <- array(dim = c(k, 1, n))
    unind <- unique(ind)
    for (i in 1:n) {
        tX <- X[ind == unind[i], , drop = FALSE]
        ty <- y[ind == unind[i]]
        tHhat <- Hhat[ind == unind[i], , drop = FALSE]
        if (trend)
            tHhat <- cbind(tHhat, 1:(dim(tHhat)[[1]]))
        tMhat <- diag(1, length(ty)) - tHhat %*%
            ginv(crossprod(tHhat)) %*% t(tHhat)
                                        #solve(crossprod(tHhat), t(tHhat))
        tXMX <- crossprod(tX, tMhat %*% tX)
        tXMy <- crossprod(tX, tMhat %*% ty)
        XMX[, , i] <- tXMX
        XMy[, , i] <- tXMy
        tb <- ginv(tXMX) %*% tXMy
        tcoef[, i] <- tb
        cceres[[i]] <- tMhat %*% (ty - tX %*% tb)
        ta <- mean(ty - tX)
        stdres[[i]] <- ty - tX %*% tb - ta
    }
    coefmg <- apply(tcoef, 1, mean)
    Rmat <- array(dim = c(k, k, n))
    demcoef <- tcoef - coefmg
    switch(match.arg(model), mg = {
        coef <- coefmg
        for (i in 1:n) Rmat[, , i] <- outer(demcoef[, i], demcoef[,
            i])
        vcov <- 1/(n * (n - 1)) * apply(Rmat, 1:2, sum)
    }, p = {
        sXMX <- apply(XMX, 1:2, sum)
        sXMy <- apply(XMy, 1:2, sum)
        coef <- solve(sXMX, sXMy) #ginv(sXMX) %*% sXMy
        psi.star <- 1/N * sXMX
        for (i in 1:n) Rmat[, , i] <- XMX[, , i] %*% outer(demcoef[,
            i], demcoef[, i]) %*% XMX[, , i]
        R.star <- 1/(n - 1) * apply(Rmat, 1:2, sum) * 1/(t^2)
        Sigmap.star <- solve(psi.star, R.star) %*% solve(psi.star)
        vcov <- Sigmap.star/n
        ccepres <- vector("list", n)
        for (i in 1:n) {
            tX <- X[ind == unind[i], , drop = FALSE]
            ty <- y[ind == unind[i]]
            tHhat <- Hhat[ind == unind[i], , drop = FALSE]
            if (trend) tHhat <- cbind(tHhat, 1:(dim(tHhat)[[1]]))
            tMhat <- diag(1, length(ty)) - tHhat %*%
                ginv(crossprod(tHhat)) %*% t(tHhat)
                                        #solve(crossprod(tHhat), t(tHhat))
            cceres[[i]] <- tMhat %*% (ty - tX %*% coef)
            ta <- mean(ty - tX)
            stdres[[i]] <- ty - tX %*% coef - ta
        }
    })
    switch(match.arg(model), mg = {
        sigma2cce.i <- vector("list", n)
        for (i in 1:n) {
            sigma2cce.i[[i]] <- crossprod(cceres[[i]]) * 1/(length(cceres[[i]]) -
                2 * k - 2)
        }
        sigma2cce <- 1/n * sum(unlist(sigma2cce.i))
    }, p = {
        sigma2cce <- 1/(n * (T. - k - 2) - k) * sum(unlist(lapply(cceres,
            crossprod)))
    })
    sigma2.i <- vector("list", n)
    for (i in 1:n) {
        ty <- y[ind == unind[i]]
        sigma2.i[[i]] <- sum((ty - mean(ty))^2)/(length(ty) -
            1)
    }
    sigma2y <- mean(unlist(sigma2.i))
    r2cce <- 1 - sigma2cce/sigma2y
    switch(match.arg(residuals), standard = {
        residuals <- unlist(stdres)
    }, defactored = {
        residuals <- unlist(cceres)
    })
    df.residual <- nrow(X) - ncol(X)
    fitted.values <- y - residuals
    names(coef) <- rownames(vcov) <- colnames(vcov) <- coef.names
    dimnames(tcoef) <- list(coef.names, id.names)
    pmodel <- attr(plm.model, "pmodel")
    pmodel$model.name <- model
    mgmod <- list(coefficients = coef, residuals = residuals,
        fitted.values = fitted.values, vcov = vcov, df.residual = df.residual,
        model = model.frame(plm.model), sigma = NULL, indcoef = tcoef,
        r.squared = r2cce, call = cl)
    mgmod <- structure(mgmod, pdim = pdim, pmodel = pmodel)
    class(mgmod) <- c("pcce", "panelmodel")
    mgmod
}
