## Accompanying materials to the Online Appendix to
## "Private returns to R\&D in the presence of spillovers, revisited"
## Forthcoming, Journal of Applied Econometrics
## Giovanni Millo, September 14th 2018

## cluster t-bootstrap function, Version 1
## (modified from vcovBoot.R)
## ref.: Cameron, Gelbach and Miller, REStat 2008 (CGM)

## Standalone operation: look up plm's describe() directly in the package
## namespace, so this file can be sourced without relying on it being exported
describe <- get("describe", envir = asNamespace("plm"), inherits = FALSE)

## Cluster t-bootstrap pseudo-covariance for plm models (CGM 2008, App. B).
##
## The t-statistics of the fitted model are compared with the distribution of
## cluster-bootstrapped t-statistics; the resulting bootstrap p-values are
## then mapped back to "pseudo" standard errors, i.e. the standard errors
## that would give the same two-sided p-values under a standard normal.
##
## Arguments:
##   x           fitted plm model; must be a "random", "within", "pooling"
##               or "fd" model
##   cluster     clustering dimension: "group" (default) or "time"
##   type        "wild": Rademacher weights (+1/-1), one per cluster, applied
##                       to the residuals (default);
##               "pairs": whole clusters of (y, X) rows resampled with
##                       replacement
##   vcov.t      function returning the covariance matrix used to studentize
##               both the original and the bootstrap coefficients
##   conf.level  currently unused; kept for interface compatibility
##   symm        TRUE: symmetric p-value based on |t|;
##               FALSE: equal-tail p-value (twice the smaller tail)
##   impose.h0   currently unused; kept for interface compatibility
##   reps        number of bootstrap replications
##   prog.bar    if TRUE, show a text progress bar
##
## Value: a square matrix with the coefficient names as dimnames; the
##   diagonal holds the squared pseudo standard errors, every off-diagonal
##   element is NA (the procedure gives no information on covariances).
##   A bootstrap p-value of 1 maps to an infinite pseudo-variance.
vcovTBoot <- function(x, cluster = c("group", "time"),
                      type = c("wild", "pairs"),
                      vcov.t = vcovHC, conf.level=0.95,
                      symm = TRUE, impose.h0 = FALSE,
                      reps = 1000, prog.bar = TRUE) {

    ## fetch (transformed) y, X from original model
    model <- describe(x, "model")
    if (!model %in% c("random", "within", "pooling", "fd")) {
        stop("Model has to be either random, within, pooling or fd model")
    }
    cluster <- match.arg(cluster)
    type <- match.arg(type)

    b.hat <- coef(x)
    coefnams <- names(b.hat)
    ncoef <- length(coefnams)
    demX <- model.matrix(x, model = model)
    demy <- pmodel.response(x, model = model)

    ## numeric codes of the individual and time indices
    groupind <- as.numeric(attr(x$model, "index")[, 1])
    timeind <- as.numeric(attr(x$model, "index")[, 2])
    if (model == "fd") {
        ## first-differencing loses the first observation of every group:
        ## drop the corresponding index entries so they line up with demX
        first.obs <- c(TRUE, diff(groupind) != 0)
        groupind <- groupind[!first.obs]
        timeind <- timeind[!first.obs]
    }

    ## relevant.ind: index we cluster on; lab: the other index
    if (cluster == "group") {
        relevant.ind <- groupind
        lab <- timeind
    } else {
        relevant.ind <- timeind
        lab <- groupind
    }

    ## form (vector of-) original t-stat(-s) (CGM App. B, 1.1)
    ## w = (b.hat - 0)/SE(b.hat)
    w <- coeftest(x, vcov=vcov.t)[,3]

    ## cluster labels need not be 1..G (e.g. unused factor levels), so always
    ## go through the position of a label inside 'clusters'; sorting makes
    ## position == label in the usual case where labels are exactly 1..G
    clusters <- sort(unique(relevant.ind))
    nclusters <- length(clusters)
    clust.pos <- match(relevant.ind, clusters)

    ## row 1 holds the original t-stat (w), rows 2..reps+1 the bootstrap ones
    bs.tstats <- matrix(NA, nrow = reps + 1, ncol = ncoef)
    dimnames(bs.tstats)[[2]] <- coefnams
    bs.tstats[1, ] <- w

    ## internal names and (loop-invariant) formula for boot models
    coefnams0 <- paste("x", seq_along(coefnams), sep="")
    bootfm <- as.formula(
        paste("y ~", paste(coefnams0, collapse="+"),
              " - 1"))
    bootnams <- c("id", "time", "y", coefnams0)

    ## studentized deviation of the bootstrap estimate from the original one;
    ## bdat has the panel indices in its first two columns
    boot.tstat <- function(bdat) {
        bootmod <- plm(bootfm, data = bdat, model = "pooling")
        ## vcov.t has been passed on as function
        (coef(bootmod) - b.hat) / sqrt(diag(vcov.t(bootmod)))
    }

    ## progress bar, closed again on any exit path
    show.bar <- isTRUE(prog.bar)
    if (show.bar) {
        pb <- txtProgressBar(min = 0, max = reps, initial = 0,
            style = 3)
        on.exit(close(pb), add = TRUE)
    }

    if (type == "pairs") {
        ## pairs bootstrap: sample y and X together, whole clusters at a time
        demdat <- cbind(demy, demX)
        ## rows belonging to each cluster, in the order of 'clusters'
        rows.by.cluster <- split(seq_along(relevant.ind), clust.pos)

        for (i in seq_len(reps)) {
            ## sample cluster positions with replacement
            index <- sample.int(nclusters, replace = TRUE)
            ## every draw becomes a new individual j with time index 1..T_j
            ## (clusters may differ in length if the panel is unbalanced)
            pieces <- lapply(seq_along(index), function(j) {
                ccdat0 <- demdat[rows.by.cluster[[index[j]]], , drop = FALSE]
                nr <- nrow(ccdat0)
                cbind(rep(j, nr),    # id index
                      seq_len(nr),   # time index
                      ccdat0)        # boot data
            })
            bootdat <- do.call(rbind, pieces)
            dimnames(bootdat) <- list(seq_len(nrow(bootdat)), bootnams)
            bootdat <- as.data.frame(bootdat)

            bs.tstats[i + 1, ] <- boot.tstat(bootdat)

            if (show.bar) {
                setTxtProgressBar(pb, value = i)
            }
        }
    } else {
        ## wild bootstrap
        ## make fitted and residuals for true mod
        truemod <- lm(demy ~ demX - 1)
        yhat <- fitted(truemod)
        uhat <- residuals(truemod)

        ## rademacher weights (1 or -1), one independent draw per element
        radem <- function(n) {
            wts <- c(1, -1)
            vapply(seq_len(n), function(i) sample(wts)[1], numeric(1))
        }

        for (i in seq_len(reps)) {
            ## cluster-sample the weights (not the data): one weight per
            ## cluster, replicated along all observations of that cluster
            bootu <- uhat * radem(nclusters)[clust.pos]
            booty <- yhat + bootu

            ## the wild bootstrap leaves the sample size, indexing etc.
            ## untouched;
            ## reconstruct panel dataset for computing robust t-stat
            wbootdat <- cbind(relevant.ind, lab, booty, demX)
            dimnames(wbootdat) <- list(seq_len(nrow(wbootdat)), bootnams)
            wbootdat <- as.data.frame(wbootdat)

            bs.tstats[i + 1, ] <- boot.tstat(wbootdat)

            if (show.bar) {
                setTxtProgressBar(pb, value = i)
            }
        }
    }

    ## estimate bootstrapped pseudo-p values from the distribution of
    ## bootstrapped t-stats; the original statistic (row 1) is part of the
    ## reference distribution, hence proportions are over reps + 1 rows
    if (symm) {
        mypvals <- apply(bs.tstats, 2,
                         function(tt) mean(abs(tt) >= abs(tt[1])))
    } else {
        lower.tail.p <- apply(bs.tstats, 2, function(tt) mean(tt <= tt[1]))
        upper.tail.p <- apply(bs.tstats, 2, function(tt) mean(tt >= tt[1]))
        ## equal-tail p-value, capped at 1
        mypvals <- pmin(1, 2 * pmin(lower.tail.p, upper.tail.p))
    }
    names(mypvals) <- coefnams

    ## reconstruct pseudo-vcov from (pseudo) p-values: mypvals are two-sided,
    ## so the matching |z| is the upper p/2 quantile of the standard normal
    pseudot <- qnorm(mypvals / 2, lower.tail = FALSE)
    pseudoSEs <- abs(b.hat) / pseudot

    ## only the diagonal is defined; off-diagonal elements stay NA
    pseudovcov <- matrix(NA_real_, nrow = ncoef, ncol = ncoef)
    dimnames(pseudovcov) <- list(coefnams, coefnams)
    diag(pseudovcov) <- pseudoSEs^2

    return(pseudovcov)
}
