## Accompanying materials to the Online Appendix to
## "Private returns to R&D in the presence of spillovers, revisited"
## Forthcoming, Journal of Applied Econometrics
## Giovanni Millo, September 14th 2018

## cluster bootstrap function

## standalone use: fetch describe() from inside the plm namespace
## (same lookup that `plm:::describe` performs)
describe <- get("describe", envir = asNamespace("plm"), inherits = FALSE)

## Cluster bootstrap estimate of the coefficient covariance matrix of a
## panel model.
##
## Arguments:
##   x        fitted panel model; describe(x, "model") must be one of
##            "random", "within", "pooling" or "fd", and x$model must
##            carry an "index" attribute whose first column identifies the
##            group and whose second column identifies the time period
##   cluster  resample whole groups ("group") or whole periods ("time")
##   type     "wild": keep the regressors fixed and flip the sign of the
##            residuals cluster by cluster (Rademacher weights);
##            "pairs": resample entire clusters of (y, X) rows with
##            replacement
##   reps     number of bootstrap replications (at least 1)
##   prog.bar show a text progress bar while replicating?
##
## Value: the k x k sample covariance of the bootstrapped coefficient
## vectors, with rows and columns named after coef(x).
vcovBoot <- function(x, cluster=c("group", "time"),
                     type=c("wild", "pairs"),
                     reps=1000, prog.bar = TRUE) {

    cluster <- match.arg(cluster)
    type <- match.arg(type)
    if (!is.numeric(reps) || length(reps) != 1L || is.na(reps) ||
        reps < 1) {
        stop("'reps' must be a single number not smaller than 1")
    }

    ## fetch (transformed) y, X from original model
    model <- describe(x, "model")
    if (!model %in% c("random", "within", "pooling", "fd")) {
        stop("Model has to be either random, within, pooling or fd model")
    }
    coefnams <- names(coef(x))
    demX <- model.matrix(x, model = model)
    demy <- pmodel.response(x, model = model)
    k <- ncol(demX)

    ## numeric codes of the group and time index of every observation
    index <- attr(x$model, "index")
    groupind <- as.numeric(index[, 1])
    timeind <- as.numeric(index[, 2])
    if (model == "fd") {
        ## first-differencing loses the first row of every group: keep
        ## only rows whose group code equals that of the preceding row
        keep <- c(FALSE, diff(groupind) == 0)
        groupind <- groupind[keep]
        timeind <- timeind[keep]
    }
    relevant.ind <- switch(cluster, group = groupind, time = timeind)
    if (length(relevant.ind) != nrow(demX)) {
        stop("index and transformed data have different numbers of rows")
    }

    ## Cluster codes need not be 1..n (the fd step above can remove a
    ## whole period, factors can have unused levels), so work with the
    ## position of each code among the sorted distinct codes. When the
    ## codes already are 1..n, position and code coincide.
    clusters <- sort(unique(relevant.ind))
    nclust <- length(clusters)
    clpos <- match(relevant.ind, clusters)

    bscoefs <- matrix(NA_real_, nrow = reps, ncol = k)

    ## progress bar; closed on exit even if a replication fails
    show.bar <- isTRUE(as.logical(prog.bar))
    if (show.bar) {
        pb <- txtProgressBar(min = 0, max = reps, initial = 0,
            style = 3)
        on.exit(close(pb), add = TRUE)
    }

    switch(type,
           pairs={
               ## keep y and X side by side so one row lookup serves both
               demdat <- cbind(demy, demX)
               ## row numbers of every cluster, computed once
               rows.by.cluster <- split(seq_along(clpos), clpos)
               for (i in seq_len(reps)) {
                   ## sample clusters with replacement, then take all
                   ## rows of every sampled cluster, in sampling order
                   draw <- sample.int(nclust, replace = TRUE)
                   rows <- unlist(rows.by.cluster[draw],
                                  use.names = FALSE)
                   bootdat <- demdat[rows, , drop = FALSE]
                   booty <- bootdat[, 1]
                   ## drop = FALSE: stay a matrix with one regressor
                   bootX <- bootdat[, -1, drop = FALSE]
                   bscoefs[i, ] <- solve(crossprod(bootX),
                                         crossprod(bootX, booty))
                   if (show.bar) {
                       setTxtProgressBar(pb, value = i)
                   }
               }
           }, wild={
               ## reproduce original panel regression as OLS on the
               ## transformed data to get fitted values and residuals
               truemod <- lm(demy ~ demX - 1)
               yhat <- fitted(truemod)
               uhat <- residuals(truemod)
               ## X'X does not change across replications
               XtX <- crossprod(demX)
               ## Rademacher weights, drawn one sample() call at a time
               ## so that a given seed yields the same weights as the
               ## element-by-element loop this replaces
               radem <- function(n) {
                   wts <- c(1, -1)
                   vapply(seq_len(n), function(j) sample(wts)[1],
                          numeric(1))
               }
               for (i in seq_len(reps)) {
                   ## one weight per cluster, spread over its rows
                   bootu <- uhat * radem(nclust)[clpos]
                   booty <- yhat + bootu
                   bscoefs[i, ] <- solve(XtX, crossprod(demX, booty))
                   if (show.bar) {
                       setTxtProgressBar(pb, value = i)
                   }
               }
           })

    ## estimate full coef vcov from bootstrapped betas
    myvcov <- cov(bscoefs)
    rownames(myvcov) <- colnames(myvcov) <- coefnams
    return(myvcov)
}
