rm(list=ls())
library(np)
library(plm)
library(cubature)
library(tikzDevice)
setwd("C:/Users/AlexL/Dropbox/inc_dem/data")


###Import Data###

#Five Year Panel
FiveYearRaw=read.csv("FiveYearRawColony.csv")
year.f=factor(FiveYearRaw$year)
yeardummies=model.matrix(~year.f)
colnames(yeardummies)[1]="year.f1950" #Replace intercept in model.matrix with first year dummy
yeardummies[,1]=ifelse(FiveYearRaw$year==1950,1,0)
FiveYearRaw=cbind(FiveYearRaw,yeardummies)
sort1=paste(FiveYearRaw$year,FiveYearRaw$code_numeric) #Redefine sample to better match Stata subset
sort2=paste(FiveYearRaw$year+5,FiveYearRaw$code_numeric)
index_lag=match(sort2,sort1)
FiveYearRaw$samplenew=FiveYearRaw$sample[index_lag]
FiveYearRaw$samplenew[is.na(FiveYearRaw$samplenew)]=FiveYearRaw$sample[is.na(FiveYearRaw$samplenew)]
FiveYearRaw$sample=FiveYearRaw$samplenew
FiveYearRaw["samplenew"]=NULL
FiveYearRaw$samplebalancefe[is.na(FiveYearRaw$samplebalancefe)]=0 #Redefine samplebalancefe
FiveYearRaw$samplebalancefenew=FiveYearRaw$samplebalancefe[index_lag]
FiveYearRaw$samplebalancefenew[is.na(FiveYearRaw$samplebalancefenew)]=FiveYearRaw$samplebalancefe[is.na(FiveYearRaw$samplebalancefenew)]
FiveYearRaw$samplebalancefe=FiveYearRaw$samplebalancefenew
FiveYearRaw["samplebalancefenew"]=NULL
FiveYearRaw$samplebalancegmm[is.na(FiveYearRaw$samplebalancegmm)]=0 #Redefine samplebalancegmm
FiveYearRaw$samplebalancegmmnew=FiveYearRaw$samplebalancegmm[index_lag]
FiveYearRaw$samplebalancegmmnew[is.na(FiveYearRaw$samplebalancegmmnew)]=FiveYearRaw$samplebalancegmm[is.na(FiveYearRaw$samplebalancegmmnew)]
FiveYearRaw$samplebalancegmm=FiveYearRaw$samplebalancegmmnew
FiveYearRaw["samplebalancegmmnew"]=NULL
FiveYearRaw$lrgdpchsq=FiveYearRaw$lrgdpch^2 #Create quadratic term for income
FiveYearRaw$interact=FiveYearRaw$lrgdpch*FiveYearRaw$colony #Create income/colony interaction term
FiveYearRaw$interactsq=FiveYearRaw$lrgdpchsq*FiveYearRaw$colony #Create quadratic interaction term
sort1=paste(FiveYearRaw$year,FiveYearRaw$code_numeric) #Create lagged variables
sort2=paste(FiveYearRaw$year-5,FiveYearRaw$code_numeric)
index_lag=match(sort2,sort1)
FiveYearRaw$lag1lrgdpch=FiveYearRaw$lrgdpch[index_lag]
FiveYearRaw$lag1fhpolrigaug=FiveYearRaw$fhpolrigaug[index_lag]
FiveYearRaw$lag1polity4=FiveYearRaw$polity4[index_lag]
FiveYearRaw$lag1education=FiveYearRaw$education[index_lag]
FiveYearRaw$lag1lpop=FiveYearRaw$lpop[index_lag]
FiveYearRaw$lag1medage=FiveYearRaw$medage[index_lag]
FiveYearRaw$constant=rep(1,nrow(FiveYearRaw)) #Constant
FiveYearRaw$fdfhpolrigaug=ave(FiveYearRaw$fhpolrigaug,FiveYearRaw$code_numeric,FUN=function(x) c(NA,diff(x))) #FD
FiveYearRaw$fdpolity4=ave(FiveYearRaw$polity4,FiveYearRaw$code_numeric,FUN=function(x) c(NA,diff(x)))
FiveYearRaw$fdlag1lrgdpch=ave(FiveYearRaw$lag1lrgdpch,FiveYearRaw$code_numeric,FUN=function(x) c(NA,diff(x)))
FiveYearRaw$fdlag1education=ave(FiveYearRaw$lag1education,FiveYearRaw$code_numeric,FUN=function(x) c(NA,diff(x)))
FiveYearRaw$fdlag1fhpolrigaug=ave(FiveYearRaw$lag1fhpolrigaug,FiveYearRaw$code_numeric,FUN=function(x) c(NA,diff(x)))
FiveYearRaw$fdlag1polity4=ave(FiveYearRaw$lag1polity4,FiveYearRaw$code_numeric,FUN=function(x) c(NA,diff(x)))
FiveYearRaw$fd1950=ave(FiveYearRaw$year.f1950,FiveYearRaw$code_numeric,FUN=function(x) c(NA,diff(x)))
FiveYearRaw$fd1955=ave(FiveYearRaw$year.f1955,FiveYearRaw$code_numeric,FUN=function(x) c(NA,diff(x)))
FiveYearRaw$fd1960=ave(FiveYearRaw$year.f1960,FiveYearRaw$code_numeric,FUN=function(x) c(NA,diff(x)))
FiveYearRaw$fd1965=ave(FiveYearRaw$year.f1965,FiveYearRaw$code_numeric,FUN=function(x) c(NA,diff(x)))
FiveYearRaw$fd1970=ave(FiveYearRaw$year.f1970,FiveYearRaw$code_numeric,FUN=function(x) c(NA,diff(x)))
FiveYearRaw$fd1975=ave(FiveYearRaw$year.f1975,FiveYearRaw$code_numeric,FUN=function(x) c(NA,diff(x)))
FiveYearRaw$fd1980=ave(FiveYearRaw$year.f1980,FiveYearRaw$code_numeric,FUN=function(x) c(NA,diff(x)))
FiveYearRaw$fd1985=ave(FiveYearRaw$year.f1985,FiveYearRaw$code_numeric,FUN=function(x) c(NA,diff(x)))
FiveYearRaw$fd1990=ave(FiveYearRaw$year.f1990,FiveYearRaw$code_numeric,FUN=function(x) c(NA,diff(x)))
FiveYearRaw$fd1995=ave(FiveYearRaw$year.f1995,FiveYearRaw$code_numeric,FUN=function(x) c(NA,diff(x)))
FiveYearRaw$fd2000=ave(FiveYearRaw$year.f2000,FiveYearRaw$code_numeric,FUN=function(x) c(NA,diff(x)))
sort1=paste(FiveYearRaw$year,FiveYearRaw$code_numeric) #Create second lags for instruments
sort2=paste(FiveYearRaw$year-10,FiveYearRaw$code_numeric)
index_lag=match(sort2,sort1)
FiveYearRaw$lag2lrgdpch=FiveYearRaw$lrgdpch[index_lag]
FiveYearRaw$lag2fhpolrigaug=FiveYearRaw$fhpolrigaug[index_lag]
FiveYearRaw$lag2polity4=FiveYearRaw$polity4[index_lag]
FiveYearRaw$lag2nsave=FiveYearRaw$nsave[index_lag]
FiveYearRaw$lag2lpop=FiveYearRaw$lpop[index_lag]
FiveYearRaw$lag2education=FiveYearRaw$education[index_lag]
FiveYearRaw$fdlag2lrgdpch=ave(FiveYearRaw$lag2lrgdpch,FiveYearRaw$code_numeric,FUN=function(x) c(NA,diff(x)))
FiveYearRaw$fdlag2fhpolrigaug=ave(FiveYearRaw$lag2fhpolrigaug,FiveYearRaw$code_numeric,FUN=function(x) c(NA,diff(x)))
FiveYearRaw$fdlag2polity4=ave(FiveYearRaw$lag2polity4,FiveYearRaw$code_numeric,FUN=function(x) c(NA,diff(x)))
sort1=paste(FiveYearRaw$year,FiveYearRaw$code_numeric) #And third lags
sort2=paste(FiveYearRaw$year-15,FiveYearRaw$code_numeric)
index_lag=match(sort2,sort1)
FiveYearRaw$lag3lrgdpch=FiveYearRaw$lrgdpch[index_lag]
FiveYearRaw$lag3fhpolrigaug=FiveYearRaw$fhpolrigaug[index_lag]
FiveYearRaw$lag3polity4=FiveYearRaw$polity4[index_lag]


###Specification Test###
#Implement Lee (2014) Test#
#First get residuals from given estimation method
FiveYearRawUsed=FiveYearRaw[FiveYearRaw$sample==1,] #Only include "base sample"
FiveYearRawUsed$ID=c(1:nrow(FiveYearRawUsed))

fiveyearused=plm.data(FiveYearRawUsed,index=c("code_numeric","year_numeric"))

fefiveused=plm(polity4~lag(polity4,1)+lag(lrgdpch,1)+year.f1965+year.f1970+year.f1975+year.f1980+year.f1985+year.f1990+year.f1995+year.f2000,data=fiveyearused,model="within")
moralfeused=plm(fhpolrigaug~lag(fhpolrigaug,1)+lag(lrgdpch,1)+lag(lrgdpchsq,1),data=fiveyearused,model="within",effect="twoways")
cervellatifeused=plm(polity4~lag(polity4,1)+lag(lrgdpch,1)+lag(interact,1),data=fiveyearused,model="within",effect="twoways")
cervellatifesqused=plm(polity4~lag(polity4,1)+lag(lrgdpch,1)+lag(interact,1)+lag(lrgdpchsq,1)+lag(interactsq,1),data=fiveyearused,model="within",effect="twoways")


FiveYearRawUsed$usedinFE=FiveYearRawUsed$ID %in% as.numeric(names(fefiveused$residuals)) #Get which observations are used
FiveYearRawUsed$usedinMoral=FiveYearRawUsed$ID %in% as.numeric(names(moralfeused$residuals))
FiveYearRawUsed$usedinCervellati=FiveYearRawUsed$ID %in% as.numeric(names(cervellatifeused$residuals))
FiveYearRawUsed$usedinCervellatiSQ=FiveYearRawUsed$ID %in% as.numeric(names(cervellatifesqused$residuals))
FiveYearRawUsedFE=FiveYearRawUsed[FiveYearRawUsed$usedinFE==T,] #Drop all unused observations
FiveYearRawUsedMoral=FiveYearRawUsed[FiveYearRawUsed$usedinMoral==T,]
FiveYearRawUsedCervellati=FiveYearRawUsed[FiveYearRawUsed$usedinCervellati==T,]
FiveYearRawUsedCervellatiSQ=FiveYearRawUsed[FiveYearRawUsed$usedinCervellatiSQ==T,]
FiveYearRawUsedFE$resid=fefiveused$residuals #Append residuals to data frame
FiveYearRawUsedMoral$resid=moralfeused$residuals
FiveYearRawUsedCervellati$resid=cervellatifeused$residuals
FiveYearRawUsedCervellatiSQ$resid=cervellatifesqused$residuals

e=as.list(aggregate(FiveYearRawUsedFE$resid,by=list(FiveYearRawUsedFE$code_numeric),FUN=function(x) x))
e=e[[-1]]
e=lapply(e,function(x) x-mean(x)) #Demean residuals depending on your model

index=c()
for(i in 1:length(e)){index[i]=(length(e[[i]])>=6)}
e=e[index] #Only keep observations with N>=6

GK=function(u){(1/sqrt(2*pi))*exp(-0.5*u*u)} #Gaussian kernel for convenience
P=2 #Bandwidth parameter (lag length)

## Kernel Functions ##

kbar=function(x){(1-abs(x))*(x<=1)} #Bartlett Kernel
kdan=function(x){sin(pi*x)/(pi*x)} #Daniell Kernel
kqs=function(x){ifelse(x==0,1,(25/(12*pi^2*x^2))*(sin(6*pi*x/5)/(6*pi*x/5)-cos(6*pi*x/5)))} #QS Kernel

kern=function(x){kqs(x)} #Put in the kernel you want to use

Ni=length(e)
Ci=rep(0,Ni)
Di=rep(0,Ni)

ptm=proc.time()
for(i in 1:Ni){
  X=e[[i]]
  N=length(X)
  
  RCX0=function(u){cos(X*u)} #Real part of marginal ECF of Xt, original variable
  ICX0=function(u){sin(X*u)} #Imaginary part
  
  RCX0p=function(u,j){cos(X[j]*u)} #Real part of marginal ECF of Xt at single point
  ICX0p=function(u,j){sin(X[j]*u)} #Imaginary part
  
##Calculate Ci - see page 149 of Lee (2014)##
  
  Ci[i]=0 #Will be summed in for loop
  for(j in 1:(N-1)){
    if((N-1-(j+1))>0){
      firstsum=rep(0,N-1-j)
      for(k in (j+1):(N-1)){
        psisq=function(v){((RCX0p(v,k-j)+mean(RCX0(v)))^2+(ICX0p(v,k-j)+mean(ICX0(v)))^2)*GK(v)}
        firstsum[k-j]=(X[k])^2*integrate(psisq,lower=-3,upper=3)$val
      }
      Ci[i]=Ci[i]+(kern(j/P)*kern(j/P)*(1/(N-j))*sum(firstsum))
    }}
  
##Calculate Di - see page 149 of Lee (2014)##
  
  Di[i]=0 #Will be summed in for loop
  for(j in 1:(N-2)){
    for(l in 1:(N-2)){
      firstsum=c()
      for(m in (max(j,l)+1):N){
        firstsum=c(firstsum,eval(parse(text=paste("function(x){((1/(",N,"-max(",j,",",l,")))*(X[",m,"])^2)^2*",
                                                  "(((RCX0p(x[1],",m,"-",j,")+mean(RCX0(x[1])))*(RCX0p(x[2],",m,"-",l,")+mean(RCX0(x[2]))))^2+((ICX0p(x[1],",m,"-",j,")+mean(ICX0(x[1])))*(ICX0p(x[2],",m,"-",l,")+mean(ICX0(x[2]))))^2)",
                                                  "*GK(x[1])*GK(x[2])}"))))
      }
      integrand=function(x){sum(sapply(firstsum, function(f) f(x)))}
      Di[i]=Di[i]+2*(kern(j/P))^2*(kern(l/P))^2*adaptIntegrate(integrand,lowerLimit=c(-3,-3),upperLimit=c(3,3))$integral
    }}
  print(i)
} #End i loop
ptm-proc.time()


##Test Statistics Ma,Mb##

Ma=0 #Will be summed in for loop
Mb=0 #Will be summed in for loop

for(i in 1:Ni){
  X=e[[i]]
  N=length(X)
  
  X1=function(j){X[(j):N]} #t=j to N
  X2=function(j){X[1:(N-j)]} #t=1 to N-j
  
  RCX0=function(u){cos(X*u)} #Real part of marginal ECF of Xt, original variable
  ICX0=function(u){sin(X*u)} #Imaginary part
  
  RCX02=function(u,j){cos(X2(j)*u)} #Real part of marginal ECF of Xt, t=1 to N-j
  ICX02=function(u,j){sin(X2(j)*u)} #Imaginary part
  RCX0p=function(u,j){cos(X[j]*u)} #Real part of marginal ECF of Xt at single point
  ICX0p=function(u,j){sin(X[j]*u)} #Imaginary part
  
  ## Real/Imaginary Parts of sigma(1,0)(0,v)
  ## Notice that the cos/sin are switched after taking the derivative
  ## See page 149 of Lee (2014) for details
  
  Rxsgm=function(v,j){
    c1=0
    for(q in (j+1):N){
      c1=c1+X[q]*sin(X[q-j]*v)
    }
    c1=c1-mean(X1(j+1))*sum(ICX02(v,j))
    return(c1)}
  
  Ixsgm=function(v,j){
    c1=0
    for(q in (j+1):N){
      c1=c1+X[q]*cos(X[q-j]*v)
    }
    c1=c1-mean(X1(j+1))*sum(RCX02(v,j))
    return(c1)}
  
  firstsum=c()
  for(j in 1:(N-1)){
    integrand=function(v){(1/(N-j))^2*(Rxsgm(v,j)^2+Ixsgm(v,j)^2)*GK(v)}
    firstsum[j]=kern(j/P)*kern(j/P)*(N-j)*integrate(integrand,lower=-3,upper=3)$val
    Ma=Ma+kern(j/P)*kern(j/P)*(N-j)*integrate(integrand,lower=-3,upper=3)$val
  }
  
  Mb=Mb+(sum(firstsum)-Ci[i])/sqrt(Di[i])
  
} #End i loop

Ma=(Ma-sum(Ci))/sqrt(sum(Di))
Mb=(1/sqrt(Ni))*Mb


###Estimation with Bootstrapped Confidence Intervals###

#This estimator is from equations (3) and (4) in Cai & Li (2008)
#First make sure the data is imported as in the "Import Data" code chunk above

#The np package does not keep track of which observations are used
#So here drop all the observations with missing values
#Only include those years/countries for which we have every needed variable
  
unbalanced=FiveYearRaw[FiveYearRaw$sample==1&!is.na(FiveYearRaw$fdfhpolrigaug)&!is.na(FiveYearRaw$fdlag1lrgdpch)&!is.na(FiveYearRaw$fdlag1fhpolrigaug)&!is.na(FiveYearRaw$lag2lrgdpch)&!is.na(FiveYearRaw$lag2fhpolrigaug)&!is.na(FiveYearRaw$lag2education),]

## Define the function returning statistics to be bootstrapped ##
cailiestimator=function(x){
  #Bandwidths previously calculated with nmulti=10
  bwc=npregbw(constant~lag2lrgdpch+lag2fhpolrigaug+lag2education,data=x,bws=c(2.163542,0.7727154,6.149686),bandwidth.compute=F)
  bwd=npregbw(fdlag1fhpolrigaug~lag2lrgdpch+lag2fhpolrigaug+lag2education,data=x,bws=c(0.6718974,0.09467394,0.9041207),bandwidth.compute=F)
  bwgdp=npregbw(fdlag1lrgdpch~lag2lrgdpch+lag2fhpolrigaug+lag2education,data=x,bws=c(0.9036319,0.01263774,1.722905),bandwidth.compute=F)
  bwcol=npregbw(colony~lag2lrgdpch+lag2fhpolrigaug+lag2education,data=x,bws=c(0.3083963,0.01705288,0.7776533),bandwidth.compute=F)
  
  modelc=npreg(bws=bwc)
  modeld=npreg(bws=bwd)
  modelgdp=npreg(bws=bwgdp)
  modelcol=npreg(bws=bwcol)
  
  ## Second Step - Get E(pi(v)*Yit|z) and E(pi(v)*pi(v)'|z) ##
  #Get E(pi(v)*Yit|z)
  
  piy1=fitted(modelc)*x$fdfhpolrigaug
  piy2=fitted(modeld)*x$fdfhpolrigaug
  piy3=fitted(modelgdp)*x$fdfhpolrigaug
  piy4=fitted(modelcol)*x$fdfhpolrigaug
  
  #Bandwidths previously calculated with nmulti=10
  bwpiy1=npregbw(formula=piy1~lag2education,data=x,bws=c(2360772),bandwidth.compute=F)
  bwpiy2=npregbw(formula=piy2~lag2education,data=x,bws=c(4.478629),bandwidth.compute=F)
  bwpiy3=npregbw(formula=piy3~lag2education,data=x,bws=c(0.7545379),bandwidth.compute=F)
  bwpiy4=npregbw(formula=piy4~lag2education,data=x,bws=c(8833743),bandwidth.compute=F)
  
  #Get E(pi(v)*pi(v)'|z)
  
  pi11=fitted(modelc)*fitted(modelc)
  pi12=fitted(modelc)*fitted(modeld)
  pi13=fitted(modelc)*fitted(modelgdp)
  pi14=fitted(modelc)*fitted(modelcol)
  pi22=fitted(modeld)*fitted(modeld)
  pi23=fitted(modeld)*fitted(modelgdp)
  pi24=fitted(modeld)*fitted(modelcol)
  pi33=fitted(modelgdp)*fitted(modelgdp)
  pi34=fitted(modelgdp)*fitted(modelcol)
  pi44=fitted(modelcol)*fitted(modelcol)
  
  #Bandwidths previously calculated with nmulti=10
  bwpi11=npregbw(formula=pi11~lag2education,data=x,bws=c(4.268014),bandwidth.compute=F)
  bwpi12=npregbw(formula=pi12~lag2education,data=x,bws=c(1.54178),bandwidth.compute=F)
  bwpi13=npregbw(formula=pi13~lag2education,data=x,bws=c(0.6602295),bandwidth.compute=F)
  bwpi14=npregbw(formula=pi14~lag2education,data=x,bws=c(0.5047177),bandwidth.compute=F)
  bwpi22=npregbw(formula=pi22~lag2education,data=x,bws=c(0.7920014),bandwidth.compute=F)
  bwpi23=npregbw(formula=pi23~lag2education,data=x,bws=c(1.268894),bandwidth.compute=F)
  bwpi24=npregbw(formula=pi24~lag2education,data=x,bws=c(0.9803362),bandwidth.compute=F)
  bwpi33=npregbw(formula=pi33~lag2education,data=x,bws=c(0.8383041),bandwidth.compute=F)
  bwpi34=npregbw(formula=pi34~lag2education,data=x,bws=c(0.5394411),bandwidth.compute=F)
  bwpi44=npregbw(formula=pi44~lag2education,data=x,bws=c(0.5395177),bandwidth.compute=F)
  
  #Get g(z)
  
  #eval1=seq(1,10,by=0.1) #See quantile(x$education,probs=seq(0.05,.95,by=.05),na.rm=T)
  eval1=unbalanced$lag2education
  
  gz=matrix(NA,length(eval1),4)
  
  for(i in 1:length(eval1)){
    eval=data.frame(eval1[i])
    P11=fitted(npreg(bws=bwpi11,exdat=eval))
    P12=fitted(npreg(bws=bwpi12,exdat=eval))
    P13=fitted(npreg(bws=bwpi13,exdat=eval))
    P14=fitted(npreg(bws=bwpi14,exdat=eval))
    P22=fitted(npreg(bws=bwpi22,exdat=eval))
    P23=fitted(npreg(bws=bwpi23,exdat=eval))
    P24=fitted(npreg(bws=bwpi24,exdat=eval))
    P33=fitted(npreg(bws=bwpi33,exdat=eval))
    P34=fitted(npreg(bws=bwpi34,exdat=eval))
    P44=fitted(npreg(bws=bwpi44,exdat=eval))
    P21=P12
    P31=P13
    P32=P23
    P41=P14
    P42=P24
    P43=P34
    P=matrix(c(P11,P12,P13,P14,P21,P22,P23,P24,P31,P32,P33,P34,P41,P42,P43,P44),4,4,byrow=T)
    PY1=fitted(npreg(bws=bwpiy1,exdat=eval))
    PY2=fitted(npreg(bws=bwpiy2,exdat=eval))
    PY3=fitted(npreg(bws=bwpiy3,exdat=eval))
    PY4=fitted(npreg(bws=bwpiy4,exdat=eval))
    PY=c(PY1,PY2,PY3,PY4)
    gz[i,]=solve(P)%*%PY
  }
  return(gz)
}

## Now do resampling ##

numiter=999 #Number of iterations
eval1=seq(1,10,by=0.1) #See quantile(x$education,probs=seq(0.05,.95,by=.05),na.rm=T)
bootarray=array(NA,dim=c(length(eval1),4,numiter))

set.seed(123)
for(i in 1:numiter){
  ## Cross-sectional resample ##
  bsindex1=sample(unique(unbalanced$code_numeric),replace=T)
  bsindex2=c() #Inelegant but functional
  for(j in 1:length(unique(bsindex1))){
    bsindex2=c(bsindex2,rep(which(unbalanced$code_numeric==sort(unique(bsindex1))[j]),table(bsindex1)[j]))
  }
  data=unbalanced[bsindex2,]
  ## Time-dimension resample ##
  #Create an index "code_new" that treats the resampled countries separately
  #Then the block bootstrap can be applied to each one
  data$code_new=ifelse(as.numeric(row.names(data))==round(as.numeric(row.names(data))),data$code_numeric,data$code_numeric+as.numeric(row.names(data))-round(as.numeric(row.names(data))))
  data$index=c(1:NROW(data))
  #Block bootstrap function
  block=function(x){
    b=3 #block length - note we have T=7 at most
    nrep=floor(length(x)/b)
    xindex=c()
    for(k in 1:nrep){
      start=sample((length(x)-b+1),1)
      xindex=c(xindex,x[start:(start+b-1)])
    }
    return(xindex)
  }
  bsindex3=aggregate(data$index,by=list(data$code_new),FUN=function(x) if (length(x)<=3) x else block(x))
  bsindex4=unlist(bsindex3[,2])
  resampledata=data[bsindex4,]
  #Get test statistic
  bootarray[,,i]=cailiestimator(resampledata)
}

estimates=cailiestimator(unbalanced) #Basic estimates
upperindex=round(.975*numiter)
lowerindex=round(.025*numiter)

#Get Percentile CI's for lagged Democracy, lagged Income, and the Colony Dummy
UpCId=rep(NA,length(eval1))
LowCId=rep(NA,length(eval1))
UpCIgdp=rep(NA,length(eval1))
LowCIgdp=rep(NA,length(eval1))
UpCIcolony=rep(NA,length(eval1))
LowCIcolony=rep(NA,length(eval1))
for(i in 1:length(eval1)){
  UpCId[i]=sort(bootarray[i,2,])[upperindex]
  LowCId[i]=sort(bootarray[i,2,])[lowerindex]
  UpCIgdp[i]=sort(bootarray[i,3,])[upperindex]
  LowCIgdp[i]=sort(bootarray[i,3,])[lowerindex]
  UpCIcolony[i]=sort(bootarray[i,4,])[upperindex]
  LowCIcolony[i]=sort(bootarray[i,4,])[lowerindex]
}

## Plots ##

pdf("IncomeXColCI",width=5,height=5)
y1=estimates[,3] #Recall estimates is defined above
plot(eval1,y1,xlab=expression(paste("Education (",dot(z)["it"]*")",sep="")),ylab="Coef on Lagged GDP",main=NULL,type="l",las=1,lwd=2,col="black",ylim=c(-40,30),xlim=c(1.5,9))
polygon(c(eval1,rev(eval1)),c(LowCIgdp,rev(UpCIgdp)),col=gray(0.9),border=NA)
lines(eval1,y1)
abline(h=0,lty=2)
box()
dev.off()

pdf("IncomeXColCIZoom",width=5,height=5)
y1=estimates[,3] #Recall estimates is defined above
plot(eval1,y1,xlab=expression(paste("Education (",dot(z)["it"]*")",sep="")),ylab="Coef on Lagged GDP",main=NULL,type="l",las=1,lwd=2,col="black",ylim=c(-5,5),xlim=c(1.5,9))
polygon(c(eval1,rev(eval1)),c(LowCIgdp,rev(UpCIgdp)),col=gray(0.9),border=NA)
lines(eval1,y1)
abline(h=0,lty=2)
box()
dev.off()

pdf("DemocXColCI",width=5,height=5)
y1=estimates[,2]
plot(eval1,y1,xlab=expression(paste("Education (",dot(z)["it"]*")",sep="")),ylab="Coef on Lagged Democ",main=NULL,type="l",las=1,lwd=2,col="black",ylim=c(-16,16),xlim=c(1.5,9))
polygon(c(eval1,rev(eval1)),c(LowCId,rev(UpCId)),col=gray(0.9),border=NA)
lines(eval1,y1)
abline(h=0,lty=2)
box()
dev.off()

pdf("DemocXColCIZoom",width=5,height=5)
y1=estimates[,2]
plot(eval1,y1,xlab=expression(paste("Education (",dot(z)["it"]*")",sep="")),ylab="Coef on Lagged Democ",main=NULL,type="l",las=1,lwd=2,col="black",ylim=c(-2,2),xlim=c(1.5,9))
polygon(c(eval1,rev(eval1)),c(LowCId,rev(UpCId)),col=gray(0.9),border=NA)
lines(eval1,y1)
abline(h=0,lty=2)
box()
dev.off()

pdf("ColonyXColCI",width=5,height=5)
y1=estimates[,4]
plot(eval1,y1,xlab=expression(paste("Education (",dot(z)["it"]*")",sep="")),ylab="Coef on Colony",main=NULL,type="l",las=1,lwd=2,col="black",ylim=c(-8,8),xlim=c(1.5,9))
polygon(c(eval1,rev(eval1)),c(LowCIcolony,rev(UpCIcolony)),col=gray(0.9),border=NA)
lines(eval1,y1)
abline(h=0,lty=2)
box()
dev.off()

pdf("ColonyXColCIZoom",width=5,height=5)
y1=estimates[,4]
plot(eval1,y1,xlab=expression(paste("Education (",dot(z)["it"]*")",sep="")),ylab="Coef on Colony",main=NULL,type="l",las=1,lwd=2,col="black",ylim=c(-1,1),xlim=c(1.5,9))
polygon(c(eval1,rev(eval1)),c(LowCIcolony,rev(UpCIcolony)),col=gray(0.9),border=NA)
lines(eval1,y1)
abline(h=0,lty=2)
box()
dev.off()

#Plot with all six together in layout
pdf("VaryCoefPlot",family="CM Roman",width=6,height=6)
layout(matrix(c(1,3,5,2,4,6),3,2),widths=rep(2,2))
par(mar=c(5.1,4.1,1,2.1))
y1=estimates[,3] #Recall estimates is defined above
plot(eval1,y1,xlab=expression(paste("Education (",dot(z)["it"]*")",sep="")),ylab="Coef on Lagged GDP",main=NULL,type="l",las=1,lwd=2,col="black",ylim=c(-40,30),xlim=c(1.5,9))
polygon(c(eval1,rev(eval1)),c(LowCIgdp,rev(UpCIgdp)),col=gray(0.9),border=NA)
lines(eval1,y1)
abline(h=0,lty=2)
box()
plot(eval1,y1,xlab=expression(paste("Education (",dot(z)["it"]*")",sep="")),ylab="Coef on Lagged GDP",main=NULL,type="l",las=1,lwd=2,col="black",ylim=c(-5,5),xlim=c(1.5,9))
polygon(c(eval1,rev(eval1)),c(LowCIgdp,rev(UpCIgdp)),col=gray(0.9),border=NA)
lines(eval1,y1)
abline(h=0,lty=2)
box()
y1=estimates[,2]
plot(eval1,y1,xlab=expression(paste("Education (",dot(z)["it"]*")",sep="")),ylab="Coef on Lagged Democ",main=NULL,type="l",las=1,lwd=2,col="black",ylim=c(-16,16),xlim=c(1.5,9))
polygon(c(eval1,rev(eval1)),c(LowCId,rev(UpCId)),col=gray(0.9),border=NA)
lines(eval1,y1)
abline(h=0,lty=2)
box()
plot(eval1,y1,xlab=expression(paste("Education (",dot(z)["it"]*")",sep="")),ylab="Coef on Lagged Democ",main=NULL,type="l",las=1,lwd=2,col="black",ylim=c(-2,2),xlim=c(1.5,9))
polygon(c(eval1,rev(eval1)),c(LowCId,rev(UpCId)),col=gray(0.9),border=NA)
lines(eval1,y1)
abline(h=0,lty=2)
box()
y1=estimates[,4]
plot(eval1,y1,xlab=expression(paste("Education (",dot(z)["it"]*")",sep="")),ylab="Coef on Colony",main=NULL,type="l",las=1,lwd=2,col="black",ylim=c(-8,8),xlim=c(1.5,9))
polygon(c(eval1,rev(eval1)),c(LowCIcolony,rev(UpCIcolony)),col=gray(0.9),border=NA)
lines(eval1,y1)
abline(h=0,lty=2)
box()
plot(eval1,y1,xlab=expression(paste("Education (",dot(z)["it"]*")",sep="")),ylab="Coef on Colony",main=NULL,type="l",las=1,lwd=2,col="black",ylim=c(-1,1),xlim=c(1.5,9))
polygon(c(eval1,rev(eval1)),c(LowCIcolony,rev(UpCIcolony)),col=gray(0.9),border=NA)
lines(eval1,y1)
abline(h=0,lty=2)
box()
dev.off()


###Bootstrap - General Time Effects - Year Dummies in Z###

#This estimator is from equations (3) and (4) in Cai & Li (2008)
#First make sure the data is imported as in the "Import Data" code chunk above
  
#The np package does not keep track of which observations are used
#So here drop all the observations with missing values
#Only include those years/countries for which we have every needed variable
  
unbalanced=FiveYearRaw[FiveYearRaw$sample==1&!is.na(FiveYearRaw$fdfhpolrigaug)&!is.na(FiveYearRaw$fdlag1lrgdpch)&!is.na(FiveYearRaw$fdlag1fhpolrigaug)&!is.na(FiveYearRaw$lag2lrgdpch)&!is.na(FiveYearRaw$lag2fhpolrigaug)&!is.na(FiveYearRaw$lag2education),]

## Define the function returning statistics to be bootstrapped ##
cailiestimator=function(x){
  #Bandwidths previously calculated with nmulti=10
  bwd=npregbw(fdlag1fhpolrigaug~lag2lrgdpch+lag2fhpolrigaug+lag2education+year.f1970+year.f1975+year.f1980+year.f1985+year.f1990+year.f1995+year.f2000,data=x,bws=c(0.6718974,0.09467394,0.9041207,0.001,0.001,0.001,0.001,0.001,0.001,0.001),bandwidth.compute=F)
  bwgdp=npregbw(fdlag1lrgdpch~lag2lrgdpch+lag2fhpolrigaug+lag2education+year.f1970+year.f1975+year.f1980+year.f1985+year.f1990+year.f1995+year.f2000,data=x,bws=c(0.9036319,0.01263774,1.722905,0.001,0.001,0.001,0.001,0.001,0.001,0.001),bandwidth.compute=F)
  bwcol=npregbw(colony~lag2lrgdpch+lag2fhpolrigaug+lag2education+year.f1970+year.f1975+year.f1980+year.f1985+year.f1990+year.f1995+year.f2000,data=x,bws=c(0.3083963,0.01705288,0.7776533,0.001,0.001,0.001,0.001,0.001,0.001,0.001),bandwidth.compute=F)
  
  modeld=npreg(bws=bwd)
  modelgdp=npreg(bws=bwgdp)
  modelcol=npreg(bws=bwcol)
  
  ## Second Step - Get E(pi(v)*Yit|z) and E(pi(v)*pi(v)'|z) ##
  #Get E(pi(v)*Yit|z)
  
  piy1=fitted(modeld)*x$fdfhpolrigaug
  piy2=fitted(modelgdp)*x$fdfhpolrigaug
  piy3=fitted(modelcol)*x$fdfhpolrigaug
  
  #Bandwidths previously calculated with nmulti=10
  bwpiy1=npregbw(formula=piy1~lag2education+year.f1970+year.f1975+year.f1980+year.f1985+year.f1990+year.f1995+year.f2000,data=x,bws=c(4.478629,0.001,0.001,0.001,0.001,0.001,0.001,0.001),bandwidth.compute=F)
  bwpiy2=npregbw(formula=piy2~lag2education+year.f1970+year.f1975+year.f1980+year.f1985+year.f1990+year.f1995+year.f2000,data=x,bws=c(0.7545379,0.001,0.001,0.001,0.001,0.001,0.001,0.001),bandwidth.compute=F)
  bwpiy3=npregbw(formula=piy3~lag2education+year.f1970+year.f1975+year.f1980+year.f1985+year.f1990+year.f1995+year.f2000,data=x,bws=c(8833743,0.001,0.001,0.001,0.001,0.001,0.001,0.001),bandwidth.compute=F)
  
  #Get E(pi(v)*pi(v)'|z)
  pi11=fitted(modeld)*fitted(modeld)
  pi12=fitted(modeld)*fitted(modelgdp)
  pi13=fitted(modeld)*fitted(modelcol)
  pi22=fitted(modelgdp)*fitted(modelgdp)
  pi23=fitted(modelgdp)*fitted(modelcol)
  pi33=fitted(modelcol)*fitted(modelcol)
  
  #Bandwidths previously calculated with nmulti=10
  bwpi11=npregbw(formula=pi11~lag2education+year.f1970+year.f1975+year.f1980+year.f1985+year.f1990+year.f1995+year.f2000,data=x,bws=c(0.7920014,0.001,0.001,0.001,0.001,0.001,0.001,0.001),bandwidth.compute=F)
  bwpi12=npregbw(formula=pi12~lag2education+year.f1970+year.f1975+year.f1980+year.f1985+year.f1990+year.f1995+year.f2000,data=x,bws=c(1.268894,0.001,0.001,0.001,0.001,0.001,0.001,0.001),bandwidth.compute=F)
  bwpi13=npregbw(formula=pi13~lag2education+year.f1970+year.f1975+year.f1980+year.f1985+year.f1990+year.f1995+year.f2000,data=x,bws=c(0.9803362,0.001,0.001,0.001,0.001,0.001,0.001,0.001),bandwidth.compute=F)
  bwpi22=npregbw(formula=pi22~lag2education+year.f1970+year.f1975+year.f1980+year.f1985+year.f1990+year.f1995+year.f2000,data=x,bws=c(0.8383041,0.001,0.001,0.001,0.001,0.001,0.001,0.001),bandwidth.compute=F)
  bwpi23=npregbw(formula=pi23~lag2education+year.f1970+year.f1975+year.f1980+year.f1985+year.f1990+year.f1995+year.f2000,data=x,bws=c(0.5394411,0.001,0.001,0.001,0.001,0.001,0.001,0.001),bandwidth.compute=F)
  bwpi33=npregbw(formula=pi33~lag2education+year.f1970+year.f1975+year.f1980+year.f1985+year.f1990+year.f1995+year.f2000,data=x,bws=c(0.5395177,0.001,0.001,0.001,0.001,0.001,0.001,0.001),bandwidth.compute=F)
  
  #Get g(z)
  
  #eval1=seq(1,10,by=0.1) #See quantile(x$education,probs=seq(0.05,.95,by=.05),na.rm=T)
  eval1=seq(1,10,by=0.1)
  
  gz=matrix(NA,length(eval1),3)
  
  for(i in 1:length(eval1)){
    eval=data.frame(eval1[i],0,0,0,0,0,0,1)
    P11=fitted(npreg(bws=bwpi11,exdat=eval))
    P12=fitted(npreg(bws=bwpi12,exdat=eval))
    P13=fitted(npreg(bws=bwpi13,exdat=eval))
    P22=fitted(npreg(bws=bwpi22,exdat=eval))
    P23=fitted(npreg(bws=bwpi23,exdat=eval))
    P33=fitted(npreg(bws=bwpi33,exdat=eval))
    P21=P12
    P31=P13
    P32=P23
    P=matrix(c(P11,P12,P13,P21,P22,P23,P31,P32,P33),3,3,byrow=T)
    PY1=fitted(npreg(bws=bwpiy1,exdat=eval))
    PY2=fitted(npreg(bws=bwpiy2,exdat=eval))
    PY3=fitted(npreg(bws=bwpiy3,exdat=eval))
    PY=c(PY1,PY2,PY3)
    gz[i,]=solve(P)%*%PY
  }
  return(gz)
}

## Now do resampling ##

numiter=999 #Number of iterations
eval1=seq(1,10,by=0.1) #See quantile(x$education,probs=seq(0.05,.95,by=.05),na.rm=T)
bootarray=array(NA,dim=c(length(eval1),3,numiter))

set.seed(123)
for(i in 1:numiter){
  ## Cross-sectional resample ##
  bsindex1=sample(unique(unbalanced$code_numeric),replace=T)
  bsindex2=c() #Inelegant but functional
  for(j in 1:length(unique(bsindex1))){
    bsindex2=c(bsindex2,rep(which(unbalanced$code_numeric==sort(unique(bsindex1))[j]),table(bsindex1)[j]))
  }
  data=unbalanced[bsindex2,]
  ## Time-dimension resample ##
  #Create an index "code_new" that treats the resampled countries separately
  #Then the block bootstrap can be applied to each one
  data$code_new=ifelse(as.numeric(row.names(data))==round(as.numeric(row.names(data))),data$code_numeric,data$code_numeric+as.numeric(row.names(data))-round(as.numeric(row.names(data))))
  data$index=c(1:NROW(data))
  #Block bootstrap function
  block=function(x){
    b=3 #block length - note we have T=7 at most
    nrep=floor(length(x)/b)
    xindex=c()
    for(k in 1:nrep){
      start=sample((length(x)-b+1),1)
      xindex=c(xindex,x[start:(start+b-1)])
    }
    return(xindex)
  }
  bsindex3=aggregate(data$index,by=list(data$code_new),FUN=function(x) if (length(x)<=3) x else block(x))
  bsindex4=unlist(bsindex3[,2])
  resampledata=data[bsindex4,]
  #Get test statistic
  bootarray[,,i]=cailiestimator(resampledata)
}

#Basic estimates
estimates=cailiestimator(unbalanced)
upperindex=round(.975*numiter)
lowerindex=round(.025*numiter)

#Get Percentile CI's for lagged Democracy, lagged Income, and the Colony Dummy
UpCId=rep(NA,length(eval1))
LowCId=rep(NA,length(eval1))
UpCIgdp=rep(NA,length(eval1))
LowCIgdp=rep(NA,length(eval1))
UpCIcolony=rep(NA,length(eval1))
LowCIcolony=rep(NA,length(eval1))
for(i in 1:length(eval1)){
  UpCId[i]=sort(bootarray[i,1,])[upperindex]
  LowCId[i]=sort(bootarray[i,1,])[lowerindex]
  UpCIgdp[i]=sort(bootarray[i,2,])[upperindex]
  LowCIgdp[i]=sort(bootarray[i,2,])[lowerindex]
  UpCIcolony[i]=sort(bootarray[i,3,])[upperindex]
  LowCIcolony[i]=sort(bootarray[i,3,])[lowerindex]
}

## Plots ##

pdf("IncomeXColCI",width=5,height=5)
y1=estimates[,2] #Recall estimates is defined above
plot(eval1,y1,xlab=expression(paste("Education (",dot(z)["it"]*")",sep="")),ylab="Coef on Lagged GDP",main=NULL,type="l",las=1,lwd=2,col="black",ylim=c(-40,30),xlim=c(1.5,9))
polygon(c(eval1,rev(eval1)),c(LowCIgdp,rev(UpCIgdp)),col=gray(0.9),border=NA)
lines(eval1,y1)
abline(h=0,lty=2)
box()
dev.off()

pdf("IncomeXColCIZoom",width=5,height=5)
y1=estimates[,2] #Recall estimates is defined above
plot(eval1,y1,xlab=expression(paste("Education (",dot(z)["it"]*")",sep="")),ylab="Coef on Lagged GDP",main=NULL,type="l",las=1,lwd=2,col="black",ylim=c(-5,5),xlim=c(1.5,9))
polygon(c(eval1,rev(eval1)),c(LowCIgdp,rev(UpCIgdp)),col=gray(0.9),border=NA)
lines(eval1,y1)
abline(h=0,lty=2)
box()
dev.off()

pdf("DemocXColCI",width=5,height=5)
y1=estimates[,1]
plot(eval1,y1,xlab=expression(paste("Education (",dot(z)["it"]*")",sep="")),ylab="Coef on Lagged Democ",main=NULL,type="l",las=1,lwd=2,col="black",ylim=c(-16,16),xlim=c(1.5,9))
polygon(c(eval1,rev(eval1)),c(LowCId,rev(UpCId)),col=gray(0.9),border=NA)
lines(eval1,y1)
abline(h=0,lty=2)
box()
dev.off()

pdf("DemocXColCIZoom",width=5,height=5)
y1=estimates[,1]
plot(eval1,y1,xlab=expression(paste("Education (",dot(z)["it"]*")",sep="")),ylab="Coef on Lagged Democ",main=NULL,type="l",las=1,lwd=2,col="black",ylim=c(-2,2),xlim=c(1.5,9))
polygon(c(eval1,rev(eval1)),c(LowCId,rev(UpCId)),col=gray(0.9),border=NA)
lines(eval1,y1)
abline(h=0,lty=2)
box()
dev.off()

pdf("ColonyXColCI",width=5,height=5)
y1=estimates[,3]
plot(eval1,y1,xlab=expression(paste("Education (",dot(z)["it"]*")",sep="")),ylab="Coef on Colony",main=NULL,type="l",las=1,lwd=2,col="black",ylim=c(-8,8),xlim=c(1.5,9))
polygon(c(eval1,rev(eval1)),c(LowCIcolony,rev(UpCIcolony)),col=gray(0.9),border=NA)
lines(eval1,y1)
abline(h=0,lty=2)
box()
dev.off()

pdf("ColonyXColCIZoom",width=5,height=5)
y1=estimates[,3]
plot(eval1,y1,xlab=expression(paste("Education (",dot(z)["it"]*")",sep="")),ylab="Coef on Colony",main=NULL,type="l",las=1,lwd=2,col="black",ylim=c(-1,1),xlim=c(1.5,9))
polygon(c(eval1,rev(eval1)),c(LowCIcolony,rev(UpCIcolony)),col=gray(0.9),border=NA)
lines(eval1,y1)
abline(h=0,lty=2)
box()
dev.off()

#Plot with all six together in layout
pdf("VaryCoefPlotTDZ2000",width=6,height=6)
layout(matrix(c(1,3,5,2,4,6),3,2),widths=rep(2,2))
par(mar=c(5.1,4.1,1,2.1))
y1=estimates[,2] #Recall estimates is defined above
plot(eval1,y1,xlab=expression(paste("Education (",dot(z)["it"]*")",sep="")),ylab="Coef on Lagged GDP",main=NULL,type="l",las=1,lwd=2,col="black",ylim=c(-40,30),xlim=c(1.5,9))
polygon(c(eval1,rev(eval1)),c(LowCIgdp,rev(UpCIgdp)),col=gray(0.9),border=NA)
lines(eval1,y1)
abline(h=0,lty=2)
box()
plot(eval1,y1,xlab=expression(paste("Education (",dot(z)["it"]*")",sep="")),ylab="Coef on Lagged GDP",main=NULL,type="l",las=1,lwd=2,col="black",ylim=c(-5,5),xlim=c(1.5,9))
polygon(c(eval1,rev(eval1)),c(LowCIgdp,rev(UpCIgdp)),col=gray(0.9),border=NA)
lines(eval1,y1)
abline(h=0,lty=2)
box()
y1=estimates[,1]
plot(eval1,y1,xlab=expression(paste("Education (",dot(z)["it"]*")",sep="")),ylab="Coef on Lagged Democ",main=NULL,type="l",las=1,lwd=2,col="black",ylim=c(-16,16),xlim=c(1.5,9))
polygon(c(eval1,rev(eval1)),c(LowCId,rev(UpCId)),col=gray(0.9),border=NA)
lines(eval1,y1)
abline(h=0,lty=2)
box()
plot(eval1,y1,xlab=expression(paste("Education (",dot(z)["it"]*")",sep="")),ylab="Coef on Lagged Democ",main=NULL,type="l",las=1,lwd=2,col="black",ylim=c(-2,2),xlim=c(1.5,9))
polygon(c(eval1,rev(eval1)),c(LowCId,rev(UpCId)),col=gray(0.9),border=NA)
lines(eval1,y1)
abline(h=0,lty=2)
box()
y1=estimates[,3]
plot(eval1,y1,xlab=expression(paste("Education (",dot(z)["it"]*")",sep="")),ylab="Coef on Colony",main=NULL,type="l",las=1,lwd=2,col="black",ylim=c(-8,8),xlim=c(1.5,9))
polygon(c(eval1,rev(eval1)),c(LowCIcolony,rev(UpCIcolony)),col=gray(0.9),border=NA)
lines(eval1,y1)
abline(h=0,lty=2)
box()
plot(eval1,y1,xlab=expression(paste("Education (",dot(z)["it"]*")",sep="")),ylab="Coef on Colony",main=NULL,type="l",las=1,lwd=2,col="black",ylim=c(-1,1),xlim=c(1.5,9))
polygon(c(eval1,rev(eval1)),c(LowCIcolony,rev(UpCIcolony)),col=gray(0.9),border=NA)
lines(eval1,y1)
abline(h=0,lty=2)
box()
dev.off()


