#====================================================================#
# This is the R code used in the case study of the supplementary 
# appendix of the paper:
# "A Distributional Synthetic Control Method for Policy Evaluation"
#====================================================================#
rm(list=ls())
library(Matrix)
library(quadprog)
set.seed(123)

#--------------------------------------------------------------------------------------------------------------------------------------
#==========================#
# Part I. Define Functions # 
#==========================#
########################################################
# Function: Synthetic Control by Quadratic Programming #
########################################################
# Yseq: full sequence of treatment unit
# Xseq: full sequences of potential control units
SynthCtl=function(Yin,Xin,Yz,Xz){  
# Quadratic programming for minimizing ||Y-X*w||2
Yin=as.matrix(Yin)
Xin=as.matrix(Xin)
Yz=as.matrix(Yz)
Xz=as.matrix(Xz)
Y=c(Yz,Yin)
X=rbind(Xz,Xin)
S=length(Y)             # length of data
n=ncol(X)               # number of potential control units
wMtx=matrix(0,n,1)      # synthetic weights
Synth=matrix(0,S,1)     # Synthetic-control sequence
Dmat=t(X)%*%X/S
dvec=t(X)%*%Y/S
Amat=rbind(matrix(1,1,n),diag(n)) 
Amat=t(Amat)
bvec0=c(1,matrix(0,n,1))
pd_Dmat=nearPD(Dmat)    # Modification: near positive definite matrix
qp=solve.QP(Dmat=as.matrix(pd_Dmat$mat),dvec,Amat,bvec=bvec0,meq=1) 
#qp=solve.QP(Dmat,dvec,Amat,bvec=bvec0,meq=1) 
wMtx=qp$solution           
Synth=Xin%*%wMtx
MSE=mean((Yin-Synth)^2)
list(res1=wMtx,res2=Synth,res3=MSE)}


#--------------------------------------------------------------------------------------------------------------------------------------
#=====================================#
# Part II. Synthetic Control Analysis # 
#=====================================#

#---------------------------------#
# 1. Load Data & Define Variables #
#---------------------------------#
dirpath="e:/research/SyntheticControl/Revision/Empirical/"
Data=read.csv(paste0(dirpath,"CTCP/","synth_smoking.csv"))

### Footnote 1:  Data source: Abadi, Diamon and Hainmueller(2010, JASA)
# Synthetic Control Methods for Comparative Case Studies: Estimating the Effect of California's Tobacco Control Program
# https://econpapers.repec.org/software/bocbocode/S457334.htm
# http://fmwww.bc.edu/repec/bocode/s/synth_smoking.dta sample data file (application/x-stata)

# Variable definitions:
# See Abadi, Diamon and Hainmueller(2010, Table 1)
      state=Data$state                # state names
       year=Data$year                 # year (sampling period: 1970-2000) 
    cigsale=Data$cigsale              # cigarette sales
   lnincome=Data$lnincome             # ln(GDP per capita)
       beer=Data$beer                 # beer consumption per capita
  age15to24=Data$age15to24            # percent aged 15-24
   retprice=Data$retprice             # retail price    

# State indices & names:
NT=length(state)                      # NT: full sample size 
sindx=matrix(0,NT,1)                  # state indices (1,2,3,...)

# Precluding: Alaska, Arizona, District of Columbia, Florida, Hawaii, Maryland, 
# Massachusetts, Michigan, New Jersey, New York, Oregon, Washington 
# See Abadi, Diamon and Hainmueller(2010, Table 2)
sindx[which(state=="Alabama")]=1;               
sindx[which(state=="Arkansas")]=2;               
sindx[which(state=="California")]=3;            
sindx[which(state=="Colorado")]=4;              
sindx[which(state=="Connecticut")]=5;           
sindx[which(state=="Delaware")]=6;              
sindx[which(state=="Georgia")]=7;              
sindx[which(state=="Idaho")]=8;                                                                                                              
sindx[which(state=="Illinois")]=9;                                                                                                              
sindx[which(state=="Indiana")]=10;                                                                                                              
sindx[which(state=="Iowa")]=11;                                                                                                              
sindx[which(state=="Kansas")]=12;                                                                                                              
sindx[which(state=="Kentucky")]=13;                                                                                                              
sindx[which(state=="Louisiana")]=14;                                                                                                              
sindx[which(state=="Maine")]=15;                                                                                                              
sindx[which(state=="Minnesota")]=16;                                                                     
sindx[which(state=="Mississippi")]=17;                                                                     
sindx[which(state=="Missouri")]=18;                                                                     
sindx[which(state=="Montana")]=19;                                                                     
sindx[which(state=="Nebraska")]=20;                                                                     
sindx[which(state=="Nevada")]=21;                                                                     
sindx[which(state=="New Hampshire")]=22;                                                                     
sindx[which(state=="New Mexico")]=23;                           
sindx[which(state=="North Carolina")]=24;                           
sindx[which(state=="North Dakota")]=25;                           
sindx[which(state=="Ohio")]=26;                           
sindx[which(state=="Oklahoma")]=27;                           
sindx[which(state=="Pennsylvania")]=28;                           
sindx[which(state=="Rhode Island")]=29;                           
sindx[which(state=="South Carolina")]=30;                           
sindx[which(state=="South Dakota")]=31;         
sindx[which(state=="Tennessee")]=32;            
sindx[which(state=="Texas")]=33;                
sindx[which(state=="Utah")]=34;                 
sindx[which(state=="Vermont")]=35;              
sindx[which(state=="Virginia")]=36;             
sindx[which(state=="West Virginia")]=37;        
sindx[which(state=="Wisconsin")]=38;            
sindx[which(state=="Wyoming")]=39;              

N=39
Sname=matrix(0,N,1)      # state names 
 i=1
while(i<=N){
Sname[i]=as.character(state[which(sindx==i)])
i=i+1}

# Time series to be explored 
Year=year[which(state=="California")]    # 1970-2000
T=length(Year)                           # number of years 
N=39                                     # number of states 

#-------------------------------------------#
# 2. Define the policy-influencing variable #
#-------------------------------------------#
Y=cigsale

#---------------------------------------------#
# 3. Plot: States' cigarette-sale time series #
#---------------------------------------------#
### Figure A.14
{pdf(paste("E:/Research/SyntheticControl/Empirical/TempPDFs","/state_Csales.pdf",sep=""))
par(mfrow=c(8,5),mar=c(2,2,2,2))
i=1
while(i<=N){
y=Y[which(sindx==i)]
#print(length(y))
plot(Year,y,main=Sname[i],type="l",lty=1,lwd=0.5,col=2,cex=0.5,xlab=" ",ylab=" ")
abline(v=1989,lty=5,col="darkblue")
i=i+1}
par(mfrow=c(1,1))
dev.off()}

#----------------------------------------------#
# 4. Synthetic control of the proposed method  #
#----------------------------------------------#
    cigsale=Data$cigsale              # cigarette sales
   lnincome=Data$lnincome             # ln(GDP per capita)
       beer=Data$beer                 # beer consumption per capita
  age15to24=Data$age15to24            # percent aged 15-24
   retprice=Data$retprice             # retail price    

# Predictors of ADH(2010, Table 1): 
# states x (1) Ln(GDP per capita), (2) Percent aged 15-24, (3) Real price, (4) Beer consumption per capita, 
#          (5) Cigarette sales per capita 1988, (6) sales 1980, (7) sales 1975    
XData=matrix(0,N,7)
j=1
while(j<=4){
if(j==1){X=lnincome[year>=1980 & year<=1988]; Sindx=sindx[year>=1980 & year<=1988]}	
if(j==2){X=age15to24[year>=1980 & year<=1988]; Sindx=sindx[year>=1980 & year<=1988]}	
if(j==3){X=retprice[year>=1980 & year<=1988]; Sindx=sindx[year>=1980 & year<=1988]}	
if(j==4){X=beer[year>=1984 & year<=1988]; Sindx=sindx[year>=1984 & year<=1988]}	
i=1
while(i<=N){
y=X[which(Sindx==i)]
XData[i,j]=mean(na.omit(y))
i=i+1}
j=j+1}
j=5
while(j<=7){
if(j==5){X=cigsale[year==1988]; Sindx=sindx[year==1988]}	
if(j==6){X=cigsale[year==1980]; Sindx=sindx[year==1980]}	
if(j==7){X=cigsale[year==1975]; Sindx=sindx[year==1975]}	
i=1
while(i<=N){
XData[i,j]=X[which(Sindx==i)]
i=i+1}
j=j+1}

# Define time indices
PreStart=1970 
PreEnd=1988 
PostStart=1989 
PostEnd=2000
a=which(Year==PreStart)   # time index of the start of a pre-intervention period  
b=which(Year==PreEnd)     # time index of the end of a pre-intervention period
c=which(Year==PostStart)  # time index of the start of a post-intervention period (policy-change point) 
d=which(Year==PostEnd)    # time index of the end of a post-intervention period    
Pre=Year[a:b]             # pre-intervention period
Post=Year[c:d]            # post-intervention period
Period=c(Pre,Post)        # case-study period

# Time series of outcome variable and Static characteristics
Ymtrx=matrix(0,T,N)
i=1
while(i<=N){
Ymtrx[,i]=Y[which(sindx==i)]
i=i+1}

trtPre=as.matrix(Ymtrx[a:b,3])
pcuPre=Ymtrx[a:b,-3]
trt_x=as.matrix(XData[3,])
pcu_x=t(XData[-3,])
trtPost=as.matrix(Ymtrx[c:d,3])
pcuPost=Ymtrx[c:d,-3]

# Time trends (Specification of h)
k=2
F=matrix(0,(b-a+1),(k+1))
t=1
while(t<=(b-a+1)){
j=0
while(j<=k){
F[t,(j+1)]=t^j
j=j+1}
t=t+1}

# Generate residuals
X=F
trt_alpha=solve(t(X)%*%X)%*%(t(X)%*%trtPre)
pcu_alpha=solve(t(X)%*%X)%*%(t(X)%*%pcuPre)
trtPre_fit=X%*%trt_alpha    # OLS fitted values
pcuPre_fit=X%*%pcu_alpha
trtPre_res=trtPre-trtPre_fit  # OLS residuals
pcuPre_res=pcuPre-pcuPre_fit
RES0=SynthCtl(trtPre,pcuPre,trt_x,pcu_x)
optw0=RES0[[1]]  # optimal weights obtained from the quadratic programming 
syth0=RES0[[2]]  # the pre-intervention synthetic-control data
RES=SynthCtl(trtPre_res,pcuPre_res,trt_x,pcu_x)
optw=RES[[1]]    # optimal weights obtained from the quadratic programming 
syth=RES[[2]]    # the pre-intervention synthetic-control data
synPost0=matrix(0,length(trtPost),1)               
synPost=matrix(0,length(trtPost),1)                
FPost=matrix(0,(d-c+1),(k+1))
s=1
while(s<=(d-c+1)){
j=0
while(j<=k){
FPost[s,(j+1)]=(c-a+s)^j
j=j+1}
s=s+1}
XPost=FPost
trtPost_fit=XPost%*%trt_alpha    # OLS fitted values
pcuPost_fit=XPost%*%pcu_alpha
i=1
while(i<=(N-1)){
synPost0=synPost0+pcuPost[,i]*optw0[i]
synPost=synPost+(pcuPost[,i]-pcuPost_fit[,i])*optw[i]
i=i+1}
synPre=syth+trtPre_fit
synPost=synPost+trtPost_fit
trt_fit=c(trtPre_fit,trtPost_fit)
MySyn=c(synPre,synPost)         # Counterfactual generated by the proposed method
MySyn0=c(syth0,synPost0)        # Counterfactual generated by the conventional method

#--------------------------------------------------------------#
# 5. Synthetic control of Abadi, Diamon and Hainmueller(2010)  #
#--------------------------------------------------------------#
Trt=Ymtrx[,3]
Syn=0.334*Ymtrx[,34]+0.234*Ymtrx[,21]+0.199*Ymtrx[,19]+0.164*Ymtrx[,4]+0.069*Ymtrx[,5] # Counterfactual generated by ADH

#--------------------------------------------------------#
# 6. Optimal combination weights of the proposed method  #
#---------------------------------------------------------#
indx=seq(1,length(optw),1)
windx=indx[optw>0.001]                  # index of potential control states with weights > 0.001
S=Sname[windx]                          # potential control states
W=sprintf("%.3f",round(optw[windx],3))  # optimal weights 
SW=cbind(S,W,windx)                     # state name & optimal weights  
SSW=SW[order(SW[,2],decreasing=TRUE),]  # sort by the descending order of optimal weights
windx=as.numeric(SSW[,3])
LG=cbind(seq(1,length(S)),SSW)
lg=matrix(0,length(S),1)                # legend of the figure 
i=1
while(i<=length(S)){
lg[i]=as.character(paste0(LG[i,1]," : ",LG[i,2],"(",LG[i,3],")"))
i=i+1}
### Figure A.15
{pdf(paste("E:/Research/SyntheticControl/Empirical/TempPDFs","/state_Csales_mtrx.pdf",sep=""))
c=c(3,windx)   # Sname[3]=California                  
matplot(Year,Ymtrx[,-c],main=" ",type="l",ylim=c(0,300),lty=1,lwd=1,col="gray",cex=0.5,xlab=" ",ylab=" ")
lines(Year,Ymtrx[,3],type="l",lwd=4,col=2)
i=1
while(i<=length(windx)){
lines(Year,Ymtrx[,windx[i]],type="l",lwd=1,lty=1,col=4)
text(Year,Ymtrx[,windx[i]],label=i,cex=0.75)
i=i+1}
abline(v=1989,lty=5,col="darkblue")
legend("topright",legend=lg,bty="n",cex=1,y.intersp=1)
legend("bottomright",legend=Sname[3],lty=1,lwd=4,col=2,bty="n")
dev.off()}

#-------------------------------------------------------#
# 7. Optimal combination weights of ADH(2010, Table 2)  #
#-------------------------------------------------------#
windx=c(4,5,19,21,34)                   # index of potential control states
S=Sname[windx]                          # potential control states
W=c(0.164,0.069,0.199,0.234,0.334)      # optimal weights 
SW=cbind(S,W,windx)                     # state name & optimal weights  
SSW=SW[order(SW[,2],decreasing=TRUE),]  # sort by the descending order of optimal weights
windx=as.numeric(SSW[,3])
LG=cbind(seq(1,length(S)),SSW)
lg=matrix(0,length(S),1)                # legend of the figure 
i=1
while(i<=length(S)){
lg[i]=as.character(paste0(LG[i,1]," : ",LG[i,2],"(",LG[i,3],")"))
i=i+1}
### Figure A.16
{pdf(paste("E:/Research/SyntheticControl/Empirical/TempPDFs","/state_Csales_ADH.pdf",sep=""))
c=c(3,windx)   # Sname[3]=California                  
matplot(Year,Ymtrx[,-c],main=" ",type="l",ylim=c(0,300),lty=1,lwd=1,col="gray",cex=0.5,xlab=" ",ylab=" ")
lines(Year,Ymtrx[,3],type="l",lwd=4,col=2)
i=1
while(i<=length(windx)){
lines(Year,Ymtrx[,windx[i]],type="l",lwd=1,lty=1,col=4)
text(Year,Ymtrx[,windx[i]],label=i,cex=0.75)
i=i+1}
abline(v=1989,lty=5,col="darkblue")
legend("topright",legend=lg,bty="n",cex=1,y.intersp=1)
legend("bottomright",legend=Sname[3],lty=1,lwd=4,col=2,bty="n")
dev.off()}

#--------------------------#
# 8. Pre-intervention MSPE #
#--------------------------#
round(mean((Trt[a:b]-MySyn[a:b])^2),3)    # The proposed method
round(mean((Trt[a:b]-MySyn0[a:b])^2),3)   # The conventional method
round(mean((Trt[a:b]-Syn[a:b])^2),3)      # ADH  

#-------------------------------------------------------------#
# 9. Comparison between actual and counterfactual time series #
#-------------------------------------------------------------#
### Figure A.17
{pdf(paste("E:/Research/SyntheticControl/Empirical/TempPDFs","/CTCP_synth.pdf",sep=""))
plot(Year,Trt,main=" ",ylim=c(0,140),type="l",lty=1,lwd=3,col=2,cex=1,xlab=" ",ylab=" ")
lines(Year,MySyn,type="l",lwd=3,lty=2,col=4)
lines(Year,MySyn0,type="l",lwd=3,lty=4,col=1)
lines(Year,Syn,type="l",lwd=3,lty=5,col=3)
abline(v=1989,lty=5,col="darkblue")
legend("bottomleft",legend=c("California","synthetic California (proposed)","synthetic California (conventional)","synthetic California (ADH)"),lty=c(1,2,4,5),lwd=c(3,3,3,3),col=c(2,4,1,3),bty="n",cex=1,y.intersp=1)
dev.off()}












