#=====================================================================#
# This is the second R code used in the empirical study of the paper:
# "A Distributional Synthetic Control Method for Policy Evaluation"
#=====================================================================#
rm(list=ls())
#library(Synth)
library(Matrix)
library(quadprog)
set.seed(123)

#--------------------------------------------------------------------------------------------------------------------------------------
#==========================#
# Part I. Define Functions # 
#==========================#
##########################################
# Function: X-axis for time series plots #
##########################################
X_axis <- function(Period, q) {
  # Build the date axis used by the time-series plots.
  #
  # Inputs:
  #   Period: vector of quarter labels in YYYYQQ form (QQ in "01".."04");
  #           only its first and last elements are read.
  #   q:      step, in quarters, between two consecutive axis ticks.
  # Output (list):
  #   res1: quarterly Date sequence spanning the first to the last label
  #   res2: every q-th element of res1, starting with the first (tick positions)
  #   res3: calendar year of each tick as a number (tick labels)

  # Month/day suffix appended to the year for each quarter code.
  quarter_suffix <- c("01" = "/3/30", "02" = "/6/30", "03" = "/9/30", "04" = "/12/31")

  first_label <- Period[1]
  last_label <- Period[length(Period)]

  # [[ ]] stops with an error when the quarter code is not one of the four above.
  first_day <- paste0(substring(first_label, 1, 4),
                      quarter_suffix[[substring(first_label, 5, 6)]])
  last_day <- paste0(substring(last_label, 1, 4),
                     quarter_suffix[[substring(last_label, 5, 6)]])

  all_quarters <- seq(as.Date(first_day), as.Date(last_day), by = "quarter")
  tick_dates <- all_quarters[seq(1, length(all_quarters), q)]
  tick_years <- as.numeric(substring(tick_dates, 1, 4))   # X-axis label

  list(res1 = all_quarters, res2 = tick_dates, res3 = tick_years)
}

#################################################################################
# Functions: Generate data for the treated unit and the potential control units #
#################################################################################
# Data_trtpcu() reads the per-state files "Q_<state>.csv"; Dataemp_trtpcu() reads
# "Qemp_<state>.csv". Apart from the file prefix the two functions are identical,
# so both delegate to the private helper .load_trtpcu().
#
# Inputs (same for both functions):
# q: column to extract from each per-state file, counted after the file's first
#    column has been dropped (the quantile probability index)
# a: time index of the start of the pre-intervention period
# d: time index of the end of the post-intervention period
# trt_name: name of the treated unit
# SSindx: subset selector applied to Sindx (the states eligible as controls)
# Sname: names of all states
# Sindx: indices of all states
# SmwCh: state min. wage change indicators (time x state); a state qualifies as a
#        potential control unit when its column sums to zero over rows a:d
# Scode: codes of all states
# Xecon: X features of all states (features x state)
#
# Output (list):
# res1: sequence of the treated unit (column q of its file)
# res2: matrix of the same column for every potential control unit (time x unit)
# res3: Xecon column of the treated unit, as a one-column matrix
# res4: Xecon columns of the potential control units
# res5: number of potential control units
# res6: state indices, res7: state names, res8: state codes of those units
#
# Both functions read the global variable `dirpath` to locate "Data/Results/".
.load_trtpcu <- function(prefix, q, a, d, trt_name, SSindx, Sname, Sindx, SmwCh, Scode, Xecon) {
  # Read one state's file and return column q of everything after its first column.
  read_unit <- function(unit_name) {
    raw <- read.csv(paste0(dirpath, "Data/Results/", prefix, unit_name, ".csv"))
    raw[, -1, drop = FALSE][, q]
  }

  # (1) Generate data for the treated unit
  trt_indx <- Sindx[which(Sname == trt_name)]          # index of the treated state
  if (length(trt_indx) != 1) {
    stop("trt_name must match exactly one entry of Sname: ", trt_name, call. = FALSE)
  }
  trt_x <- as.matrix(Xecon[, trt_indx])
  trt <- read_unit(trt_name)

  # (2) Generate data for the potential control units
  eligible <- Sindx[SSindx]
  # Drop the treated unit with a logical mask. The previous form,
  # x[-which(x == trt_indx)], returns an EMPTY vector when the treated unit is
  # not in the eligible set, which silently removed every control unit.
  donors <- eligible[!(eligible %in% trt_indx)]
  # drop = FALSE keeps a matrix even with a single row or a single donor, so
  # colSums() keeps working in those cases.
  change_window <- SmwCh[a:d, donors, drop = FALSE]
  pcu_indx <- donors[which(colSums(change_window) == 0)]   # no min.wage change in a:d
  N_pcu <- length(pcu_indx)
  pcu_code <- Scode[pcu_indx]
  pcu_name <- Sname[pcu_indx]
  pcu_x <- Xecon[, pcu_indx]

  pcu <- matrix(0, length(trt), N_pcu)
  for (i in seq_len(N_pcu)) {
    pcu[, i] <- read_unit(pcu_name[i])
  }

  list(res1 = trt, res2 = pcu, res3 = trt_x, res4 = pcu_x, res5 = N_pcu,
       res6 = pcu_indx, res7 = pcu_name, res8 = pcu_code)
}

Data_trtpcu <- function(q, a, d, trt_name, SSindx, Sname, Sindx, SmwCh, Scode, Xecon) {
  .load_trtpcu("Q_", q, a, d, trt_name, SSindx, Sname, Sindx, SmwCh, Scode, Xecon)
}

Dataemp_trtpcu <- function(q, a, d, trt_name, SSindx, Sname, Sindx, SmwCh, Scode, Xecon) {
  .load_trtpcu("Qemp_", q, a, d, trt_name, SSindx, Sname, Sindx, SmwCh, Scode, Xecon)
}

########################################################
# Function: Synthetic Control by Quadratic Programming #
########################################################
# Inputs:
# Yin: sequence of the treated unit (one value per period)
# Xin: sequences of the potential control units (period x unit)
# Yz:  extra matching values of the treated unit, stacked on top of Yin
# Xz:  the corresponding values of the control units, stacked on top of Xin
# Output (list):
# res1: weights from the quadratic program (they sum to one and are >= 0)
# res2: Xin %*% weights, the synthetic-control sequence
# res3: mean squared difference between Yin and res2
SynthCtl <- function(Yin, Xin, Yz, Xz) {
  Yin <- as.matrix(Yin)
  Xin <- as.matrix(Xin)
  Yz <- as.matrix(Yz)
  Xz <- as.matrix(Xz)

  # Stack the matching rows on top of the outcome rows: minimise ||Y - X w||^2.
  target <- c(Yz, Yin)
  donors <- rbind(Xz, Xin)
  n_obs <- length(target)       # length of data
  n_units <- ncol(donors)       # number of potential control units

  quad_term <- t(donors) %*% donors / n_obs
  lin_term <- t(donors) %*% target / n_obs

  # One constraint per column: the first (an equality, meq = 1) forces the
  # weights to sum to one, the remaining n_units columns force each weight >= 0.
  constraints <- cbind(rep(1, n_units), diag(n_units))
  bounds <- c(1, rep(0, n_units))

  # solve.QP needs a positive definite matrix, so use the nearest one.
  pd_quad <- nearPD(quad_term)
  qp <- solve.QP(Dmat = as.matrix(pd_quad$mat), lin_term, constraints, bvec = bounds, meq = 1)

  weights <- qp$solution
  fitted <- Xin %*% weights
  list(res1 = weights, res2 = fitted, res3 = mean((Yin - fitted)^2))
}

##############################################################
## Function: Synthetic Control by the R package of ADH(2011) #
##############################################################
#SynthCtl=function(trtPre,pcuPre,trt_x,pcu_x){  
## Quadratic programming for minimizing ||Y-X*w||2
#trtPre=as.matrix(trtPre)
#pcuPre=as.matrix(pcuPre)
#trt_x=as.matrix(trt_x)
#pcu_x=as.matrix(pcu_x)
#v=rep(1/length(trt_x),length(trt_x))
#res=synth(data.prep.obj=NULL,X1=trt_x,X0=pcu_x,Z1=rbind(trt_x,trtPre),Z0=rbind(pcu_x,pcuPre),custom.v=v) 
#wMtx=res[[2]]           
#Synth=pcuPre%*%wMtx
#MSE=mean((trtPre-Synth)^2)
#list(res1=wMtx,res2=Synth,res3=MSE)}

###################################################
# Function: Generate the counterfactual sequences #
###################################################
# Inputs:
# k: order of the polynomial time trend (k=0 constant, k=1 linear, k=2 quadratic)
# trt: full sequence of the treated unit
# pcu: full sequences of the potential control units (time x unit)
# trt_x, pcu_x: passed unchanged to SynthCtl() as its Yz and Xz arguments
# Period: YYYYQQ labels of the case-study period (pre-intervention labels
#         followed by post-intervention labels)
# a, b: time indices of the start and end of the pre-intervention period
# c, d: time indices of the start and end of the post-intervention period
# Output (list):
# res1: synthetic control built on detrended data: weights are fitted to the
#       pre-intervention OLS residuals, then the treated unit's fitted
#       (pre) / extrapolated (post) trend is added back
# res2: synthetic control with weights fitted to the raw pre-intervention data
# res3: weights behind res1;  res4: weights behind res2
# res5: fitted pre-intervention trend of the treated unit
# res6: fitted pre-intervention trends of the potential control units
# res7, res8: OLS coefficients of the treated unit / the control units
counterfactual <- function(k, trt, pcu, trt_x, pcu_x, Period, a, b, c, d) {
  pcu <- as.matrix(pcu)
  N_pcu <- ncol(pcu)
  n_pre <- b - a + 1
  n_post <- d - c + 1

  trtPre <- trt[a:b]
  trtPost <- trt[c:d]
  # drop = FALSE keeps these as matrices when there is a single control unit;
  # without it the column indexing further down fails.
  pcuPre <- pcu[a:b, , drop = FALSE]
  pcuPost <- pcu[c:d, , drop = FALSE]

  # Regressors at positions `pos` of the case-study period: polynomial trend
  # pos^0..pos^k followed by dummies for quarters 2, 3 and 4.
  design <- function(pos) {
    qtr <- substring(Period[pos], 5, 6)
    trend <- outer(pos, 0:k, "^")
    season <- cbind(qtr == "02", qtr == "03", qtr == "04") * 1
    cbind(trend, season)
  }

  # OLS trend fit on the pre-intervention period.
  X <- design(seq_len(n_pre))
  XtX_inv <- solve(t(X) %*% X)
  trt_alpha <- XtX_inv %*% (t(X) %*% trtPre)
  pcu_alpha <- XtX_inv %*% (t(X) %*% pcuPre)
  trtPre_fit <- X %*% trt_alpha        # OLS fitted values
  pcuPre_fit <- X %*% pcu_alpha
  trtPre_res <- trtPre - trtPre_fit    # OLS residuals
  pcuPre_res <- pcuPre - pcuPre_fit

  # Weights from the raw data and from the detrended data.
  RES0 <- SynthCtl(trtPre, pcuPre, trt_x, pcu_x)
  optw0 <- RES0[[1]]
  syth0 <- RES0[[2]]
  RES <- SynthCtl(trtPre_res, pcuPre_res, trt_x, pcu_x)
  optw <- RES[[1]]
  syth <- RES[[2]]

  # Extrapolate the pre-intervention trend over the post-intervention period;
  # post period s sits at position (c-a+s) of the case-study period.
  XPost <- design((c - a) + seq_len(n_post))
  trtPost_fit <- XPost %*% trt_alpha
  pcuPost_fit <- XPost %*% pcu_alpha

  # Accumulate unit by unit (same summation order as the original loop).
  synPost0 <- matrix(0, n_post, 1)
  synPost <- matrix(0, n_post, 1)
  for (i in seq_len(N_pcu)) {
    synPost0 <- synPost0 + pcuPost[, i] * optw0[i]
    synPost <- synPost + (pcuPost[, i] - pcuPost_fit[, i]) * optw[i]
  }

  synPre <- syth + trtPre_fit
  synPost <- synPost + trtPost_fit
  syn <- c(synPre, synPost)
  syn0 <- c(syth0, synPost0)

  list(res1 = syn, res2 = syn0, res3 = optw, res4 = optw0, res5 = trtPre_fit,
       res6 = pcuPre_fit, res7 = trt_alpha, res8 = pcu_alpha)
}


#--------------------------------------------------------------------------------------------------------------------------------------
#====================================================#
# Part II. Distributional Synthetic Control Analysis # 
#====================================================#

#*****************************************************#
# 1. Load Data & Define Variables                     #
# (See "1.LoadData_and_DefineEvents.R" for this part) #
#*****************************************************#
# Root folder of the project; every input and output path below is built from it.
dirpath <- "e:/research/SyntheticControl/Empirical/"
Data <- read.csv(paste0(dirpath, "Data/", "county data.csv"))

# Variable definitions (one vector per column of the county-level panel):
time      <- Data$time            # YYYYQQ (Sampling period: 1990Q1-2006Q2)
state     <- Data$statename       # state names
cfips     <- Data$countyreal      # county fips, https://en.wikipedia.org/wiki/FIPS_county_code
county    <- Data$countyname      # county names
cpop      <- Data$countypop2000   # county population, 2000
cpopd     <- Data$cntypopdens     # county population density, 2000
carea     <- Data$cntyarea        # county land area
mw_fed    <- Data$federalmin      # federal min. wage
mw_sta    <- Data$stminwage       # state min. wage
mw        <- Data$minwage         # max(mw_fed,mw_sta)
emp_tot   <- Data$empTOT          # total employment
emp_rest  <- Data$emp_rest_both   # restaurant employment (NAICS 7221+7222)
wage_rest <- Data$AWW_rest_both   # restaurant average weekly wage
emp_722   <- Data$emp722          # food services and drinking place employment
wage_722  <- Data$AWW722          # food services and drinking place average weekly wage
emp_ret   <- Data$empRETAIL       # retail employment
wage_ret  <- Data$AWWRETAIL       # retail average weekly wage
emp_mfg   <- Data$empMFG          # manufacturing employment
wage_mfg  <- Data$AWWMFG          # manufacturing average weekly wage

# State indices & names:
NT <- length(state)                   # NT: full sample size

# The 48 contiguous states plus the District of Columbia, in alphabetical order.
# A state's position in this table is its index (1,2,3,...). Alaska & Hawaii are
# precluded: they, like any other name not listed here, keep index 0.
sindx_states <- c(
  "Alabama", "Arizona", "Arkansas", "California", "Colorado",
  "Connecticut", "Delaware", "District of Columbia", "Florida", "Georgia",
  "Idaho", "Illinois", "Indiana", "Iowa", "Kansas",
  "Kentucky", "Louisiana", "Maine", "Maryland", "Massachusetts",
  "Michigan", "Minnesota", "Mississippi", "Missouri", "Montana",
  "Nebraska", "Nevada", "New Hampshire", "New Jersey", "New Mexico",
  "New York", "North Carolina", "North Dakota", "Ohio", "Oklahoma",
  "Oregon", "Pennsylvania", "Rhode Island", "South Carolina", "South Dakota",
  "Tennessee", "Texas", "Utah", "Vermont", "Virginia",
  "Washington", "West Virginia", "Wisconsin", "Wyoming"
)
# One lookup for the whole sample; stored as a numeric NT x 1 matrix.
sindx <- matrix(as.numeric(match(as.character(state), sindx_states, nomatch = 0)), NT, 1)

# The county with the smallest FIPS code in state 1 supplies the time axis.
minci <- min(cfips[which(sindx == 1)])
Time <- time[which(cfips == minci)]   # time index (1990Q1-2006Q2: 199001,199002,...,200602)
T <- length(Time)                     # number of quarters (note: masks the TRUE shorthand `T`)
N <- 49                               # number of states (precluding Alaska & Hawaii)
Y <- cbind(wage_rest, emp_rest)
y1 <- Y[, 1]          # wages
y2 <- Y[, 2] / 100    # (rescaled) employments

 Sindx=matrix(0,N,1)      # state indices
 Sname=matrix(0,N,1)      # state names 
 Scode=matrix(0,N,1)      # state codes
   Smw=matrix(0,T,N)      # state min.wage time series
 SmwCh=matrix(0,T,N)      # state min.wage change time series

i=1
while(i<=N){
s_indx=sindx[which(sindx==i)]
s_name=state[which(sindx==i)]
Sindx[i]=s_indx[i]
Sname[i]=as.character(s_name[i])
if(Sname[i]=="Alabama"){Scode[i]="AL"}                                  
if(Sname[i]=="Arizona"){Scode[i]="AZ"}                                  
if(Sname[i]=="Arkansas"){Scode[i]="AR"}                                 
if(Sname[i]=="California"){Scode[i]="CA"}                               
if(Sname[i]=="Colorado"){Scode[i]="CO"}                                 
if(Sname[i]=="Connecticut"){Scode[i]="CT"}                              
if(Sname[i]=="Delaware"){Scode[i]="DE"}                                 
if(Sname[i]=="District of Columbia"){Scode[i]="DC"}                     
if(Sname[i]=="Florida"){Scode[i]="FL"}                                  
if(Sname[i]=="Georgia"){Scode[i]="GA"}                                  
if(Sname[i]=="Idaho"){Scode[i]="ID"}                                    
if(Sname[i]=="Illinois"){Scode[i]="IL"}                                 
if(Sname[i]=="Indiana"){Scode[i]="IN"}                                  
if(Sname[i]=="Iowa"){Scode[i]="IA"}                                     
if(Sname[i]=="Kansas"){Scode[i]="KS"}                                   
if(Sname[i]=="Kentucky"){Scode[i]="KY"}                                 
if(Sname[i]=="Louisiana"){Scode[i]="LA"}                                
if(Sname[i]=="Maine"){Scode[i]="ME"}                                    
if(Sname[i]=="Maryland"){Scode[i]="MD"}                                 
if(Sname[i]=="Massachusetts"){Scode[i]="MA"}                            
if(Sname[i]=="Michigan"){Scode[i]="MI"}                                 
if(Sname[i]=="Minnesota"){Scode[i]="MN"}                                
if(Sname[i]=="Mississippi"){Scode[i]="MS"}                              
if(Sname[i]=="Missouri"){Scode[i]="MO"}                                 
if(Sname[i]=="Montana"){Scode[i]="MT"}                                  
if(Sname[i]=="Nebraska"){Scode[i]="NE"}                                 
if(Sname[i]=="Nevada"){Scode[i]="NV"}                                   
if(Sname[i]=="New Hampshire"){Scode[i]="NH"}                            
if(Sname[i]=="New Jersey"){Scode[i]="NJ"}                               
if(Sname[i]=="New Mexico"){Scode[i]="NM"}                               
if(Sname[i]=="New York"){Scode[i]="NY"}                                 
if(Sname[i]=="North Carolina"){Scode[i]="NC"}                           
if(Sname[i]=="North Dakota"){Scode[i]="ND"}                             
if(Sname[i]=="Ohio"){Scode[i]="OH"}                                     
if(Sname[i]=="Oklahoma"){Scode[i]="OK"}                                 
if(Sname[i]=="Oregon"){Scode[i]="OR"}                                   
if(Sname[i]=="Pennsylvania"){Scode[i]="PA"}                             
if(Sname[i]=="Rhode Island"){Scode[i]="RI"}                             
if(Sname[i]=="South Carolina"){Scode[i]="SC"}                           
if(Sname[i]=="South Dakota"){Scode[i]="SD"}                             
if(Sname[i]=="Tennessee"){Scode[i]="TN"}                                
if(Sname[i]=="Texas"){Scode[i]="TX"}                                    
if(Sname[i]=="Utah"){Scode[i]="UT"}                                     
if(Sname[i]=="Vermont"){Scode[i]="VT"}                                  
if(Sname[i]=="Virginia"){Scode[i]="VA"}                                 
if(Sname[i]=="Washington"){Scode[i]="WA"}                               
if(Sname[i]=="West Virginia"){Scode[i]="WV"}                            
if(Sname[i]=="Wisconsin"){Scode[i]="WI"}                                
if(Sname[i]=="Wyoming"){Scode[i]="WY"}                                  
minci=min(cfips[which(sindx==i)])
nmw_sta=mw_sta
nmw_sta[is.na(nmw_sta)]=0
Smw[,i]=nmw_sta[which(cfips==minci)]
t=2
while(t<=T){
if(Smw[t,i]==0 & Smw[(t-1),i]>0){Smw[t,i]=Smw[(t-1),i]}
if(Smw[t,i]>Smw[(t-1),i]){SmwCh[t,i]=1}
t=t+1}
i=i+1}

# Federal min-wage series (read from the smallest-FIPS county of state 1)
minci <- min(cfips[which(sindx == 1)])
Fmw <- mw_fed[which(cfips == minci)]

# Set (minN+1) to be the min. sample size for estimation
minN <- 20

# Set quantile probability indices
tau <- seq(0.01, 0.99, 0.01)

Nit <- matrix(0, T, N)        # (state,time)-specific sample size
Ni <- matrix(0, N, 1)         # state sample size, precluding missing values
for (i in seq_len(N)) {
  rows_i <- which(sindx == i)                       # rows of state i
  for (t in seq_len(T)) {
    rows_it <- rows_i[which(time[rows_i] == Time[t])]   # ... observed in quarter t
    # NOTE(review): Y is a two-column matrix indexed by a single vector, so this
    # picks entries of its first column (wage_rest) only -- confirm that counting
    # non-missing wages, rather than complete rows, is intended.
    w <- na.omit(Y[rows_it])
    Nit[t, i] <- length(w)
  }
  cat(Sname[i], max(Nit[, i]), "\n")
  Ni[i] <- min(Nit[, i])      # smallest quarterly sample of the state
}

# state-selection dummies (Ni[i]>minN): 1 when the state satisfies the
# min.# of obs. restriction in every quarter
statesel <- matrix(0, N, 1)
statesel[which(Ni > minN)] <- 1
SSindx <- Sindx[which(statesel == 1)]   # state-selection index (Ni[i]>minN)

#*********************************************************#
# 2. Four Case Studies (PreDura>20 and preclude outliers) #
#*********************************************************#
case=1
while(case<=4){
if(case==1){trt_name="California"; Pre_Smw=4.25; Size_Smwch=0.75; Pre_Emw=4.75; Size_Emwch=0.25; PreStart=199001; PreEnd=199604; PostStart=199701; PostEnd=199702; PreDura=28; PostDura=2 } 
if(case==2){trt_name="Oregon";     Pre_Smw=4.75; Size_Smwch=0.75; Pre_Emw=4.75; Size_Emwch=0.75; PreStart=199101; PreEnd=199604; PostStart=199701; PostEnd=199704; PreDura=24; PostDura=4 } 
if(case==3){trt_name="Washington"; Pre_Smw=4.9 ; Size_Smwch=0.8 ; Pre_Emw=5.15; Size_Emwch=0.55; PreStart=199401; PreEnd=199804; PostStart=199901; PostEnd=199904; PreDura=20; PostDura=4 } 
if(case==4){trt_name="Wisconsin";  Pre_Smw=3.65; Size_Smwch=2.05; Pre_Emw=5.15; Size_Emwch=0.55; PreStart=199001; PreEnd=200501; PostStart=200502; PostEnd=200602; PreDura=61; PostDura=4 } 
cat("case=",case,"Treated=",trt_name,"\n")

# Define time indices
a=which(Time==PreStart)   # time index of the start of a pre-intervention period  
b=which(Time==PreEnd)     # time index of the end of a pre-intervention period
c=which(Time==PostStart)  # time index of the start of a post-intervention period (policy-change point) 
d=which(Time==PostEnd)    # time index of the end of a post-intervention period    
Pre=Time[a:b]             # pre-intervention period
Post=Time[c:d]            # post-intervention period
Period=c(Pre,Post)        # case-study period
res=X_axis(Period,4)
Xtime=res[[1]]
Xaxis=res[[2]]
x_lab=res[[3]]

if(case==1){XData=read.csv(paste0(dirpath,"Data/","US_state_comparison1996.csv"))}
if(case==2){XData=read.csv(paste0(dirpath,"Data/","US_state_comparison1996.csv"))}
if(case==3){XData=read.csv(paste0(dirpath,"Data/","US_state_comparison1998.csv"))}
if(case==4){XData=read.csv(paste0(dirpath,"Data/","US_state_comparison2004.csv"))}

XData=as.matrix(XData[,-1])
X0_pop=XData[1,]       # Resident population
X0_white=XData[2,]     # White alone, percentage
X0_old=XData[3,]       # 65 years old and over, percentage
X0_land=XData[4,]      # Land area
X0_pden=XData[5,]      # Population per square mile (preclude this from the matching condition because it is redundant)
X0_pov=XData[6,]       # Person below poverty level, percentage
X0_pinc=XData[7,]      # Personal income per capita
X0_hinc=XData[8,]      # Real median household income
X0_emp=XData[9,]       # Annual nonfarm employment
X0_nhu=XData[10,]      # New private housing units authorized by buliding permits

# All variables in (relative) percentage
X_pop=100*(X0_pop/max(X0_pop))
X_white=100*(X0_white/max(X0_white))
X_old=100*(X0_old/max(X0_old))
X_land=100*(X0_land/max(X0_land))
X_pov=100*(X0_pov/max(X0_pov))
X_pinc=100*(X0_pinc/max(X0_pinc))
X_hinc=100*(X0_hinc/max(X0_hinc))
X_emp=100*(X0_emp/max(X0_emp))
X_nhu=100*(X0_nhu/max(X0_nhu))

Xecon=as.matrix(rbind(X_pop,X_white,X_old,X_land,X_pov,X_pinc,X_hinc,X_emp,X_nhu))


##################################################
# I. Synthetic Control Analysis for Wage Effects # 
##################################################
#-----------------------------------#
# 1. Define potential control units #
#-----------------------------------#
k=2                   # k=0, for contant; k=1, for linear trend; k=2, for quadratic trend                     
res=Data_trtpcu(1,a,d,trt_name,SSindx,Sname,Sindx,SmwCh,Scode,Xecon)
pcu_indx0=res[[6]]    # state indices of potential control units

#---------------------------------------------------#
# 2. Check outliers for the potential control units #
#---------------------------------------------------#
outlier=100  # Threshold for defining an outlier
cr=rainbow(length(tau),s=1,v=1,start=0,end=0.9) # Rainbow colors for the quantile sequences
par(mfrow=c(8,4),mar=c(2,2,2,2))
S=d-a+1
Oindx0=0
j=1
while(j<=length(pcu_indx0)){
i=pcu_indx0[j]
wm=matrix(0,S,1)
wq=matrix(0,S,length(tau))
wQ=matrix(0,S,1)
if(Ni[i]>minN){
mt=as.character(Sname[i])
s=1
while(s<=S){
t=a+s-1
w=na.omit(y1[which(sindx==i & time==Time[t])])
wm[s]=mean(w)                        # mean
wq[s,]=quantile(w,tau)      # quantile 
wQ[s]=max(w)
s=s+1}
matplot(Xtime,wq,main=Sname[i],type="l",pch=19,col=cr,cex=0.5,xlab=" ",ylab=" ",xaxt="n")
lines(Xtime,wm,type="l",lty=1,col=1,lwd=2)
axis(1,at=Xaxis,labels=x_lab)
tt=which.max(wQ)
if(tt==1){DwQ=wQ[tt]-wQ[tt+1]}
if(tt==S){DwQ=wQ[tt]-wQ[tt-1]}
if(tt>1 & tt<S){DwQ=max((wQ[tt]-wQ[tt-1]),(wQ[tt]-wQ[tt+1]))}
if(DwQ>outlier){Oindx0=c(Oindx0,i)             # Definition of outliers

### Footnote 3
print("Outliers detected")
cat(case,Sname[i],Sindx[i],Time[tt],DwQ,"\n")}}
j=j+1}
par(mfrow=c(1,1))
Oindx=Oindx0[-1]

#-----------------------------------------------------------------#
# 3. Remove states with outliers from the potential control units #
#-----------------------------------------------------------------#
# RS / RP: positions of the outlier states inside SSindx / pcu_indx0
# (both are 0 x 1 matrices when no outlier was detected).
RS <- matrix(as.numeric(match(Oindx, SSindx)), length(Oindx), 1)
RP <- matrix(as.numeric(match(Oindx, pcu_indx0)), length(Oindx), 1)
# Filter with logical masks instead of negative positions: when Oindx is empty,
# x[-RS] indexes with a zero-length vector and returns an EMPTY result, which
# would discard every state instead of keeping all of them.
SSindx1 <- SSindx[!(SSindx %in% Oindx)]           # states without missing values & without outliers
pcu_indx <- pcu_indx0[!(pcu_indx0 %in% Oindx)]    # potential control units  without outliers
N_pcu <- length(pcu_indx)                         # number of such potential control units

#--------------------------------------------#
# 4. Synthetic controls for the treated unit #
#--------------------------------------------#
TRT=matrix(0,(d-a+1),length(tau))                 # Quantile time series of the treated unit      
SYN=matrix(0,(d-a+1),length(tau))                 # Quantile time series of the synthetic control unit (Proposed)
SYN0=matrix(0,(d-a+1),length(tau))                # Quantile time series of the synthetic control unit (Traditional)
wSYN=matrix(0,N_pcu,length(tau))                  # Optimal weights for the quantile time series of the synthetic control unit (Proposed)
wSYN0=matrix(0,N_pcu,length(tau))                 # Optimal weights for the quantile time series of the synthetic control unit (Traditional)
trtPre_fit=matrix(0,(b-a+1),length(tau))          # Fitted quantile trends of the treated unit
pcuPre_fit=matrix(0,(b-a+1),(N_pcu*length(tau)))  # Fitted quantile trends of the potential control units 

q=1
while(q<=(length(tau))){
# (1) Generate the q-th quantile sequences of the treated unit and the potential control units
res=Data_trtpcu(q,a,d,trt_name,SSindx1,Sname,Sindx,SmwCh,Scode,Xecon)
trt=res[[1]]
pcu=res[[2]]
trt_x=res[[3]]
pcu_x=res[[4]]
pcu_code=res[[8]]

# (2) Generate the q-th quantile of the synthetic control unit
res=counterfactual(k,trt,pcu,trt_x,pcu_x,Period,a,b,c,d)
syn=res[[1]]
syn0=res[[2]]
wSYN[,q]=res[[3]]
wSYN0[,q]=res[[4]]
trtPre_fit[,q]=res[[5]]
pcuPre_fit[,(N_pcu*(q-1)+1):(N_pcu*q)]=res[[6]]
trt_alpha=res[[7]]
pcu_alpha=res[[8]]

# (3) Keep outputs
TRT[,q]=trt[a:d]
SYN[,q]=syn
SYN0[,q]=syn0

# (4) Plot: Compare the quantile sequences of the treated unit and the synthetic control unit
tauq=tau[q]
taue=expression(tau)
mt=paste0(trt_name," ",PostStart," "," (", taue,"=",tauq,")")                                          
matplot(Xtime,cbind(trt[a:d],syn,syn0),main=mt,type="b",col=c(2,4,1),lwd=c(3,3,2),pch=19,cex=0.5,xlab=" ",ylab=" ",xaxt="n")
abline(v=Xtime[(b-a+1)],lty=5,lwd=2,col="darkblue")
axis(1,at=Xaxis,labels=x_lab)
q=q+1}

# (5) Save outputs
write.csv(pcu_indx,file=paste0(dirpath,"Data/Results/","pcu_indx_",case,".csv"))
write.csv(TRT,file=paste0(dirpath,"Data/Results/","TRT_",case,".csv"))
write.csv(SYN,file=paste0(dirpath,"Data/Results/","SYN_",case,".csv"))
write.csv(SYN0,file=paste0(dirpath,"Data/Results/","SYN0_",case,".csv"))
write.csv(wSYN,file=paste0(dirpath,"Data/Results/","wSYN_",case,".csv"))
write.csv(wSYN0,file=paste0(dirpath,"Data/Results/","wSYN0_",case,".csv"))
write.csv(trtPre_fit,file=paste0(dirpath,"Data/Results/","trtPre_fit_",case,".csv"))
write.csv(pcuPre_fit,file=paste0(dirpath,"Data/Results/","pcuPre_fit_",case,".csv"))

#-----------------------------------------------------------------------#
# 5. Synthetic controls for a potential control unit (Placebo Analysis) #
#-----------------------------------------------------------------------#
J=N_pcu   # Number of the potential control units
j=1
while(j<=J){
# Placebo run j: the j-th potential control unit plays the role of the treated
# unit, and a synthetic control is built for it from the remaining states.
plb_indx=pcu_indx[j] # Placebo: Treat a potential control unit as a treated unit.
plb_trt_name=Sname[plb_indx]
cat("case=",case,"(","Treated=",trt_name,Time[c],")","Placebo=",j,plb_trt_name,"\n")

# NOTE(review): the 8th argument of Data_trtpcu() is the min.wage CHANGE matrix
# (SmwCh), and the treated-unit analysis passes SmwCh. Here, and in the
# per-quantile call inside the loop below, the LEVEL matrix Smw is passed instead,
# so the placebo donor pool consists of states whose min.wage level sums to zero
# over a:d rather than states without a change. Confirm whether this is intended.
res=Data_trtpcu(1,a,d,plb_trt_name,SSindx1,Sname,Sindx,Smw,Scode,Xecon)
plb_trt_x=res[[3]]        # econ. features of the treated unit
plb_pcu_x=res[[4]]        # econ. features of the potential control units
plb_N_pcu=res[[5]]        # number of potential control units
plb_pcu_indx=res[[6]]     # state indices of potential control units
plb_TRT=matrix(0,(d-a+1),length(tau))                     # Quantile time series of the treated unit      
plb_SYN=matrix(0,(d-a+1),length(tau))                     # Quantile time series of the synthetic control unit (Proposed)
plb_SYN0=matrix(0,(d-a+1),length(tau))                    # Quantile time series of the synthetic control unit (Traditional)
plb_wSYN=matrix(0,plb_N_pcu,length(tau))                  # Optimal weights for the quantile time series of the synthetic control unit (Proposed)
plb_wSYN0=matrix(0,plb_N_pcu,length(tau))                 # Optimal weights for the quantile time series of the synthetic control unit (Traditional)
plb_trtPre_fit=matrix(0,(b-a+1),length(tau))              # Fitted quantile trends of the treated unit
plb_pcuPre_fit=matrix(0,(b-a+1),(plb_N_pcu*length(tau)))  # Fitted quantile trends of the potential control units 

q=1
while(q<=length(tau)){
# (1) Generate the q-th quantile sequences of the placebo (treated) unit and its potential control units
res=Data_trtpcu(q,a,d,plb_trt_name,SSindx1,Sname,Sindx,Smw,Scode,Xecon)
plb_trt=res[[1]]
plb_pcu=res[[2]]
plb_trt_x=res[[3]]
plb_pcu_x=res[[4]]
# (2) Generate the q-th quantile of the synthetic control unit
res=counterfactual(k,plb_trt,plb_pcu,plb_trt_x,plb_pcu_x,Period,a,b,c,d)
plb_syn=res[[1]]
plb_syn0=res[[2]]
plb_wSYN[,q]=res[[3]]
plb_wSYN0[,q]=res[[4]]
plb_trtPre_fit[,q]=res[[5]]
plb_pcuPre_fit[,(plb_N_pcu*(q-1)+1):(plb_N_pcu*q)]=res[[6]]
plb_trt_alpha=res[[7]]
plb_pcu_alpha=res[[8]]
# (3) Keep outputs
plb_TRT[,q]=plb_trt[a:d]
plb_SYN[,q]=plb_syn
plb_SYN0[,q]=plb_syn0
# (4) Plot: Compare the quantile sequences of the placebo unit and its synthetic control unit
tauq=tau[q]
taue=expression(tau)
mt=paste0(plb_trt_name," ","(Placebo)",PostStart," "," (", taue,"=",tauq,")")                                          
matplot(Xtime,cbind(plb_trt[a:d],plb_syn,plb_syn0),main=mt,type="b",col=c(2,4,1),lwd=c(3,3,2),pch=19,cex=0.5,xlab=" ",ylab=" ",xaxt="n")
abline(v=Xtime[(b-a+1)],lty=5,lwd=2,col="darkblue")
axis(1,at=Xaxis,labels=x_lab)
q=q+1}
# (5) Save outputs
write.csv(plb_pcu_indx,file=paste0(dirpath,"Data/Results/","plb_pcu_indx_",case,"_",j,".csv"))
write.csv(plb_TRT,file=paste0(dirpath,"Data/Results/","plb_TRT_",case,"_",j,".csv"))
write.csv(plb_SYN,file=paste0(dirpath,"Data/Results/","plb_SYN_",case,"_",j,".csv"))
write.csv(plb_SYN0,file=paste0(dirpath,"Data/Results/","plb_SYN0_",case,"_",j,".csv"))
write.csv(plb_wSYN,file=paste0(dirpath,"Data/Results/","plb_wSYN_",case,"_",j,".csv"))
write.csv(plb_wSYN0,file=paste0(dirpath,"Data/Results/","plb_wSYN0_",case,"_",j,".csv"))
write.csv(plb_trtPre_fit,file=paste0(dirpath,"Data/Results/","plb_trtPre_fit_",case,"_",j,".csv"))
write.csv(plb_pcuPre_fit,file=paste0(dirpath,"Data/Results/","plb_pcuPre_fit_",case,"_",j,".csv"))
j=j+1}


#########################################################
# II. Synthetic Control Analysis for Employment Effects # 
#########################################################
#--------------------------------------------#
# 1. Synthetic controls for the treated unit #
#--------------------------------------------#
# Result containers for the employment analysis of the treated unit.
# Every matrix has one column per quantile level in tau, except pcuPreemp_fit,
# which reserves a block of N_pcu consecutive columns for each quantile level.
TRTemp        <- matrix(0, d - a + 1, length(tau))          # Quantile time series of the treated unit
SYNemp        <- matrix(0, d - a + 1, length(tau))          # Quantile time series of the synthetic control unit (Proposed)
SYN0emp       <- matrix(0, d - a + 1, length(tau))          # Quantile time series of the synthetic control unit (Traditional)
wSYNemp       <- matrix(0, N_pcu, length(tau))              # Optimal weights of the synthetic control unit (Proposed)
wSYN0emp      <- matrix(0, N_pcu, length(tau))              # Optimal weights of the synthetic control unit (Traditional)
trtPreemp_fit <- matrix(0, b - a + 1, length(tau))          # Fitted quantile trends of the treated unit
pcuPreemp_fit <- matrix(0, b - a + 1, N_pcu * length(tau))  # Fitted quantile trends of the potential control units

for (q in seq_along(tau)) {
  # (1) Build the q-th quantile sequences of the treated unit and of the
  #     potential control units, together with their economic features.
  res <- Dataemp_trtpcu(q, a, d, trt_name, SSindx1, Sname, Sindx, SmwCh, Scode, Xecon)
  trtemp      <- res[[1]]
  pcuemp      <- res[[2]]
  trtemp_x    <- res[[3]]
  pcuemp_x    <- res[[4]]
  pcuemp_code <- res[[8]]   # presumably the codes of the potential control units; not used below

  ## Disabled diagnostic (check quantile); it refers to objects named pcu/trt.
  #pcu_q=matrix(0,length(tau),ncol(pcu))
  #i=1
  #while(i<=ncol(pcu)){
  #pcu_q[,i]=quantile(pcu[,i],tau)
  #i=i+1}
  #matplot(tau,pcu_q)
  #trt_q=quantile(trt,tau)
  #plot(tau,trt_q)

  # (2) Estimate the q-th quantile of the synthetic control unit and store the
  #     weights and pre-period fits in the q-th column (or column block).
  res <- counterfactual(k, trtemp, pcuemp, trtemp_x, pcuemp_x, Period, a, b, c, d)
  synemp             <- res[[1]]   # Proposed
  syn0emp            <- res[[2]]   # Traditional
  wSYNemp[, q]       <- res[[3]]
  wSYN0emp[, q]      <- res[[4]]
  trtPreemp_fit[, q] <- res[[5]]
  pcuPreemp_fit[, (N_pcu * (q - 1) + 1):(N_pcu * q)] <- res[[6]]
  trtemp_alpha       <- res[[7]]   # kept for inspection only; not used below
  pcuemp_alpha       <- res[[8]]   # kept for inspection only; not used below

  # (3) Keep the series over time indices a..d for this quantile level.
  TRTemp[, q]  <- trtemp[a:d]
  SYNemp[, q]  <- synemp
  SYN0emp[, q] <- syn0emp

  # (4) Plot the treated unit against both synthetic control series.
  #     Pasting the expression object yields the literal text "tau" in the title.
  tauq <- tau[q]
  taue <- expression(tau)
  mt   <- paste0(trt_name, " ", PostStart, " ", " (", taue, "=", tauq, ")")
  matplot(Xtime, cbind(trtemp[a:d], synemp, syn0emp), main = mt, type = "b",
          col = c(2, 4, 1), lwd = c(3, 3, 2), pch = 19, cex = 0.5,
          xlab = " ", ylab = " ", xaxt = "n")
  # Vertical reference line at the (b-a+1)-th plotted period, i.e. time index b.
  abline(v = Xtime[(b - a + 1)], lty = 5, lwd = 2, col = "darkblue")
  axis(1, at = Xaxis, labels = x_lab)
}

# (5) Save outputs: one CSV per result matrix, named <object>_<case>.csv.
emp_out <- list(TRTemp = TRTemp,
                SYNemp = SYNemp,
                SYN0emp = SYN0emp,
                wSYNemp = wSYNemp,
                wSYN0emp = wSYN0emp,
                trtPreemp_fit = trtPreemp_fit,
                pcuPreemp_fit = pcuPreemp_fit)
for (out_name in names(emp_out)) {
  write.csv(emp_out[[out_name]],
            file = paste0(dirpath, "Data/Results/", out_name, "_", case, ".csv"))
}

#-----------------------------------------------------------------------#
# 2. Synthetic controls for a potential control unit (Placebo Analysis) #
#-----------------------------------------------------------------------#
# Placebo analysis: each of the J potential control units is, in turn, treated
# as if it were the treated unit, and the same synthetic control procedure is
# applied to it. Results are written to CSV files tagged with both case and j.
for (j in seq_len(J)) {
  plb_indx        <- pcu_indx[j]      # Placebo: treat a potential control unit as a treated unit
  plbemp_trt_name <- Sname[plb_indx]
  cat("case=", case, "(", "Treated=", trt_name, Time[c], ")", "Placebo=", j, plbemp_trt_name, "\n")

  # A first call (q = 1) is made only to learn the size and membership of the
  # placebo unit's donor pool, which are needed to pre-allocate the containers.
  # NOTE(review): the treated-unit call passes SmwCh in this argument position,
  # whereas the placebo calls pass Smw -- confirm this difference is intended.
  res <- Dataemp_trtpcu(1, a, d, plbemp_trt_name, SSindx1, Sname, Sindx, Smw, Scode, Xecon)
  plbemp_trt_x    <- res[[3]]   # econ. features of the treated unit
  plbemp_pcu_x    <- res[[4]]   # econ. features of the potential control units
  plbemp_N_pcu    <- res[[5]]   # number of potential control units
  plbemp_pcu_indx <- res[[6]]   # state indices of potential control units

  plbemp_TRT        <- matrix(0, d - a + 1, length(tau))                 # Quantile time series of the treated unit
  plbemp_SYN        <- matrix(0, d - a + 1, length(tau))                 # Quantile time series of the synthetic control unit (Proposed)
  plbemp_SYN0       <- matrix(0, d - a + 1, length(tau))                 # Quantile time series of the synthetic control unit (Traditional)
  plbemp_wSYN       <- matrix(0, plbemp_N_pcu, length(tau))              # Optimal weights of the synthetic control unit (Proposed)
  plbemp_wSYN0      <- matrix(0, plbemp_N_pcu, length(tau))              # Optimal weights of the synthetic control unit (Traditional)
  plbemp_trtPre_fit <- matrix(0, b - a + 1, length(tau))                 # Fitted quantile trends of the treated unit
  plbemp_pcuPre_fit <- matrix(0, b - a + 1, plbemp_N_pcu * length(tau))  # Fitted quantile trends of the potential control units

  for (q in seq_along(tau)) {
    # (1) q-th quantile sequences of the placebo (treated) unit and of its
    #     potential control units.
    res <- Dataemp_trtpcu(q, a, d, plbemp_trt_name, SSindx1, Sname, Sindx, Smw, Scode, Xecon)
    plbemp_trt   <- res[[1]]
    plbemp_pcu   <- res[[2]]
    plbemp_trt_x <- res[[3]]
    plbemp_pcu_x <- res[[4]]

    # (2) q-th quantile of the synthetic control unit; weights and pre-period
    #     fits go into the q-th column (or block of plbemp_N_pcu columns).
    res <- counterfactual(k, plbemp_trt, plbemp_pcu, plbemp_trt_x, plbemp_pcu_x, Period, a, b, c, d)
    plbemp_syn             <- res[[1]]   # Proposed
    plbemp_syn0            <- res[[2]]   # Traditional
    plbemp_wSYN[, q]       <- res[[3]]
    plbemp_wSYN0[, q]      <- res[[4]]
    plbemp_trtPre_fit[, q] <- res[[5]]
    plbemp_pcuPre_fit[, (plbemp_N_pcu * (q - 1) + 1):(plbemp_N_pcu * q)] <- res[[6]]
    plbemp_trt_alpha       <- res[[7]]   # kept for inspection only; not used below
    plbemp_pcu_alpha       <- res[[8]]   # kept for inspection only; not used below

    # (3) Keep the series over time indices a..d for this quantile level.
    plbemp_TRT[, q]  <- plbemp_trt[a:d]
    plbemp_SYN[, q]  <- plbemp_syn
    plbemp_SYN0[, q] <- plbemp_syn0

    # (4) Plot the placebo unit against both of its synthetic control series.
    #     Pasting the expression object yields the literal text "tau" in the title.
    tauq <- tau[q]
    taue <- expression(tau)
    mt   <- paste0(plbemp_trt_name, " ", "(Placebo)", PostStart, " ", " (", taue, "=", tauq, ")")
    matplot(Xtime, cbind(plbemp_trt[a:d], plbemp_syn, plbemp_syn0), main = mt, type = "b",
            col = c(2, 4, 1), lwd = c(3, 3, 2), pch = 19, cex = 0.5,
            xlab = " ", ylab = " ", xaxt = "n")
    # Vertical reference line at the (b-a+1)-th plotted period, i.e. time index b.
    abline(v = Xtime[(b - a + 1)], lty = 5, lwd = 2, col = "darkblue")
    axis(1, at = Xaxis, labels = x_lab)
  }

  # (5) Save outputs: one CSV per object, named <object>_<case>_<j>.csv.
  plbemp_out <- list(plbemp_pcu_indx = plbemp_pcu_indx,
                     plbemp_TRT = plbemp_TRT,
                     plbemp_SYN = plbemp_SYN,
                     plbemp_SYN0 = plbemp_SYN0,
                     plbemp_wSYN = plbemp_wSYN,
                     plbemp_wSYN0 = plbemp_wSYN0,
                     plbemp_trtPre_fit = plbemp_trtPre_fit,
                     plbemp_pcuPre_fit = plbemp_pcuPre_fit)
  for (out_name in names(plbemp_out)) {
    write.csv(plbemp_out[[out_name]],
              file = paste0(dirpath, "Data/Results/", out_name, "_", case, "_", j, ".csv"))
  }
}

case=case+1}


########### END HERE ###############
