# Start from an empty workspace: removes every object listed by ls().
rm(list=ls())

library(utils) 
library(data.table) 
library(foreign) # read.dta, import data
library(tidyr) # unite and separate (paste0 itself is base R)
library(readstata13) # read .dta files written in Stata 13
library(zoo) # as.yearqtr
library(psych)

# Set the working directory to the project folder. The path is left blank here
# and has to be filled in: every data path used below is relative to it.
setwd("")


#-------------------------#
#    Building  Main Data 
#-------------------------#

# Load the call-report panel, keep fourth-quarter rows, and replace the
# deposit-concentration measure (herfdepcty) with its within-bank lag.
data <- read.dta13("data/DSS_Data/sample_call_final.dta")

# Derive the calendar quarter of each report date and keep quarter 4 only.
data$dateqtr <- as.yearqtr(as.Date(as.character(data$date), "%Y-%m-%d"),
                           format = "%YQ%q")
data$quarter <- quarter(data$dateqtr)
data <- data[which(data$quarter == 4), ]

# Rename the bank identifier and the fed funds target column.
colnames(data)[colnames(data) == "bankid"] <- "rssd_id"
colnames(data)[colnames(data) == "ff_tar"] <- "fftar_end"

mydata <- subset(data, select = c(assets_2010, assets, herfdepcty, year,
                                  rssd_id, fftar_end))
mydata <- data.table(mydata)

# shift() lags by row position within each bank, so the rows must be in
# chronological order first; otherwise the "lag" depends on the file order.
# NOTE(review): the lag is positional -- if a bank has a gap in its years, the
# lagged value comes from the previous available year, not from year - 1.
setorder(mydata, rssd_id, year)
mydata[, l1_herfdepcty := shift(herfdepcty), by = "rssd_id"]

# Drop the contemporaneous measure and give the lag its name.
mydata[, herfdepcty := NULL]
setnames(mydata, "l1_herfdepcty", "herfdepcty")

# Attach the CRA lending records to each bank-year (inner join: unmatched rows
# on either side are dropped).
cra_short <- read.dta13("data/DSS_Data/cra_short.dta")
mydata <- merge(mydata, cra_short, by = c("rssd_id", "year"), all = FALSE)
mydata <- mydata[order(mydata$year), ]

# Year-level series: year-end fed funds target, then CPI (inner joins on year).
ff_tar_yearend <- read.dta13("data/DSS_Data/ff_tar_yearend.dta")
mydata <- merge(mydata, ff_tar_yearend, by = "year", all = FALSE)
mydata <- mydata[order(mydata$year), ]

cpi_fred_year <- read.dta13("data/DSS_Data/cpi_fred_year.dta")
mydata <- merge(mydata, cpi_fred_year, by = "year", all = FALSE)

# From here on the bank identifier is called bankid again.
names(mydata)[names(mydata) == "rssd_id"] <- "bankid"

# Numeric identifiers built by positional concatenation:
#   fips     = state code followed by a 3-digit county code
#   fipsyear = fips followed by a 4-digit year
#   fipsbank = bankid followed by a 5-digit fips
mydata$fips <- with(mydata, 1000 * state + county)
mydata$fipsyear <- with(mydata, 10000 * fips + year)
mydata$fipsbank <- with(mydata, 100000 * bankid + fips)

mydata <- mydata[order(mydata$fips), ]

# County-level average deposit concentration (inner join on fips).
avgherfdepcty <- read.dta13("data/DSS_Data/avgherfdepcty.dta")
mydata <- merge(mydata, avgherfdepcty, by = "fips", all = FALSE)

mydata <- mydata[order(mydata$bankid, mydata$year), ]

# Interactions of the fed funds change with the two concentration measures.
mydata$d1_ffm_herfdepcty <- with(mydata, d1_fftar_yearend * herfdepcty)
mydata$d1_ffm_avgherfdepcty <- with(mydata, d1_fftar_yearend * avgherfdepcty)

# Total lending across the three loan-size buckets, in levels and logs.
mydata$tot_amt <- with(mydata, small_loan_amt + medium_loan_amt + large_loan_amt)
mydata$log_tot_amt <- log(mydata$tot_amt)

# Keep 1997 onwards, then sort by year with bank-county id breaking ties.
mydata <- mydata[which(mydata$year >= 1997), ]
mydata <- mydata[order(mydata$year, mydata$fipsbank), ]

#****************************
# SAVE FOR COUNTY RESULTS!!!!
# write.csv() always writes a comma-separated file with a header row; it ignores
# `sep` and `col.names` (with a warning), so they are not passed here.
write.csv(mydata, file = "data/cra_bank_county.csv", row.names = FALSE)

#****************************

# Deflate total lending by the CPI series (the _2010 suffix suggests 2010
# dollars -- TODO confirm against cpi_fred_year).
mydata$tot_amt_2010 <- with(mydata, tot_amt / cpi)

# Sort by year, with the bank-county id breaking ties within a year.
mydata <- mydata[order(mydata$year, mydata$fipsbank), ]

# Mean deflated lending per bank-county pair. It is computed before the size
# filter below and is NA for a pair if any of its observations is NA.
mydata <- data.table(mydata)
mydata[, avg_tot_amt := mean(tot_amt_2010), by = "fipsbank"]


#DSS baseline results (p.1839 "We include all bank-county observations with at least $100,000 of new lending.")
# NOTE(review): a cutoff of 100 matches the quoted $100,000 only if loan amounts
# are recorded in thousands of dollars -- confirm against cra_short.
mydata <- mydata[which(mydata$tot_amt_2010 >= 100), ]

# Indicator for 2009 onwards, and identifiers split by that indicator
# (the indicator is appended as the last character of each key).
mydata$zerolower <- as.numeric(mydata$year >= 2009)

mydata$bankidzero <- with(mydata, paste0(bankid, zerolower))
mydata$fipszero <- with(mydata, paste0(fips, zerolower))
mydata$fipsbankzero <- with(mydata, paste0(fipsbank, zerolower))

# Turn +/-Inf and NaN into NA in every column; the result is a plain data.frame.
mydata <- do.call(
  data.frame,
  lapply(mydata, function(col) replace(col, is.infinite(col) | is.nan(col), NA))
)

# Drop rows with a missing value in any of the listed columns.
mydata <- mydata[complete.cases(mydata[c("log_tot_amt", "d1_fftar_yearend",
                                         "herfdepcty", "avgherfdepcty",
                                         "fipsbank", "bankid", "year",
                                         "fipszero", "fips")]), ]

# Drop singleton observations: a row is kept only if, for every grouping
# variable listed below, its value occurs at least twice in the sample.
# All counts are taken on the sample as it stands here (a single pass, not
# iterated after rows are removed).

# TRUE for each element of `values` whose value appears two or more times;
# missing values are never counted as repeated.
occurs_at_least_twice <- function(values) {
  !is.na(values) & values %in% values[duplicated(values)]
}

singleton_groups <- c("fipsbank", "fipsyear", "bankid", "year", "fips",
                      "fipszero")
keep_row <- Reduce(
  `&`,
  lapply(singleton_groups, function(v) occurs_at_least_twice(mydata[[v]]))
)

mydata <- mydata[which(keep_row), ]

#Adding Lerner index
lerner_data <- data.table(read.csv("data/Lerner_data.csv"))

# shift() lags by row position within each bank, so put each bank's rows in
# year order first; otherwise Lerner.l1 depends on the row order of the CSV.
setorder(lerner_data, bankid, year)
lerner_data[, Lerner.l1 := shift(Lerner), by = "bankid"]

# Left join: every loan observation is kept; unmatched bank-years get NA.
mydata <- merge(mydata, lerner_data, by = c("year", "bankid"), all.x = TRUE)


##Save##
# write.csv() always writes a comma-separated file with a header row; it ignores
# `sep` and `col.names` (with a warning), so they are not passed here.
write.csv(mydata, file = "data/mydata_loan.csv", row.names = FALSE)


#------------------------------------------#
#  Excluding banks with < $1 bil in assets 
#------------------------------------------#

# Reload the call-report panel, keep fourth-quarter rows, and replace the
# deposit-concentration measure (herfdepcty) with its within-bank lag.
data <- read.dta13("data/DSS_Data/sample_call_final.dta")

# Derive the calendar quarter of each report date and keep quarter 4 only.
data$dateqtr <- as.yearqtr(as.Date(as.character(data$date), "%Y-%m-%d"),
                           format = "%YQ%q")
data$quarter <- quarter(data$dateqtr)
data <- data[which(data$quarter == 4), ]

# Rename the bank identifier and the fed funds target column.
colnames(data)[colnames(data) == "bankid"] <- "rssd_id"
colnames(data)[colnames(data) == "ff_tar"] <- "fftar_end"

mydata <- subset(data, select = c(assets_2010, assets, herfdepcty, year,
                                  rssd_id, fftar_end))
mydata <- data.table(mydata)

# shift() lags by row position within each bank, so the rows must be in
# chronological order first; otherwise the "lag" depends on the file order.
# NOTE(review): the lag is positional -- if a bank has a gap in its years, the
# lagged value comes from the previous available year, not from year - 1.
setorder(mydata, rssd_id, year)
mydata[, l1_herfdepcty := shift(herfdepcty), by = "rssd_id"]

# Drop the contemporaneous measure and give the lag its name.
mydata[, herfdepcty := NULL]
setnames(mydata, "l1_herfdepcty", "herfdepcty")

# Attach the CRA lending records to each bank-year (inner join: unmatched rows
# on either side are dropped).
cra_short <- read.dta13("data/DSS_Data/cra_short.dta")
mydata <- merge(mydata, cra_short, by = c("rssd_id", "year"), all = FALSE)
mydata <- mydata[order(mydata$year), ]

# Year-level series: year-end fed funds target, then CPI (inner joins on year).
ff_tar_yearend <- read.dta13("data/DSS_Data/ff_tar_yearend.dta")
mydata <- merge(mydata, ff_tar_yearend, by = "year", all = FALSE)
mydata <- mydata[order(mydata$year), ]

cpi_fred_year <- read.dta13("data/DSS_Data/cpi_fred_year.dta")
mydata <- merge(mydata, cpi_fred_year, by = "year", all = FALSE)

# From here on the bank identifier is called bankid again.
names(mydata)[names(mydata) == "rssd_id"] <- "bankid"

# Numeric identifiers built by positional concatenation:
#   fips     = state code followed by a 3-digit county code
#   fipsyear = fips followed by a 4-digit year
#   fipsbank = bankid followed by a 5-digit fips
mydata$fips <- with(mydata, 1000 * state + county)
mydata$fipsyear <- with(mydata, 10000 * fips + year)
mydata$fipsbank <- with(mydata, 100000 * bankid + fips)

mydata <- mydata[order(mydata$fips), ]

# County-level average deposit concentration (inner join on fips).
avgherfdepcty <- read.dta13("data/DSS_Data/avgherfdepcty.dta")
mydata <- merge(mydata, avgherfdepcty, by = "fips", all = FALSE)

mydata <- mydata[order(mydata$bankid, mydata$year), ]

# Interactions of the fed funds change with the two concentration measures.
mydata$d1_ffm_herfdepcty <- with(mydata, d1_fftar_yearend * herfdepcty)
mydata$d1_ffm_avgherfdepcty <- with(mydata, d1_fftar_yearend * avgherfdepcty)

# Total lending across the three loan-size buckets, in levels and logs.
mydata$tot_amt <- with(mydata, small_loan_amt + medium_loan_amt + large_loan_amt)
mydata$log_tot_amt <- log(mydata$tot_amt)

# Keep 1997 onwards, then sort by year with bank-county id breaking ties.
mydata <- mydata[which(mydata$year >= 1997), ]
mydata <- mydata[order(mydata$year, mydata$fipsbank), ]

# Size restriction for this sample: keep rows with assets of at least 1,000,000.
# NOTE(review): this equals $1 billion only if assets is recorded in thousands
# of dollars, and it uses the `assets` column rather than `assets_2010` --
# confirm both against the call-report data.
mydata <- mydata[which(mydata$assets >= 1000000), ]

#****************************
# SAVE FOR COUNTY RESULTS!!!!
# write.csv() always writes a comma-separated file with a header row; it ignores
# `sep` and `col.names` (with a warning), so they are not passed here.
write.csv(mydata, file = "data/cra_bank_county_1bil.csv", row.names = FALSE)

#****************************

# Deflate total lending by the CPI series (the _2010 suffix suggests 2010
# dollars -- TODO confirm against cpi_fred_year).
mydata$tot_amt_2010 <- with(mydata, tot_amt / cpi)

# Sort by year, with the bank-county id breaking ties within a year.
mydata <- mydata[order(mydata$year, mydata$fipsbank), ]

# Mean deflated lending per bank-county pair. It is computed before the size
# filter below and is NA for a pair if any of its observations is NA.
mydata <- data.table(mydata)
mydata[, avg_tot_amt := mean(tot_amt_2010), by = "fipsbank"]


#DSS baseline results (p.1839 "We include all bank-county observations with at least $100,000 of new lending.")
# NOTE(review): a cutoff of 100 matches the quoted $100,000 only if loan amounts
# are recorded in thousands of dollars -- confirm against cra_short.
mydata <- mydata[which(mydata$tot_amt_2010 >= 100), ]

# Indicator for 2009 onwards, and identifiers split by that indicator
# (the indicator is appended as the last character of each key).
mydata$zerolower <- as.numeric(mydata$year >= 2009)

mydata$bankidzero <- with(mydata, paste0(bankid, zerolower))
mydata$fipszero <- with(mydata, paste0(fips, zerolower))
mydata$fipsbankzero <- with(mydata, paste0(fipsbank, zerolower))

# Turn +/-Inf and NaN into NA in every column; the result is a plain data.frame.
mydata <- do.call(
  data.frame,
  lapply(mydata, function(col) replace(col, is.infinite(col) | is.nan(col), NA))
)

# Drop rows with a missing value in any of the listed columns.
mydata <- mydata[complete.cases(mydata[c("log_tot_amt", "d1_fftar_yearend",
                                         "herfdepcty", "avgherfdepcty",
                                         "fipsbank", "bankid", "year",
                                         "fipszero", "fips")]), ]

# Drop singleton observations: a row is kept only if, for every grouping
# variable listed below, its value occurs at least twice in the sample.
# All counts are taken on the sample as it stands here (a single pass, not
# iterated after rows are removed).

# TRUE for each element of `values` whose value appears two or more times;
# missing values are never counted as repeated.
occurs_at_least_twice <- function(values) {
  !is.na(values) & values %in% values[duplicated(values)]
}

singleton_groups <- c("fipsbank", "fipsyear", "bankid", "year", "fips",
                      "fipszero")
keep_row <- Reduce(
  `&`,
  lapply(singleton_groups, function(v) occurs_at_least_twice(mydata[[v]]))
)

mydata <- mydata[which(keep_row), ]

#Adding Lerner index
lerner_data <- data.table(read.csv("data/Lerner_data.csv"))

# shift() lags by row position within each bank, so put each bank's rows in
# year order first; otherwise Lerner.l1 depends on the row order of the CSV.
setorder(lerner_data, bankid, year)
lerner_data[, Lerner.l1 := shift(Lerner), by = "bankid"]

# Left join: every loan observation is kept; unmatched bank-years get NA.
mydata <- merge(mydata, lerner_data, by = c("year", "bankid"), all.x = TRUE)




##Save##
# write.csv() always writes a comma-separated file with a header row; it ignores
# `sep` and `col.names` (with a warning), so they are not passed here.
write.csv(mydata, file = "data/mydata_loan_1bil.csv", row.names = FALSE)




