# Script setup: clear the workspace, attach packages, set the working directory.

# Remove every (non-hidden) object from the current environment.
rm(list=ls())

library(utils) # read.csv / write.csv
library(data.table) # data.table(), `:=` and shift() (fread is not called in the visible code)
library(foreign) # read.dta for importing Stata files; NOTE(review): not called in the visible code
library(tidyr) # unite / separate; NOTE(review): not called in the visible code
library(lfe) # felm: linear models with fixed effects; NOTE(review): not called in the visible code
library(readstata13) # read.dta13: read .dta files written by Stata 13
library(stargazer) # NOTE(review): not called in the visible code
library(zoo) # as.yearqtr; NOTE(review): not called in the visible code

# All file paths below are relative ("data/..."), so they resolve against the
# working directory set here.
# NOTE(review): the empty path looks like a placeholder; setwd("") is expected
# to fail ("cannot change working directory") - confirm and point this at the
# project root before running the script non-interactively.
setwd("")

# -----------------------------------------------------------------------------
# County-year panel construction
#
# The same pipeline is run on three bank-level inputs (all banks, all banks
# except credit card banks, and the "1bil" file) and each result is written to
# its own CSV. The pipeline lives in build_county_panel(); the intermediate
# tables it creates are local to that function.
# -----------------------------------------------------------------------------

# Sort rows by year, then fips (NAs last), leaving ties in their current order.
# Equivalent to the stable two-step sort `df[order(df$fips), ]` followed by
# `df[order(df$year), ]`.
sort_by_year_fips <- function(df) {
  df[order(df$year, df$fips), ]
}

# Sum `values` within each (fips, year) pair and return a data.frame with
# columns fips, year and `value_name`. Groups with an NA fips or year are
# dropped by aggregate(); an NA in `values` makes that group's sum NA.
sum_by_fips_year <- function(values, fips, year, value_name) {
  totals <- aggregate(values, by = list(fips, year), FUN = sum)
  colnames(totals) <- c("fips", "year", value_name)
  totals
}

# TRUE for each element of `key` whose value occurs at least twice in `key`.
# NA keys are never flagged.
occurs_repeatedly <- function(key) {
  !is.na(key) & (duplicated(key) | duplicated(key, fromLast = TRUE))
}

# Read the county controls CSV at `path` and add, per fips:
#   <x>.l1  one-row lag of pc_income, population and hpi
#   Dl_<x>  100 * log difference between the level and its lag
#   LDl_<x> one-row lag of Dl_<x>
# Returns a data.table.
load_county_controls <- function(path) {
  controls <- data.table(read.csv(path))

  # shift() lags by row position within each group, so the rows must be in
  # year order within fips for the ".l1" columns to hold the previous year.
  # NOTE(review): a gap in a county's years still makes the "lag" span more
  # than one year - confirm the file has consecutive years per fips.
  setorder(controls, fips, year)

  level_cols <- c("pc_income", "population", "hpi")
  lag_cols <- paste0(level_cols, ".l1")
  controls[, (lag_cols) := shift(.SD), by = "fips", .SDcols = level_cols]

  controls[, `:=`(
    Dl_inc = (log(pc_income) - log(pc_income.l1)) * 100,
    Dl_pop = (log(population) - log(population.l1)) * 100,
    Dl_hpi = (log(hpi) - log(hpi.l1)) * 100
  )]

  growth_cols <- c("Dl_inc", "Dl_pop", "Dl_hpi")
  lag_growth_cols <- paste0("L", growth_cols)
  controls[, (lag_growth_cols) := shift(.SD), by = "fips",
           .SDcols = growth_cols]

  controls
}

# Build the county-year panel from bank-by-county-by-year rows.
#
# Arguments:
#   bank_data       data.frame with at least fips, year, tot_amt, herfdepcty.
#   avgherfdepcty   lookup merged on fips (must provide `avgherfdepcty`).
#   ff_tar_yearend  lookup merged on year (must provide `d1_fftar_yearend`).
#   bls_final       lookup merged on fips and year (must provide `d1_lnemp`
#                   and `d1_lntotwage`).
#   countydata      county controls, left-joined on year and fips.
#
# Returns a data.frame sorted by year and fips. Stops with an error if no
# county-year survives the missing-value filter.
build_county_panel <- function(bank_data, avgherfdepcty, ff_tar_yearend,
                               bls_final, countydata) {
  bank <- data.table(sort_by_year_fips(bank_data))

  # Each bank's herfdepcty weighted by its share of the county-year tot_amt.
  bank[, fips_amt := sum(tot_amt), by = c("fips", "year")]
  bank[, bank_fips_amt := tot_amt / fips_amt]
  bank[, herfdepcty_amt := bank_fips_amt * herfdepcty]

  # County-year sum of the weighted values, ignoring banks where it is missing.
  has_weighted <- !is.na(bank$herfdepcty_amt)
  herfdepcty_fips <- sum_by_fips_year(
    bank$herfdepcty_amt[has_weighted],
    bank$fips[has_weighted],
    bank$year[has_weighted],
    "herfdepcty_amt"
  )

  # County-year total of tot_amt.
  panel <- sum_by_fips_year(bank$tot_amt, bank$fips, bank$year, "tot_amt")

  # Inner joins against the lookup tables. merge() re-sorts on its keys; the
  # explicit sorts in between are kept so that the order of rows sharing a
  # key is the same as in the original step-by-step script.
  panel <- merge(panel, avgherfdepcty, by = "fips", all = FALSE)
  panel <- sort_by_year_fips(panel)
  panel <- merge(panel, herfdepcty_fips, by = c("fips", "year"), all = FALSE)
  panel <- panel[order(panel$year), ]
  panel <- merge(panel, ff_tar_yearend, by = "year", all = FALSE)
  panel <- sort_by_year_fips(panel)
  panel <- merge(panel, bls_final, by = c("fips", "year"), all = FALSE)

  # Interactions of d1_fftar_yearend with the two concentration measures.
  panel$d1_ffm_herfdepcty_amt <- panel$d1_fftar_yearend * panel$herfdepcty_amt
  panel$d1_ffm_avgherfdepcty <- panel$d1_fftar_yearend * panel$avgherfdepcty

  panel$log_tot_amt <- log(panel$tot_amt)

  # 1 from 2009 onwards, 0 before; fipszero labels each county-by-period cell.
  # NOTE(review): "zerolower" presumably marks the zero-lower-bound period -
  # confirm.
  panel$zerolower <- ifelse(panel$year >= 2009, 1, 0)
  panel$fipszero <- paste0(panel$fips, panel$zerolower)

  # Turn +/-Inf (e.g. log(0)) and NaN into NA in every column. The frame is
  # rebuilt through data.frame(), as the original did.
  panel <- do.call(data.frame, lapply(panel, function(column) {
    replace(column, is.infinite(column) | is.nan(column), NA)
  }))

  # Keep only rows where all of these columns are present.
  required <- c(
    "log_tot_amt", "d1_ffm_herfdepcty_amt", "d1_ffm_avgherfdepcty",
    "herfdepcty_amt", "d1_lnemp", "d1_lntotwage"
  )
  panel <- panel[complete.cases(panel[, required]), ]
  if (nrow(panel) == 0L) {
    stop("no complete county-year observations left after cleaning",
         call. = FALSE)
  }

  # Keep counties, and county-by-period cells, observed at least twice. Both
  # masks are computed on the same (pre-filter) rows.
  keep <- occurs_repeatedly(panel$fips) & occurs_repeatedly(panel$fipszero)
  panel <- panel[keep, ]

  # Add county controls; panel rows without a match are kept with NA controls.
  merge(panel, countydata, by = c("year", "fips"), all.x = TRUE)
}

# Build the panel for `bank_data` and write it to `out_path`. Returns the
# panel invisibly.
export_county_panel <- function(bank_data, out_path) {
  panel <- build_county_panel(
    bank_data, avgherfdepcty, ff_tar_yearend, bls_final, countydata
  )
  # write.csv() always writes a comma-separated file with a header row. The
  # `sep = "\t"` and `col.names` arguments previously passed here were ignored
  # with a warning, so dropping them leaves the file contents unchanged.
  write.csv(panel, out_path, row.names = FALSE)
  invisible(panel)
}

# Inputs shared by all three runs (read once).
avgherfdepcty <- read.dta13("data/DSS_Data/avgherfdepcty.dta")
ff_tar_yearend <- read.dta13("data/DSS_Data/ff_tar_yearend.dta")
bls_final <- read.dta13("data/DSS_Data/bls_final.dta")
countydata <- load_county_controls("data/county_controls.csv")

#-----------------------------
# All banks
#-----------------------------

data_save <- read.csv("data/cra_bank_county.csv")
data <- data_save
mydata <- export_county_panel(data, "data/mydata_county.csv")

#-----------------------------
# Excluding credit card banks
#-----------------------------

# Banks excluded in every year. Bank 688079 is not in this list because it
# only became a credit card bank in 2004; it is excluded from 2004 onwards.
banks <- c(30810, 112855, 486752, 489913, 1830035, 2253891, 2582023, 1417557)
is_credit_bank <- data_save$bankid %in% banks |
  (data_save$year >= 2004 & data_save$bankid == 688079)
# which() also drops rows where the flag is NA (missing year or bankid).
data <- data_save[which(!is_credit_bank), ]
mydata <- export_county_panel(data, "data/mydata_county_ecredit.csv")

#-----------------------------
# Excluding banks with < $1 bil in assets
#-----------------------------

# NOTE(review): no asset filter is applied here; the "1bil" input file is
# presumably already restricted to the larger banks - confirm.
data <- read.csv("data/cra_bank_county_1bil.csv")
mydata <- export_county_panel(data, "data/mydata_county_1bil.csv")



