# Script preamble: empty the global environment, attach the packages used
# below, and point R at the project directory.
rm(list=ls())

library(foreign) #read.dta, import data
library(data.table) #importing from the web using fread; setDT() and := are used below
library(lfe) #for lm with fixed effect (felm)
library(systemfit) #systemfit() for the restricted OLS fit
library(dummies) #dummy() for year indicator columns
library(psych) #for winsor
# NOTE(review): as.yearqtr() is called further down, but none of the packages
# attached here is named as its provider; presumably it is made available by a
# dependency of one of them (zoo?) -- confirm.

#set working directory here
# The empty string is a placeholder: replace it with the project root, because
# every path below ("data/...") is relative to the working directory.
# NOTE(review): setwd() is expected to fail on an empty path -- confirm.
setwd("")


#--------#
#   Data #
#--------#
# Consistent call-report time series, downloaded from
# http://pages.stern.nyu.edu/~pschnabl/data/data_callreport.htm
banksdata <- read.dta("data/Original_Data/call_report/callreports_final.dta")

# Keep only year-end observations.
# `date` is read as a yyyymmdd stamp and stored as a year-quarter in `dateqtr`
# (that column is not referenced again in this file).
report_day <- as.Date(as.character(banksdata$date), "%Y%m%d")
banksdata$dateqtr <- as.yearqtr(report_day, format = "%YQ%q")

# Rows whose test evaluates to NA are dropped, not kept as all-NA rows.
fourth_quarter <- banksdata$quarter == 4
banksdata <- banksdata[!is.na(fourth_quarter) & fourth_quarter, ]

#-----------------
# cleaning call data
#------------------
# 1. Strictly positive loans and assets
positive_books <- banksdata$loans > 0 & banksdata$assets > 0
banksdata <- banksdata[!is.na(positive_books) & positive_books, ]

# 2. Sample period for the Lerner index: year strictly after 1995 and
#    strictly before 2015
in_window <- banksdata$year > 1995 & banksdata$year < 2015
banksdata1 <- banksdata[!is.na(in_window) & in_window, ]

#-----------------------#
# Baseline Lerner Index #
#-----------------------#
## Call report variable codes
# operinc                      - RIAD4000
# interest income (intincassets) - RIAD4107
# non-interest income (nonintinc) - RIAD4079
# assets                       - RCFD2170
# intexp                       - RIAD4073
# salaries                     - RIAD4135
# exponpremises                - RIAD4217
# deposits                     - RCFD2200

# Main variables.
#   P  : (interest + non-interest income) / assets
#   C  : interest expense + salaries + premises expense
#   Q  : assets
#   W1 : premises expense / assets
#   W2 : salaries / assets
#   W3 : interest expense / deposits
# rowSums(na.rm = TRUE) means a missing component contributes zero to the sum.
income_items <- c("intincassets", "nonintinc")
cost_items <- c("intexp", "salaries", "exponpremises")

banksdata1$P <- rowSums(banksdata1[, income_items], na.rm = TRUE) / banksdata1$assets
banksdata1$C <- rowSums(banksdata1[, cost_items], na.rm = TRUE)
banksdata1$Q <- banksdata1$assets
banksdata1$W1 <- banksdata1$exponpremises / banksdata1$assets
banksdata1$W2 <- banksdata1$salaries / banksdata1$assets
banksdata1$W3 <- banksdata1$intexp / banksdata1$deposits

# Winsorize each variable separately with trim = 0.01.
winsor_cols <- c("P", "C", "Q", "W1", "W2", "W3")
banksdata1[winsor_cols] <- lapply(
  banksdata1[winsor_cols],
  winsor,
  trim = 0.01,
  na.rm = TRUE
)

# Data transformation: logs of cost, output and the three input prices, plus
# the second-order terms of the cost function.
banksdata1$c <- log(banksdata1$C)
banksdata1$q <- log(banksdata1$Q)
# Only the output square carries the 0.5 factor; the price squares below do not.
banksdata1$q2 <- 0.5 * banksdata1$q * banksdata1$q

for (k in 1:3) {
  banksdata1[[paste0("w", k)]] <- log(banksdata1[[paste0("W", k)]])
}

# Products are created in this order so the resulting column order is
# w1w1, w1w2, w1w3, w2w2, w2w3, w3w3, w1q, w2q, w3q.
product_terms <- list(
  c("w1", "w1"), c("w1", "w2"), c("w1", "w3"),
  c("w2", "w2"), c("w2", "w3"), c("w3", "w3"),
  c("w1", "q"), c("w2", "q"), c("w3", "q")
)
for (pair in product_terms) {
  banksdata1[[paste0(pair[1], pair[2])]] <-
    banksdata1[[pair[1]]] * banksdata1[[pair[2]]]
}

# Append one indicator column per year. This call is left as a plain top-level
# statement on purpose: the names dummy() gives its columns may depend on how
# it is called. The indicator columns are not referenced by name anywhere else
# in this file (the baseline fit uses factor(year) instead).
banksdata1 <- cbind.data.frame(banksdata1, dummy(banksdata1$year))

# Subtract the bank-level means for the fixed-effects (within) transformation.
# For every regressor x this adds two columns, in this order:
#   mean_x : mean of x over all rows sharing the same rssdid (NAs ignored)
#   dm_x   : x - mean_x
# One loop replaces fifteen copy-pasted stanzas, so the variable list is the
# single place to edit. Columns are added by reference with := rather than by
# reassigning the whole table.
demean_vars <- c(
  "c", "q", "q2",
  "w1", "w2", "w3",
  "w1w1", "w1w2", "w1w3", "w2w2", "w2w3", "w3w3",
  "w1q", "w2q", "w3q"
)

setDT(banksdata1)
for (v in demean_vars) {
  mean_col <- paste0("mean_", v)
  dm_col <- paste0("dm_", v)
  # .SDcols selects the column by name, so a variable called `c` cannot be
  # confused with the function c().
  banksdata1[, (mean_col) := mean(.SD[[1L]], na.rm = TRUE),
             by = "rssdid", .SDcols = v]
  banksdata1[, (dm_col) := .SD[[1L]] - .SD[[2L]],
             .SDcols = c(v, mean_col)]
}

# Turn +/-Inf (e.g. log of zero) and NaN (e.g. mean of an all-NA group) into
# NA. Rebuilding through data.frame() also converts the data.table back to a
# plain data.frame for the steps that follow.
banksdata1 = do.call(data.frame,lapply(banksdata1, function(x) replace(x, is.infinite(x),NA)))
banksdata1 = do.call(data.frame,lapply(banksdata1, function(x) replace(x, is.nan(x),NA)))

### Restrictions of symmetry and degree one homogeneity ###
# Linear restrictions on the coefficients of the single equation ("eq1_"
# prefix) fitted below. Symmetry is built in by construction, since only one
# cross-product per price pair enters the regression.
# NOTE(review): the own-price squares (w1w1, w2w2, w3w3) are defined without
# the 0.5 factor that q2 carries. With that scaling, degree-one homogeneity in
# prices would imply 2*b_ii + (the two cross terms) = 0 rather than the
# unweighted sums in the last three restrictions -- confirm which
# parameterisation is intended.
restrict <- c("eq1_dm_w1+eq1_dm_w2+eq1_dm_w3=1",
              "eq1_dm_w1q+eq1_dm_w2q+eq1_dm_w3q=0",
              "eq1_dm_w1w1+eq1_dm_w1w2+eq1_dm_w1w3=0",
              "eq1_dm_w1w2+eq1_dm_w2w2+eq1_dm_w2w3=0",
              "eq1_dm_w1w3+eq1_dm_w2w3+eq1_dm_w3w3=0")

# Restricted OLS on the bank-demeaned variables, no intercept, year effects
# through factor(year).
fitols <- systemfit(dm_c~0+dm_q+dm_q2+dm_w1+dm_w2+dm_w3 + dm_w1w1 + dm_w1w2 + dm_w1w3 + dm_w2w2 + dm_w2w3 + dm_w3w3 + dm_w1q + dm_w2q + dm_w3q
                    + factor(year),
                    data = banksdata1, method = "OLS", restrict.matrix = restrict)

# One-row data frame of coefficients so each can be looked up by name.
coeff <- as.data.frame(t(fitols$coefficients))
rownames(coeff) <- c("")

# Derivative of the fitted log-cost function with respect to log output.
# Because q2 = 0.5 * q^2, its coefficient multiplies q. The derivative is
# evaluated at the un-demeaned q, w1, w2, w3.
banksdata1$SEC_OLS <- coeff[["eq1_dm_q"]] +
  coeff[["eq1_dm_q2"]] * banksdata1$q +
  coeff[["eq1_dm_w1q"]] * banksdata1$w1 +
  coeff[["eq1_dm_w2q"]] * banksdata1$w2 +
  coeff[["eq1_dm_w3q"]] * banksdata1$w3
# Marginal cost = elasticity * average cost (C / Q).
banksdata1$MC_ols <- banksdata1$SEC_OLS * (banksdata1$C / banksdata1$Q)

# Lerner index = (P - MC) / P.
banksdata1$Lerner <- (banksdata1$P - banksdata1$MC_ols) / banksdata1$P
colnames(banksdata1)[colnames(banksdata1) == "rssdid"] <- "bankid"

Lerner1 <- subset(banksdata1, select = c(bankid, year, Lerner))
colnames(Lerner1) <- c("bankid", "year", "Lerner")

# write.csv() fixes the separator (",") and always writes the header; the
# former col.names= and sep= arguments were ignored with a warning, so
# dropping them leaves the written file unchanged.
write.csv(Lerner1, "data/Lerner_data.csv", row.names = FALSE)

#*****************
# Other options  :
#*****************
# 1. Impose degree-one homogeneity in input prices by dividing total cost and
#    the other factor prices by W3. Pooled regression with a time trend.
# The columns c, q, q2, w1, w2 and their products are OVERWRITTEN here with
# the normalised versions; w3 and the w3 interactions are left untouched.
banksdata1$c <- with(banksdata1, log(C / W3))
banksdata1$q <- with(banksdata1, log(Q))
banksdata1$q2 <- with(banksdata1, 0.5 * q * q)
banksdata1$w1 <- with(banksdata1, log(W1 / W3))
banksdata1$w2 <- with(banksdata1, log(W2 / W3))
banksdata1$w1w2 <- with(banksdata1, w1 * w2)
banksdata1$w1w1 <- with(banksdata1, w1 * w1)
banksdata1$w2w2 <- with(banksdata1, w2 * w2)
banksdata1$w1q <- with(banksdata1, w1 * q)
banksdata1$w2q <- with(banksdata1, w2 * q)

# Linear trend counted from 1995, its square, and its interactions.
banksdata1$trend <- banksdata1$year - 1995
banksdata1$trend_2 <- with(banksdata1, trend * trend)
banksdata1$trend_q <- with(banksdata1, trend * q)
banksdata1$trend_w1 <- with(banksdata1, trend * w1)
banksdata1$trend_w2 <- with(banksdata1, trend * w2)

# Replace +/-Inf, then NaN, by NA in every column.
banksdata1 <- do.call(
  data.frame,
  lapply(banksdata1, function(col) replace(col, is.infinite(col), NA))
)
banksdata1 <- do.call(
  data.frame,
  lapply(banksdata1, function(col) replace(col, is.nan(col), NA))
)

# No fixed effects, instruments or clustering (the three "| 0" parts).
ols <- felm(c~ q + q2 + w1 + w2 + w1w1 + w1w2 + w2w2 + w1q + w2q +
              trend + trend_2 + trend_q + trend_w1 + trend_w2 | 0 | 0 | 0,
            data = banksdata1, na.action = na.omit)

coeff <- as.data.frame(t(ols$coefficients))
rownames(coeff) <- c("")

# Derivative of fitted log cost with respect to log output, then marginal cost
# and the Lerner index, exactly as in the baseline.
banksdata1$SEC_OLS <- with(
  banksdata1,
  coeff[["q"]] + coeff[["q2"]] * q + coeff[["w1q"]] * w1 +
    coeff[["w2q"]] * w2 + coeff[["trend_q"]] * trend
)
banksdata1$MC_ols <- with(banksdata1, SEC_OLS * (C / Q))
banksdata1$Lerner <- with(banksdata1, (P - MC_ols) / P)

# Has no effect when the id column has already been renamed.
names(banksdata1)[names(banksdata1) == "rssdid"] <- "bankid"

Lerner2 <- setNames(
  banksdata1[, c("bankid", "year", "Lerner")],
  c("bankid", "year", "Lerner2")
)

# 2. No homogeneity restrictions, no bank FE, with time trend ####
# This block does not recompute c, q, w1, w2 or their products: it uses
# whatever the most recent assignment above left in those columns (in this
# file, the W3-normalised values from option 1), while w3, w1w3, w2w3, w3w3
# and w3q were last assigned in the baseline section.
# NOTE(review): confirm that this mix is what is intended for a specification
# labelled "no homogeneity restrictions".
banksdata1[["trend"]] <- banksdata1[["year"]] - 1995
# Unlike option 1, the squared trend carries a 0.5 factor here.
banksdata1[["trend_2"]] <- 0.5 * banksdata1[["trend"]] * banksdata1[["trend"]]
for (term in c("q", "w1", "w2", "w3")) {
  banksdata1[[paste0("trend_", term)]] <-
    banksdata1[["trend"]] * banksdata1[[term]]
}

# Replace +/-Inf, then NaN, by NA in every column.
banksdata1 <- do.call(
  data.frame,
  lapply(banksdata1, function(col) replace(col, is.infinite(col), NA))
)
banksdata1 <- do.call(
  data.frame,
  lapply(banksdata1, function(col) replace(col, is.nan(col), NA))
)

# Pooled regression: no fixed effects, instruments or clustering.
ols <- felm(c~ q + q2 + w1 + w2+ w3 + w1w1 + w1w2 + w1w3 + w2w2 + w2w3 + w3w3 + w1q + w2q + w3q+
              trend + trend_2 + trend_q + trend_w1 + trend_w2 + trend_w3 | 0 | 0 | 0,
            data = banksdata1, na.action = na.omit)

coeff <- as.data.frame(t(ols$coefficients))
rownames(coeff) <- c("")

# Derivative of fitted log cost with respect to log output.
banksdata1[["SEC_OLS"]] <- coeff[["q"]] +
  coeff[["q2"]] * banksdata1[["q"]] +
  coeff[["w1q"]] * banksdata1[["w1"]] +
  coeff[["w2q"]] * banksdata1[["w2"]] +
  coeff[["w3q"]] * banksdata1[["w3"]] +
  coeff[["trend_q"]] * banksdata1[["trend"]]
# Marginal cost = elasticity * average cost; Lerner = (P - MC) / P.
banksdata1[["MC_ols"]] <- banksdata1[["SEC_OLS"]] *
  (banksdata1[["C"]] / banksdata1[["Q"]])
banksdata1[["Lerner"]] <- (banksdata1[["P"]] - banksdata1[["MC_ols"]]) /
  banksdata1[["P"]]

# Has no effect when the id column has already been renamed.
names(banksdata1)[names(banksdata1) == "rssdid"] <- "bankid"

Lerner3 <- setNames(
  banksdata1[, c("bankid", "year", "Lerner")],
  c("bankid", "year", "Lerner3")
)


#********************
# Compare with  Fred:
#********************

# Aggregate to country level by year and compare to FRED.
# Helper: unweighted mean of `value_col` per year over the rows where both
# year and the value are present; the result has columns "year" and `out_col`.
yearly_mean <- function(panel, value_col, out_col) {
  pair <- panel[, c("year", value_col)]
  pair <- pair[complete.cases(pair), ]
  by_year <- aggregate(pair[[value_col]], by = list(pair$year), FUN = mean)
  colnames(by_year) <- c("year", out_col)
  by_year
}

Lerner1_year <- yearly_mean(Lerner1, "Lerner", "Lerner1")
Lerner2_year <- yearly_mean(Lerner2, "Lerner2", "Lerner2")
Lerner3_year <- yearly_mean(Lerner3, "Lerner3", "Lerner3")

# Inner joins on year: only years present in all three series survive.
Lerner_all <- Reduce(
  function(left, right) merge(left, right, by = c("year")),
  list(Lerner1_year, Lerner2_year, Lerner3_year)
)

# Add Lerner FRED
# source: https://fred.stlouisfed.org/series/DDOI04USA066NWDB
# The CSV is expected to provide the columns "year" and "Lerner_FRED".
Lerner_Fred <- read.csv("data/Lerner/Lerner_Fred.csv")
# Left join: keep every year of the computed series even without a FRED value.
Lerner_test <- merge(Lerner_all, Lerner_Fred, by = c("year"), all.x = TRUE)

# Pearson correlations across the four series, using only years where all four
# are available.
cor(Lerner_test[, c("Lerner1", "Lerner2", "Lerner3", "Lerner_FRED")],
    use = "complete.obs", method = "pearson")


