* Load the loan-level extract (output from subset.sas)
insheet using ltv_dti_fico_nodups_new.csv, clear

* Footnote 13: drop the nonagency loans in McDash (type rmbs with jumbo flag N)
drop if type=="rmbs" & jumbo_flg=="N"

* Keep the top 100 metros.  The population file is just the top metros by
* originations in our data, so loans outside those metros do not match and
* are discarded.
sort cbsa_code
merge cbsa_code using population, nokeep
keep if _merge==3
drop _merge
* egen group() numbers the distinct population values in ascending order.
* NOTE(review): if the population file ever holds more than 100 distinct
* values this keeps the 100 smallest, not the 100 largest - confirm.
egen poprank2=group(population)
keep if poprank2<=100

* Sample filter: appraisal between 10,000 and 5,000,000 dollars, LTV present
* and no greater than 120, FICO present
keep if inrange(appraisal_amt,10000,5000000) & ltv<=120 & fico!=.

* Everything in thousands of dollars
foreach v in orig_amt app {
    replace `v'=`v'/1000
}

* Make loan types consistent: C and D are folded into 1, anything other than
* 1, 2 or 3 becomes 4, and finally codes 2 and 3 trade places
replace loan_type="1" if inlist(loan_type,"C","D")
replace loan_type="4" if !inlist(loan_type,"1","2","3")
gen loantype=cond(loan_type=="3","2",cond(loan_type=="2","3",loan_type))
drop loan_type
destring loantype, replace

* Clean data a bit
* idl is the loan identifier used by the later matching steps
gen idl=_n
* loan amount rounded to whole thousands
gen loanamt=orig_amt
replace loanamt=round(loanamt)
* restore leading zeros lost when the zip code was read as a number
tostring prop_zip, replace
replace prop_zip="00"+prop_zip if length(prop_zip)==3
replace prop_zip="0"+prop_zip if length(prop_zip)==4
* indicator for occupancy_type equal to 1
gen ho = occupancy_type=="1"
gen loanamt2=floor(loanamt/10)
* fall back to the closing date when the origination date is blank, then
* split the date string (month at 1-2, day at 4-5, year at 7-10)
replace orig_dt=close_dt if missing(orig_dt)
gen m=substr(orig_dt,1,2)
gen d=substr(orig_dt,4,2)
gen y=substr(orig_dt,7,4)
destring m d y, replace
gen numdate=mdy(m,d,y)
save temp100, replace

* Load the confidential HMDA data and reduce each year to a common layout.
* The raw files are formatted differently depending on year, so the years are
* handled in three groups.  Only records with loanpurp equal to 1 are kept,
* and single flags records with cosex equal to 5.
local hmdavars loanamt loantype purtype occupy locate income actdate apprace1 lien appethn appsex single seq

* 2004-2010: these files also carry sq2
local years "04 05 06 07 08 09 10"
foreach yy of local years {
    use hm`yy'_action1, clear
    drop if loanpurp!=1
    describe
    gen single = cosex==5
    keep `hmdavars' sq2
    save hmda`yy', replace
}

* 2011-2014
local years "11 12 13 14"
foreach yy of local years {
    use hm`yy'_action1, clear
    drop if loanpurp!=1
    describe
    gen single = cosex==5
    keep `hmdavars'
    save hmda`yy', replace
}

* 2001-2003: every record is assigned lien 1, and minority and apprace are
* renamed to the later-year names appethn and apprace1
local years "01 02 03"
foreach yy of local years {
    use hm`yy'_action1, clear
    drop if loanpurp!=1
    describe
    gen single = cosex==5
    gen lien=1
    rename minority appethn
    rename apprace apprace1
    keep `hmdavars'
    save hmda`yy', replace
}


**************************
*prep hmda data for merge
**************************

* Main loop: one pass per HMDA year.  Each pass writes its own log file
* (named after the two-digit year) and ends by saving one msa file.
set more off
 local years = "01 02 03 04 05 06 07 08 09 10 11 12 13 14"
* close any log left open by an earlier run; capture ignores the error when
* no log is open
 capture log close
foreach yy in `years'{
log using `yy'.log,replace
forvalues j=1/2{
    * Pass 1 writes hmdatemp1 (first liens), pass 2 writes hmdatemp2 (junior liens)
    use hmda`yy',clear
    gen yytemp=`yy'
    gen idh=_n

    * Identify junior liens.  The 2001-2003 files have lien set to 1 for every
    * record, so junior liens are inferred: within a group of records that
    * agree on tract, income, ethnicity, race, sex, occupancy, action date and
    * the single flag, a loan counts as junior when it is not the largest in
    * the group and is under 80 percent of the next larger loan.
    if yytemp<=3 {
        local borrower locate income appethn apprace1 appsex occupy actdate single
        sort `borrower' loanamt
        by `borrower': gen temp=_n
        by `borrower': gen temp2=_N
        gen temp3=loanamt/loanamt[_n+1]
        * cases where three mortgages: compare the smallest with the largest
        replace temp3=loanamt/loanamt[_n+2] if temp==1 & temp2==3
        if `j'==1 {
            * first-lien pass: discard the inferred junior liens
            drop if temp<temp2 & temp3<0.8
        }
        else {
            * junior-lien pass: keep only the inferred junior liens
            keep if temp<temp2 & temp3<0.8
        }
        drop temp*
    }

    * Attach zip codes through the tract-to-zip crosswalk for this year,
    * keeping only records that found a zip
    if yytemp<=2 {
        local xwalk ziptract90
    }
    if yytemp>2 & yytemp<=11 {
        local xwalk ziptract2000
    }
    if yytemp>=12 {
        local xwalk ziptract00
    }
    joinby locate using `xwalk',unmatched(master)
    keep if _merge==3
    drop _merge yytemp

    * Matching variables: numeric action date and an indicator for occupy
    * equal to 1
    gen m=month(actdate)
    gen d=day(actdate)
    gen y=year(actdate)
    gen numdateh=mdy(m,d,y)
    gen hoh = occupy==1
    gen loanamt2=floor(loanamt/10)

    * From 2004 on, select records by the lien variable
    gen yyt=`yy'
    destring yyt,replace
    if yyt>3 {
        keep if lien==`j'
    }

    * Four-digit year, stored in local yearvar for the LPS merge after this loop
    gen yearvart="20"+"`yy'"
    local yearvar=yearvart
    replace loantype=4 if loantype==.
    rename loantype loantypeh
    drop m d y occupy loanamt2 yearvart yyt
    compress
    save hmdatemp`j',replace
}

*********************
**load cleaned lps data and merge in hmda data
***********************
* First matching pass: pair each LPS loan with first-lien HMDA records that
* share the full zip code and the loan amount, then keep at most one HMDA
* record per loan and one loan per HMDA record.

* restrict the LPS loans to the year currently being processed (y is the
* year parsed from the origination date; yearvar was set in the loop above)
use temp100,clear
gen tempyear="`yearvar'"
destring tempyear,replace
keep if y==tempyear
drop tempyear

* form every loan-by-HMDA pair with equal zip and loan amount; loans with no
* candidate stay in the data with missing HMDA fields
gen zip=prop_zip
joinby zip loanamt  using hmdatemp1, unmatched(master) 
* diff  = absolute gap in days between the LPS date and the HMDA action date
* diff3 = 1 when the two occupancy indicators disagree, 0 otherwise
* diff2 = 1 when the two loan type codes disagree, 0 otherwise
gen diff=abs(numdate-numdateh)
gen diff3=1-(hoh==ho)
gen diff2=1-(loantypeh==loantype)

*only keep the hmda record that has the best match
* within each HMDA record rank the candidate loans by date gap, then
* occupancy agreement, then loan type agreement; all but the top-ranked pair
* lose their HMDA id and distance measures
* NOTE(review): sort is not stable, so exact ties are broken arbitrarily
sort idh diff diff3 diff2 
by idh: gen temp=_n
replace idh=. if temp>1 
replace diff=. if temp>1 
replace diff2=. if temp>1 
replace diff3=. if temp>1 

* within each loan the best surviving pair comes first; it is a match only
* when it still has a HMDA id and the dates are at most 45 days apart
sort idl diff diff3 diff2 idh
by idl: gen match=1 if idh!=. & _n==1 & diff<=45 
by idl: egen smatch=sum(match)
by idl: gen smatch2=_n
* k1 marks the first row of a loan with no match so that every loan is kept
* exactly once
gen k1=1 if smatch2==1 & smatch==0
keep if match==1 | k1==1 
gen goodmatch=(match==1)
drop  smatch* _merge k1 zip* temp match 
* blank the HMDA fields on loans that did not match
replace numdateh=. if goodmatch==0
replace income=. if goodmatch==0
replace idh=. if goodmatch==0
replace hoh=. if goodmatch==0
replace loantypeh=. if goodmatch==0
replace locate="" if goodmatch==0
replace actdate=. if goodmatch==0
* NOTE(review): apprace presumably resolves to apprace1 through variable
* abbreviation - confirm
replace apprace=. if goodmatch==0
replace appsex=. if goodmatch==0
replace single=. if goodmatch==0
replace appethn=. if goodmatch==0
replace seq=. if goodmatch==0

* sorted by idh because the next step merges on idh
sort idh
save tempsave,replace

*find guys that have not matched yet; allow 4 digit zip merge for these guys
* HMDA side: keep the first-lien records whose idh does not appear in
* tempsave and truncate their zip to the first four characters
use hmdatemp1,clear
sort idh
merge idh using tempsave
keep if _merge==1
drop _merge
replace zip=substr(zip,1,4)
save hmdatemp1_t,replace

* LPS side: reload the first-pass result and match on the four-character zip
use tempsave,clear
rm tempsave.dta
gen zip=substr(prop_zip,1,4)
* loans matched in the first pass get a placeholder zip
* NOTE(review): presumably no HMDA zip equals XXXX, so these loans cannot
* pair again - confirm
replace zip="XXXX" if goodmatch==1
* update lets the HMDA values fill the fields blanked after the first pass
joinby zip loanamt  using hmdatemp1_t, unmatched(master)  update
replace diff=abs(numdate-numdateh)
replace diff3=1-(hoh==ho)
replace diff2=1-(loantypeh==loantype)

*only keep the hmda record that has the best match
* same ranking as in the first pass: one loan per HMDA record ...
sort idh diff diff3 diff2 
by idh: gen temp=_n
replace idh=. if temp>1 
replace diff=. if temp>1 
replace diff2=. if temp>1 
replace diff3=. if temp>1 

* ... and one HMDA record per loan, accepted when dates are within 45 days
sort idl diff diff3 diff2 idh
by idl: gen match=1 if idh!=. & _n==1 & diff<=45 
by idl: egen smatch=sum(match)
by idl: gen smatch2=_n
gen k1=1 if smatch2==1 & smatch==0
keep if match==1 | k1==1 
replace goodmatch=1 if match==1
*share that merge to hmda
tab goodmatch
* only loans matched to HMDA in either pass continue
keep if goodmatch==1
drop _merge temp

gen yyt=`yy'
destring yyt,replace



*************************
**merge on junior liens ; 
*************************
* Attach junior liens from hmdatemp2 to the matched first liens and build the
* combined loan amount.
* idh and loanamt are dropped so that, after the joinby, both names hold the
* junior-lien record id and the junior-lien amount from hmdatemp2
drop idh loanamt
* candidates are junior-lien records that agree with the first lien on tract,
* action date, occupancy, income, race, sex, single flag and ethnicity
joinby locate actdate hoh income apprace1 appsex single appethn using hmdatemp2, unmatched(master)  update
* reject a candidate when first plus junior lien exceeds 120 percent of the
* appraisal or when the junior lien is larger than the first lien
* NOTE(review): appraisal presumably resolves to appraisal_amt through
* variable abbreviation - confirm
gen ltvcheck=(orig_amt+loanamt)/appraisal
replace _merge=1 if ltvcheck>1.2
replace _merge=1 if loanamt>orig_amt & loanamt!=.
replace loanamt=0 if _merge==1 | loanamt==.
**set all loan amounts equal to zero except for the best match
* within each junior-lien id, rows with the highest _merge value come first;
* every later row for the same id has its junior amount zeroed
gsort idh -_merge
by idh: gen temp=_n
replace loanamt=0 if temp>1 
replace _merge=1 if temp>1

* back to one row per first-lien loan (idl), summing the junior amounts
collapse (sum) loanamt, by(fico_orig orig_amt appraisal income y cbsa_code poprank2 idl frm_arm loantype locate) fast
rename y year
gen temp=(loanamt!=0)
*share of originations that merge to a junior lien
tab temp
* loanamt becomes first lien plus junior liens; dp is the appraisal less that
* total
replace loanamt=loanamt+orig_amt
gen dp=appraisal-loanamt
drop idl orig_amt appraisal temp

* Put all dollar amounts into 2014 levels based on pce inflation.
* The factor list runs from 2001 through 2013, one factor per year in order.
local vars = "loanamt dp income"
local pce 1.28348 1.26648 1.24191 1.21241 1.17880 1.14809 1.12004 1.08687 1.08757 1.06989 1.04424 1.02541 1.01327
foreach v of local vars {
    local yr = 2001
    foreach f of local pce {
        replace `v'=`v'*`f' if year==`yr'
        local yr = `yr'+1
    }
}

* End of the per-year pass: fix the column order, save this year's file and
* close its log.
* NOTE(review): y was renamed to year above, so the y in this list presumably
* resolves to year through variable abbreviation - confirm
order fico_orig loanamt dp income y cbsa_code poprank2 frm_arm loantype locate
save msa`yy'.dta,replace

log close
}

*append all files together to create final dataset
use msa01,clear
local years = "02 03 04 05 06 07 08 09 10 11 12 13 14"
foreach yy in `years'{
append using msa`yy'
}
* drop loans with no income
drop if income==.
* The msa files written by this script do not contain cur_int_rate (it is not
* carried through the collapse), so an unconditional replace stops the run
* with a variable-not-found error before the export.  Recode only when the
* variable is actually present.
capture confirm variable cur_int_rate
if _rc==0 {
    replace cur_int_rate=-9 if cur_int_rate==.
}
count
* frm is 1 for fixed-rate loans (frm_arm equal to FRM) and 0 otherwise
gen frm=(frm_arm=="FRM")
drop frm_arm
* year is spelled out so the column order does not depend on variable
* abbreviation being enabled
order fico_orig loanamt dp income year cbsa_code poprank2 frm loantype locate
* export without a header row
outsheet using frontierdata.txt,nonames replace
