* Start from an empty session and turn off output paging so the script
* runs unattended.
clear
set more off

* Close any log left open by an earlier (e.g. interrupted) run; without this,
* -log using- aborts because a log file is already open. -capture- swallows
* the error raised when no log is open.
capture log close
log using MLE_listing_selling.log, replace

* Load the listing-level data, replacing whatever is in memory.
import delimited "MLS.csv", clear

/*------------------------------------------------------------------------
Definition of the variables used in the estimation

* sold_dummy: dummy for whether the listing is sold
* H: intrinsic value estimated by using the procedure described in Online Appendix A 
* logH = log(H)
* saleamount: transaction price for the sold listing
* listprice: listing price
* rL = listprice/H
* rS = saleamount/H
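(Note: saleamount is positive for all observations with sold_dummy = 1 and is missing when sold_dummy = 0. Because Stata may exclude observations with missing values in rS, rS is set to 0 when sold_dummy = 0. NOTE(review): ln(0) evaluates to missing in Stata, so lnrS presumably also needs a non-missing placeholder for unsold listings -- confirm in the data-construction code.)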
* lnrL = ln(rL)
* lnrS = ln(rS)
* client_owned: dummy for whether the property is owned by the agent
* condo: dummy for whether the property is a condominium
* attached: dummy for whether the property is attached to other houses
* co_op: dummy for whether the type of the property is a housing cooperative 
* beds: the number of bedrooms
* rooms: the number of rooms
* fullbaths: the number of full bathrooms
* garage: the number of garage spaces
* house_age: house age
* basement_full: dummy for whether the property has a full basement.
* basement_partial:  dummy for whether the property has a partial basement.
* avg_ln_list: the tract-level average log listing price, excluding the house.
* zipcode_FE: integer value assigned to each 5 digit zip code
* year_month_FE: integer value assigned to each year and month

------------------------------------------------------------------------*/


*-----------------
* ML estimation 
* The parameter groups (A, B, C, D, E) below correspond to the panel names in Table 5. 
*-----------------

* House-characteristic covariates, referenced as $Z in the model equations below
global Z "condo attached co_op beds rooms fullbaths garage house_age basement_full basement_partial"


* 1. Simple model: no correlation
*    Evaluator: lnL_rL (method lf; the program is defined outside this section).
*    Dependent variables: sold_dummy, lnrS, lnrL.
*    Equations A, B, C: client_owned, house characteristics ($Z),
*    year-month and zip-code fixed effects.
*    Equations D, E: client_owned (plus the default constant).
*    Standard errors are clustered on census_tract.
ml model lf lnL_rL ///
    (A: sold_dummy lnrS lnrL = client_owned $Z i.year_month_FE i.zipcode_FE) ///
    (B: client_owned $Z i.year_month_FE i.zipcode_FE) ///
    (C: client_owned $Z i.year_month_FE i.zipcode_FE) ///
    (D: client_owned) ///
    (E: client_owned) ///
    , vce(cluster census_tract)

* Look for feasible starting values, then maximize; -difficult- selects the
* alternative stepping rule for nonconcave regions of the likelihood.
ml search
ml maximize, difficult

* Keep the results under the name a1 (for the table) and the coefficient
* vector as matrix a1 (used as starting values by the next model).
estimates store a1
matrix a1 = e(b)


* 2. Simple model: no correlation & rL
*    Same as model 1, except that lnrL enters equations B and C as a regressor.
ml model lf lnL_rL ///
    (A: sold_dummy lnrS lnrL = client_owned $Z i.year_month_FE i.zipcode_FE) ///
    (B: client_owned lnrL $Z i.year_month_FE i.zipcode_FE) ///
    (C: client_owned lnrL $Z i.year_month_FE i.zipcode_FE) ///
    (D: client_owned) ///
    (E: client_owned) ///
    , vce(cluster census_tract)

* Start from the model-1 estimates; -skip- ignores entries of a1 that have
* no counterpart in this specification.
ml init a1, skip
ml search
ml maximize, difficult

* Keep the results as a2 (table) and matrix a2 (starting values for model 3).
estimates store a2
matrix a2 = e(b)


* 3. Simple model: no correlation & rL & IV
*    Same as model 2, except that logH and avg_ln_list are added to equation A.
ml model lf lnL_rL ///
    (A: sold_dummy lnrS lnrL = client_owned logH avg_ln_list $Z i.year_month_FE i.zipcode_FE) ///
    (B: client_owned lnrL $Z i.year_month_FE i.zipcode_FE) ///
    (C: client_owned lnrL $Z i.year_month_FE i.zipcode_FE) ///
    (D: client_owned) ///
    (E: client_owned) ///
    , vce(cluster census_tract)

* Start from the model-2 estimates; -skip- ignores entries of a2 that have
* no counterpart in this specification.
ml init a2, skip
ml search
ml maximize, difficult

* Keep the results as a3 (table) and matrix a3 (starting values for model 4).
estimates store a3
matrix a3 = e(b)


* 4. Main model: correlation & rL & IV
*    Evaluator: lnL_rL_tobit (method lf; the program is defined outside this
*    section). Relative to model 3, lnrL is no longer a regressor in B and C,
*    and five constant-only parameters are added: alpha, gamma, rho12, rho13,
*    rho23.
*    NOTE(review): alpha and gamma presumably play the role of the lnrL
*    coefficients from models 2-3 -- confirm against lnL_rL_tobit.
*    The order of equations/parameters below is the order in which ml hands
*    them to the evaluator, so it must not be changed.
ml model lf lnL_rL_tobit ///
    (A: sold_dummy lnrS lnrL = client_owned logH avg_ln_list $Z i.year_month_FE i.zipcode_FE) ///
    (B: client_owned $Z i.year_month_FE i.zipcode_FE) ///
    /alpha ///
    (C: client_owned $Z i.year_month_FE i.zipcode_FE) ///
    /gamma ///
    (D: client_owned) ///
    (E: client_owned) ///
    /rho12 /rho13 /rho23 ///
    , vce(cluster census_tract)

* Start from the model-3 estimates; -skip- ignores entries of a3 (e.g. B:lnrL,
* C:lnrL) that have no counterpart in this specification.
ml init a3, skip
ml search
ml maximize, difficult

* Keep the results as a4 (table) and the coefficient vector as matrix a4.
estimates store a4
matrix a4 = e(b)



* Tables
* Side-by-side coefficients (4 decimals) and standard errors for the four
* stored models, restricted to the parameters of interest.
* Fix: the keep() list previously named A:avg_ln_listreal, but the regressor
* in the models above is avg_ln_list, so that name matched no coefficient.
* NOTE(review): alpha:_cons, gamma:_cons and rho*:_cons assume the /name
* parameters of model 4 are stored as name:_cons. Stata 15 and later may label
* them differently unless run under earlier version control -- confirm
* against the column names of e(b) for a4.
esttab a1 a2 a3 a4, b(%12.4f) se ///
    keep(A:client_owned A:avg_ln_list A:logH ///
         B:client_owned alpha:_cons B:lnrL ///
         C:client_owned gamma:_cons C:lnrL ///
         D:client_owned D:_cons ///
         E:client_owned E:_cons ///
         rho12:_cons rho13:_cons rho23:_cons) ///
    star(+ 0.10 * 0.05 ** 0.01)


* Close the log opened at the top of the script.
log close
