/********************************************************************************
	 
	 Revisiting Gender Identity and Relative Income within Households –
	A cautionary tale on the potential pitfalls of density estimators.

		Daniel Kuehnle, Michael Oberfichtner, Kerstin Ostermann
						 Last update: March 2021
								
						Dofile for the US analyses
						
	Before executing this file, run KOO_SSBpreparation.do on the Synthetic
		   Data Server of the US Census Bureau and adjust PATH NAME
						
*******************************************************************************/
* ------------------------------------------------------------------------------
* Session setup: clean slate, working directory, log file, ado paths, data
* ------------------------------------------------------------------------------
clear all
version 14.2

cd "PATH NAME"
capture log close
log using KOO_SIPPanalysis, replace text

// make DCdensity available by adding its folder to the ado search path
adopath + "PATH NAME"
// make rddensity available via project-specific PLUS and PERSONAL folders
global projdir "PATH NAME"
sysdir set PLUS "${projdir}/stata/plus"
sysdir set PERSONAL "${projdir}/stata/personal"

// load the data set generated by KOO_[SIPP\SSB]preparation.do
use KOO_SIPP.dta, clear

* Sample indicators; the main loop keeps observations with <indicator> == 1
generate all = 1
generate noEqInc = femshare != 0.5
label variable noEqInc "=1 if spouses do not earn exact same income"
********************************************************************************
*Analysis preparation
********************************************************************************

	* roundy: (re)creates totfemshare_rounded from totfemshare.
	* The rounding step grows with the size of the count:
	*     up to 99        -> nearest 10
	*     above 99        -> nearest 50
	*     above 999       -> nearest 100
	*     above 9999      -> nearest 500
	*     above 99999     -> nearest 1000
	* Missing counts stay missing.
	program define roundy
		capture drop totfemshare_rounded

		* Start from the coarsest step and refine it for smaller counts
		tempvar step
		generate `step' = 1000
		replace `step' = 500 if totfemshare <= 99999
		replace `step' = 100 if totfemshare <= 9999
		replace `step' = 50 if totfemshare <= 999
		replace `step' = 10 if totfemshare <= 99

		generate totfemshare_rounded = round(totfemshare, `step')
	end

* Placeholder variables; the main loop fills them bin by bin
generate share = .
label variable share "Share earned by wife, midpoints"
generate relfemshare = .
label variable relfemshare "Share of couples with relative income in that bin"

* Variable used on the x-axis and as running variable of the density tests
global fs femshare

* Remove old snapshots and save the prepared data, so that every loop
* iteration can start again from this state
snapshot erase _all
snapshot save, label(before_loop)

* ------------------------------------------------------------------------------
* Main loop: one pass per breakpoint (bp), bin width (size) and sample (group).
* Each pass
*   1. restores snapshot 1 and keeps the observations of the chosen sample,
*   2. counts couples per bin in each of the 16 implicates, averages and
*      rounds the counts and writes them to text files,
*   3. smooths the binned shares separately left and right of the breakpoint,
*   4. runs DCdensity (default bin size, averaged default bin size, fixed bin
*      size) and rddensity on every implicate and averages the estimates,
*   5. draws the graph and exports it as .png.
* ------------------------------------------------------------------------------
foreach bp in 0.499999 0.5 0.500001 {
foreach size in 0.05 0.01 {  //
foreach group in all noEqInc {  //

disp "`size' %bins, group: `group'"

* Start every specification from the data saved before the loop
snapshot restore 1

*Insert sample restriction here
quietly keep if `group' == 1
disp r(N_drop) " couples dropped due to group"

local size100 = `size'*100  // will use this to name files
local n_bins = 1 / `size'
* round() keeps floating-point noise out of the file names
local bp100k = round(`bp'*1000000)

foreach identify of numlist 1/16 {
	gen totfemshare`identify' = .
	* generate share group variable and compute shares
	**total sample
	count if identify == `identify'
	gen __N`identify' = r(N)

	* Observation number num holds bin num: its midpoint goes into share and
	* the number of couples of this implicate in that bin into totfemshare
	quietly foreach num of numlist 1/`n_bins' {
		replace share = `num' / `n_bins' - 0.5 * `size' in `num'
		count if $fs > (`num'-1) * `size' & $fs <=`num' *`size' & identify== `identify'
		replace totfemshare`identify' = r(N) in `num'
		} // bin-number
	} //identify
******************************************
**Necessary descriptives for the US Census Bureau
* Average bin counts over the implicates
egen totfemshare = rowmean(totfemshare*)
* NOTE(review): only 4 of the 16 per-implicate count variables are dropped
* here; the remaining ones are not used below - confirm whether intended
drop totfemshare1 totfemshare2 totfemshare3 totfemshare4
	lab var totfemshare "Fraction of couples,unrounded"
	roundy
	
**generating the mean sample size of all 16 implicates
cap drop totalsample
egen totalsample = rowmean(__N*)
	lab var totalsample "Average total sample size"
	* NOTE(review): totals up to 9999 are left unrounded here - confirm
	gen tsample_rounded = totalsample
	replace tsample_rounded = round(totalsample,500) if totalsample>9999 & totalsample<=99999
	replace tsample_rounded = round(totalsample,1000) if totalsample>99999
replace relfemshare = totfemshare_rounded/tsample_rounded

**Tables for US Census staff, necessary for release
outsheet totfemshare totfemshare_rounded totalsample tsample_rounded using "wDC_`size100'bin_`group'_bp`bp100k'.txt", replace
outsheet totfemshare_rounded tsample_rounded using "wDC_`size100'bin_rounded_`group'_bp`bp100k'.txt", replace

******************************************
**"locally weighted scatterplot smoothing" on each side of the breakpoint
lowess relfemshare share if share<`bp', gen(Yj_smooth_left) bwidth(0.5) nograph
lowess relfemshare share if share>`bp' & share<=1, gen(Yj_smooth_right) bwidth(0.5) nograph

*default mean calculation: collect the default bin size of every implicate
foreach identify of numlist 1/16 {
	*run default
	cap drop Yj Xj r0 fhat se_fhat
	DCdensity $fs if identify== `identify', breakpoint(`bp') ///
		generate(Xj Yj r0 fhat se_fhat) nograph
	*save results
	gen dbin`identify' = r(binsize)
	} //identify
	
*average
egen av_dbin_flex= rowmean(dbin*)
sum av_dbin_flex
local avbin_flex_default = r(mean)

foreach identify of numlist 1/16 {
	//Run DCdensity command
		*we set b() to the average bin size to fix the number of bins
		* over all implicates
	cap drop Yj Xj r0 fhat se_fhat
	DCdensity $fs if identify== `identify', b(`avbin_flex_default') breakpoint(`bp') ///
		generate(Xj Yj r0 fhat se_fhat) nograph
	*save results
	gen dtheta`identify' = r(theta)
	gen dse`identify'= r(se)
	gen dbw`identify'= r(bandwidth)
	gen dbinsize`identify'= r(binsize)

	//Run DCdensity command with the bin size of this specification
	* NOTE(review): unlike the calls above this one has no nograph option,
	* so a graph is drawn for every implicate - confirm whether intended
	cap drop Yj Xj r0 fhat se_fhat
	DCdensity $fs if identify== `identify', b(`size') breakpoint(`bp') ///
		generate(Xj Yj r0 fhat se_fhat)

	*save results and default parameters
	gen theta`identify' = r(theta)
	gen se`identify'= r(se)
	gen bw`identify'= r(bandwidth)

	*save for plot
	gen yj`identify' = Yj
	gen xj`identify' = Xj
	gen r0`identify' = r0
	gen fhat`identify' = fhat

	//Run rddensity
	*store min and max for plotting boundaries
	sum $fs
	local min_fs = r(min)
	local max_fs = r(max)

	cap drop RD_*
	rddensity $fs if identify== `identify', c(`bp') plot genvars(RD) ///
		plot_range(`min_fs' `max_fs') plot_n(50 50) plot_grid(es)
	
	*save results
	gen fq`identify' = e(f_qr) - e(f_ql)
	gen logfq`identify' = ln(e(f_qr)) - ln(e(f_ql))
	gen rdseq`identify' = e(se_q)

	*save for plot
	gen rd_f`identify' = RD_f
	gen rd_grid`identify'  = RD_grid

	} //identify

* Remove the working variables of the last DCdensity call. Otherwise the
* wildcards used below for se, r0 and fhat would also match se_fhat, r0 and
* fhat and distort the averages over the 16 implicates.
drop Yj Xj r0 fhat se_fhat
********************************************************************************
**Average and round estimates
********************************************************************************
//DCdensity
*average
egen av_dtheta = rowmean(dtheta*)
egen av_dse = rowmean(dse*)
egen av_dbw = rowmean(dbw*)
egen av_dbin = rowmean(dbinsize*)

egen av_theta = rowmean(theta*)
egen av_se = rowmean(se*)
egen av_bw = rowmean(bw*)

egen avr0 = rowmean(r0*)
egen avfhat = rowmean(fhat*)

*round (a variable name inside a local expression refers to observation 1)
local dtheta = round(av_dtheta, 0.0001)
local dse =  round(av_dse, 0.0001)
local dbandwidth = round(av_dbw, 0.0001)
local dbinsize = round(av_dbin, 0.0001)

local theta = round(av_theta, 0.0001)
local se =  round(av_se, 0.0001)
local bandwidth = round(av_bw, 0.0001)

replace avr0 = round(avr0, 0.0001)
replace avfhat = round(avfhat, 0.0001)

//rddensity
*average
egen av_fq = rowmean(fq*)
egen av_logfq = rowmean(logfq*)
egen av_seq = rowmean(rdseq*)

egen avRD_f = rowmean(rd_f*)
egen avRD_grid = rowmean(rd_grid*)

*round
local fq = round(av_fq, 0.0001)
local logfq = round(av_logfq, 0.0001)
local seq = round(av_seq, 0.0001)

replace avRD_f = round(avRD_f, 0.0001)
replace avRD_grid = round(avRD_grid, 0.0001)


local __N = tsample_rounded[1]
********************************************************************************
*generate graph
********************************************************************************
* The first note line reports the run with the averaged default bin size and
* therefore uses the d-prefixed locals, including its own bandwidth
gr twoway (line Yj_smooth_left share if share< `bp', ///
	lcolor(black) lwidth(medthick) lpattern(longdash)) ///
	(line Yj_smooth_right share if share> `bp', ///
	lcolor(black) lwidth(medthick) lpattern(longdash)),  ///
	xline(`bp', lcolor(black)) legend(off) ///
	yscale(range(0.015(0.02)0.08)) ylabel(0.02 0.04 0.06 0.08) ///
	graphregion(col(white)icol(white)) ///
	plotregion(col(white)icol(white)) ///
	xtitle("Share earned by the wife") ytitle("Fraction of couples") ///
	note("DCdensity default: log diff. in height (se): `dtheta' (`dse'), bw: `dbandwidth', bin: `dbinsize'" ///
		"DCdensity: log difference in height (se): `theta' (`se'), bandwidth: `bandwidth' " ///
		"RDdensity: [log] difference in height (se): [`logfq'] `fq' (`seq'); N=`__N' couples" )

graph export "SIPP_`size100'bin_`group'_bp`bp100k'.png", as(png) replace
														*export of .pdf files is not allowed

} // end group
} // end bin
} // end breakpoint

* Close the log file and stop executing this do-file
log close
exit
