
*** Final steps before running estimation ***
* - split the panel of high-tech industries
* - create treatment variables (ownership)

* Project root: edit this to point at the project folder on your machine.
global path "."					/*path on your computer*/
cd "$path"

* Load the cleaned panel dataset (1998-2007).
* Forward slashes are used as the directory separator because Stata accepts
* them on Windows, macOS and Linux alike; backslashes only work on Windows.
use "$path/_datasets/TFP_china.dta", clear

* Boolean expression selecting the 2-digit CIC industries kept in the
* high-tech panel (35, 36, 37, 39, 40, 41). Stored in a global so the same
* filter can be reused as `if $Tech'.
global Tech "inlist(cic_2_digit, 35, 36, 37, 39, 40, 41)"

keep if $Tech

* Shorter names for the main variables
* One indicator per distinct value of Ownership (Ownership1, Ownership2, ...);
* the 4th and 5th indicators are kept under the names v4 and v5.
* NOTE(review): v4/v5 line up with Ownership==4/5 only if values 1-5 are all
* present in the sample -- confirm.
tabulate Ownership, generate(Ownership)
rename Ownership4 v4
rename Ownership5 v5
rename lnInputShare sm

* Three-way ownership grouping:
*   1 if Ownership<=3, 2 if Ownership==4, 3 if Ownership==5, missing otherwise
*   (a missing Ownership fails all three tests and therefore stays missing).
gen type = cond(Ownership<=3, 1, cond(Ownership==4, 2, cond(Ownership==5, 3, .)))

*drop k l m
* Log output, capital, labour and materials under short names
rename lnrY yg
rename lnrK k
rename lnL l
rename lnrM m
		
		
* Ownership-switch indicators, built within firm (NewID) in year order.
* "Previous" means the firm's previous observation in the data, which is the
* previous calendar year only when the firm has no gaps in its panel.
*   d_OECD / d_HKMT : 1 in the observation where Ownership becomes 5 / 4
*                     after having been <=3 in the previous observation
*   ex_OECD / ex_HKMT: 1 in the observation immediately before such a switch
bysort NewID (year): gen d_OECD = (_n>1 & Ownership[_n-1]<=3 & Ownership==5)
bysort NewID (year): gen d_HKMT = (_n>1 & Ownership[_n-1]<=3 & Ownership==4)
bysort NewID (year): gen ex_OECD = (d_OECD==0 & d_OECD[_n+1]==1)
bysort NewID (year): gen ex_HKMT = (d_HKMT==0 & d_HKMT[_n+1]==1)

* Pooled versions: d flags a switch of either kind, v flags v4 or v5 equal to 1
gen d = inlist(1, d_HKMT, d_OECD)
gen v = inlist(1, v4, v5)
* ex: 1 in the observation immediately before a switch of either kind
bysort NewID (year): gen ex = (d==0 & d[_n+1]==1)

* Firm-level treatment counters: number of switch events per firm (NewID),
* repeated on every observation of that firm. The codebook calls report how
* many distinct firms fall in each group.

* Any switch
egen treat = total(d), by(NewID)
codebook NewID if treat>=1
codebook NewID if treat>=2

* Same counters by switch destination (treatHKMT, then treatOECD); for the
* ">=2" group only the switching observations themselves are inspected.
foreach g in HKMT OECD {
	egen treat`g' = total(d_`g'), by(NewID)
	codebook NewID if treat`g'>=1
	codebook NewID if treat`g'>=2 & d_`g'==1

}


* Some further cleaning before estimation
********************************************************************************
* CLEANING CODE 1
* The counts in the trailing comments are the numbers of observations each
* command removed when the author ran it; the commands are applied in order.

* Inputs/outputs that are system-missing or at/below the stated threshold
drop if (K == . | K <= 0) /*dropped 2948 observations*/
drop if (rK == . | rK <= 0) /*dropped 1992 observations*/
drop if (rVA == . | rVA <= -0.01) /*dropped 28957 observations*/
drop if (L == . | L <= 8) /*dropped 3435 observations*/
drop if (M == . | M <= 0) /*dropped 476 observations*/

* Input share must lie strictly between 0 and 1.
* (Stata ranks missing above every number, so ">= 1" also removes missings.)
drop if InputShare >= 1 /*dropped 4924 observations*/
drop if InputShare <= 0 /*dropped 0 observations*/

* Ownership shares outside [0,1].
* NOTE(review): "> 1" also removes observations with missing OECDShare, and
* there is no matching upper-bound check for HKMTShare -- confirm intended.
drop if OECDShare < 0 /*dropped 1 observations*/
drop if HKMTShare < 0 /*dropped 0 observations*/
drop if OECDShare > 1 /*dropped 4 observations*/

********************************************************************************
* CLEANING CODE 2

* Drop outliers and SOE firms
	* firms with two or more ownership switches
	drop if treat >= 2 			/*dropped 534 obs*/
	
	* log material share: keep only -4 < sm < -0.05 (cut-offs described by the
	* author as the 1% and 99% tails); missing sm is removed as well
	summarize sm, detail
	drop if !(sm > -4 & sm < -0.05)	/*dropped 10931 obs*/
	
	* SOE firms (Ownership code 1)
	drop if Ownership == 1		/*dropped 60329 obs*/
	
	* log employment at or above 10 (99% tail); missing l is removed as well
	drop if l >= 10				/*dropped 10 obs*/
	
	
* Save the cleaned high-tech panel. Forward slashes keep the path valid on
* Windows, macOS and Linux.
save "$path/_datasets/TFP_china_tech.dta", replace

* Factor-share diagnostics (computed after the save, so not stored on disk).
* Remove leftovers one variable at a time: -capture drop a b c- drops nothing
* at all if any one of the listed variables does not exist.
foreach x in laborshare materialshare capshare share_ratio share_relative {
	capture drop `x'
}
gen laborshare = tot_wage/Y
gen materialshare = InputShare
gen share_relative = laborshare/materialshare

* Write the yearly share tables to their own named log, so the script neither
* fails when another log is already open nor closes that log.
capture log close share_check
log using share_check, replace name(share_check)
* v==1: observations with v4 or v5 equal to 1; v==0: all other observations
tabstat laborshare materialshare share_relative if v==1, by(year)
tabstat laborshare materialshare share_relative if v==0, by(year)
log close share_check


