* Session setup: declare the interpreter version first so that every command
* below runs under the same version, then wipe data, macros, matrices and Mata.
version 13
clear
macro drop _all
matrix drop _all
clear mata
* capture: do not stop the script if clearing matrices raises an error
capture clear matrix
* The former "set memory 500m" line was removed: memory is managed
* automatically since Stata 12 and the setting is ignored there, while older
* releases cannot run a script that declares version 13 anyway.
set more off
set scheme s1manual
********************************************************************************
* Purpose: put together the cognitive, non-cognitive and health measures from
* the adult and the youth (age 17) samples of the GSOEP.
********************************************************************************

	* Fill in the four global path names below before running this file.
	* Only datapatha and dofilepath are referenced in the visible part of this
	* file; datapath and datapathl are presumably read by the do-files called
	* further down (to be confirmed against those files).

		* folder holding the SOEP-wide data
		global datapath ""
		* folder holding the SOEP-long data
		global datapathl ""
		* folder the generated datasets are written to
		global datapatha ""
		* folder holding the do-files
		global dofilepath ""

*****************************adult data*****************************************
* Run the adult do-file, sort the result by household, person and youth flag,
* and store it as data1. Paths use forward slashes, which Stata accepts on
* Windows, macOS and Linux alike (backslashes work on Windows only).

	do "$dofilepath/adultdata.do"
	sort hhnr persnr youth
	save "$datapatha/data1.dta", replace

*****************************youth data*****************************************
* Reload data1, run the youth do-file on top of it and store the result as
* data2. What the two do-files do internally is not visible from this file.
	use "$datapatha/data1.dta", clear
	do "$dofilepath/youthdata.do"
	save "$datapatha/data2.dta", replace
	
*****************************************************************define sample*******************************************************************
* Sample restrictions applied to data2. Conditions with youth==0 affect adults
* only, conditions with youth==1 affect youths only.
* The path uses a forward slash so the file also loads outside Windows.
	use "$datapatha/data2.dta", clear

**1* adults: keep only people between 26 and 65 years of age
*    (inrange returns 0 for a missing age, so adults with missing age are
*    dropped as well, exactly as with the comparison b_age>65)
	drop if !inrange(b_age, 26, 65) & youth==0
**2* keep only the samples A (west Germany), C (east Germany), E (test samples for innovation, representative),
*    F (innovation sample, representative), H (additional sample, representative);
*    observations with sample1 equal to system missing and all youths are kept too.
*    NOTE(review): the mapping of the numeric codes to the sample letters is not visible here - confirm against the codebook
	keep if inlist(sample1, 1, 7, 10, 11, 12, 16) | sample1==. | youth==1
**3* drop everybody who is not German (observations with missing b_german are kept)
	drop if b_german==0
**4* youths from the 2001 interview: drop everyone not born in 1984, i.e. not 17 at the interview
	drop if y_erhebj==2001 & y_gebjah!=1984 & youth==1
**5* drop adult individuals in vocational training
	drop if o_train==1 & youth==0
**6* drop adults with employment status 5 or 8; the original note names partial retirement with
*    zero working hours ("Altersteilzeit mit Arbeitszeit Null") - code meaning to be confirmed
	drop if inlist(o_empst, 5, 8) & youth==0
**7* drop all individuals that have no information for the education variable (no connecting of samples possible)
	drop if o_fhuni_all==.
**8* drop all adults who are not part of the labor force
	drop if o_olf==1 & youth==0
**9* drop every youth for whom ANY of the ten locus of control items exceeds 100;
*    Stata orders missing values above all numbers, so a missing item also triggers the drop
	foreach item in yLoc_sverl yLoc_serre yLoc_sglue yLoc_sand yLoc_shart yLoc_szwei yLoc_ssozu yLoc_sfaeh yLoc_skntr yLoc_senga {
		drop if youth==1 & `item'>100
	}
**10* drop individuals with missings in the siblings variable
	drop if b_nrsiblings==.
**11* keep core sample: b_eduwest equal to 1 and either b_cohort1 equal to 1 or a youth
	keep if b_eduwest==1 & (b_cohort1==1 | youth==1)

*****************************************************************************************************************************************************************************
* Variable lists used for the MCMC code.
* Referenced further down in this file: varused_youth_covariates_meas,
* varused_covariates_schooling, varused_adult_covariates_out, varused_adult_out
* and Locus_youth. The remaining lists are not referenced in the visible part
* of this file.
* NOTE(review): the sample definition filters on b_nrsiblings while the youth
* covariate list below names nr_siblings_all - confirm that both variables exist.

* youth measurements
local varused_youth_measP "y_Po1 y_Po2 y_Po3 y_Pc1 y_Pc2 y_Pc3 y_Pe1 y_Pe2 y_Pe3 y_Pa1 y_Pa2 y_Pa3 y_Pn1 y_Pn2 y_Pn3 y_analog y_rechenz y_matrize"
local varused_youth_meas "yLoc_sverl yLoc_serre yLoc_sglue yLoc_sand yLoc_shart yLoc_szwei yLoc_ssozu yLoc_sfaeh yLoc_skntr yLoc_senga"

* covariates of the measurement system
local varused_youth_covariates_meas "b_lcity b_mcity b_scity north south y_gymempfeh y_hauptempfeh nr_siblings_all b_broken  b_fedu_gym b_fedu_do b_medu_gym b_medu_do"
local varused_adults_covariates_meas "b_broken b_fedu_hs b_fedu_gym b_medu_hs b_medu_gym north south b_lcity b_mcity b_scity"

* adult outcomes and their covariates
local varused_adult_out "o_lmparta o_lnhrlywagea"
local varused_adult_covariates_out "b_age b_unemprate b_lcity b_mcity b_scity north south b_married b_broken b_fedu_gym b_fedu_do b_medu_gym b_medu_do b_kids"

* covariates of the schooling equation
local varused_covariates_schooling "b_lcity b_mcity b_scity north south b_age26_30 b_age31_35 unemp_edu b_broken b_fedu_gym b_fedu_do b_medu_gym b_medu_do"

* Locus of control items; the youth list is the same ten variables as varused_youth_meas
local Locus "Loc1 Loc2 Loc3 Loc4 Loc5 Loc6 Loc7 Loc8 Loc9 Loc10"
local Locus_youth "`varused_youth_meas'"


*********************************************Restrict sample to individuals with nonmissings in measurement system and outcomes system***************************************************************************
* Technique: misscov starts at 1 and is multiplied by every variable of a list.
* Arithmetic with a missing value yields missing, so misscov ends up missing
* exactly for the observations with at least one missing variable in the list
* (a zero in a covariate gives 0, not missing, and is therefore harmless).
* The lists are the local macros defined earlier in this file.

* Start from a clean scratch variable. A plain captured generate would silently
* keep an already existing misscov, with whatever values it holds.
	capture drop misscov
	generate misscov = 1

* Drop all youths with missing covariates in the measurement system or with a
* missing locus of control item
	foreach cov of local varused_youth_covariates_meas {
		quietly replace misscov = misscov * `cov'
	}
	drop if missing(misscov) & youth==1
	foreach v of local Locus_youth {
		drop if missing(`v') & youth==1
	}

* Drop everybody who has missings in the schooling equation covariates (youths and adults)
	quietly replace misscov = 1
	foreach cov of local varused_covariates_schooling {
		quietly replace misscov = misscov * `cov'
	}
	drop if missing(misscov)

* Drop all adults with missings in the covariates of the wage equation
	quietly replace misscov = 1
	foreach cov of local varused_adult_covariates_out {
		quietly replace misscov = misscov * `cov'
	}
	drop if missing(misscov) & youth==0

* Drop all adults with missings in the outcome variables
	quietly replace misscov = 1
	foreach cov of local varused_adult_out {
		quietly replace misscov = misscov * `cov'
	}
	drop if missing(misscov) & youth==0

*****************************************************************************************************************
* misscov is deliberately left in the dataset, as before, so that the set of
* variables stored in data3 does not change.
	save "$datapatha/data3.dta", replace

**************************************************************************************************************************
