/* This do-file is written to match the data requirements of version XV of the model.
In this version:
1. individuals stay in the sample until labor market history is known.
2. individuals are kept even if wages are missing.
3. some parental background variables are included.
4. both maths and reading skills are used. */


#delimit;
/* From here on every command is terminated by a semicolon. */
version 8;
macro drop _all;

/* Load the source data. -use- takes the option -clear- (not -replace-) to
   discard whatever dataset is currently held in memory. */
use ncdsF,clear;

/* Keep one observation per serial: once sorted, a row whose serial equals
   the serial of the row above it is a repeat and is dropped. */
sort serial;
drop if serial[_n]==serial[_n-1];

/****************************************************/
/*** EDUCATION ***/
/****************************************************/

/* England and Wales Academic Education */

/* `GCSE' (temporary variable; original note: "OLev (A-E) + CSE passed").
   It starts at 0 for anyone with a non-negative, non-missing e430 or e434
   (missing for everybody else) and is then increased by one for each of the
   21 items e443, e446, ..., e503 whose value lies in the range 1-3. */
tempvar GCSE;
ge `GCSE'=0 if  (e430>=0 & e430!=.) | (e434>=0 & e434!=.);
forvalues item=443(3)503 {;
    replace `GCSE'=`GCSE'+inrange(e`item',1,3);
};

/* Exa : exams taken.
     0 = O Levels or CSE taken anywhere (e430 or e434 non-negative and non-missing)
     1 = 5 or more O Levels and CSE sat (e430 + e434)
     2 = 3 or more A Levels sat (e442)
   Stata ranks missing values above every number, so an unguarded ">=" is true
   for a missing operand. A missing e430 or e434 is therefore counted as zero
   in the sum, and e442 must be non-missing before it is compared.
   NOTE(review): negative codes in e430/e434 still enter the sum as they did
   before - confirm whether they should be treated as zero as well. */
cap drop Exa;
qui ge Exa=0 if (e430>=0 & e430!=.) | (e434>=0 & e434!=.);
qui replace Exa=1 if Exa!=. & cond(e430<.,e430,0)+cond(e434<.,e434,0)>=5;
qui replace Exa=2 if Exa!=. & (e442>=3 & e442<.);
label var Exa " Exams taken ";

/* Aca : academic qualification, defined only where Exa is non-missing.
     0 = sat any exam
     1 = e236>=5 (# O Lev A-C | CSE 1), or e236>=3 together with `GCSE'>=5
     2 = e349>=3 and e363>=3 (# A Lev grade A-D)
     3 = level 2 plus a degree / higher education, read from the first of
         educatA4_F, educatA5_F, educatA6_F that is non-missing (value 3)
   The educatA6_F rule used to require educatA6_F==3 and educatA6_F==. at the
   same time, which can never hold; it now requires educatA4_F and educatA5_F
   to be missing, following the pattern of the educatA5_F rule. */
cap drop Aca;
qui ge Aca=0 if Exa!=.;                                                     /* sit any exam */
qui replace Aca=1 if Aca!=. & (e236>=5 & e236!=.);                          /* # (O Lev A-C | CSE 1) >=5 */
qui replace Aca=1 if Aca!=. & (e236>=3 & e236!=.) & (`GCSE'>=5 & `GCSE'!=.);/* # (O Lev A-C | CSE 1) >=3 and `GCSE' >=5 */
qui replace Aca=2 if Aca!=. & (e349>=3 & e349!=.) & (e363>=3 & e363!=.);    /* # (A Lev grade A-D) >=3 */
qui replace Aca=3 if Aca==2 & educatA4_F==3;                                /* got degree */
qui replace Aca=3 if Aca==2 & educatA5_F==3 & educatA4_F==.;                /* Hi Ed, educatA4_F unknown */
qui replace Aca=3 if Aca==2 & educatA6_F==3 & educatA5_F==. & educatA4_F==.;/* Hi Ed, educatA4_F and educatA5_F unknown */
label var Aca " Academic qualification ";

/* Make Exa and Aca agree with each other:
   - a degree (Aca==3) lifts Exa to 3;
   - Exa becomes missing for the strange cases where it is more than one level
     above Aca or below Aca;
   - Aca is blanked wherever Exa ends up missing. */
qui replace Exa=3 if Aca==3;
qui replace Exa=. if (Exa-Aca>1) | (Aca>Exa);
qui replace Aca=. if Exa==.;

/* Fail : defined (as 0) for observations with Aca==2 and sex_F==1, and set to 1
   when any of the three variable triplets (n4575,n4572,n4573),
   (n4624,n4621,n4622), (n4636,n4633,n4634) shows the pattern
   first==2, second==1, third in {25,26,27,29,30,31}.
   NOTE(review): the meaning of the n46xx/n45xx codes is not visible here. */
cap drop Fail;
qui ge Fail=0 if Aca==2 & sex_F==1;
qui replace Fail=1 if Fail==0 & (
      (n4575==2 & n4572==1 & inlist(n4573,25,26,27,29,30,31))
    | (n4624==2 & n4621==1 & inlist(n4622,25,26,27,29,30,31))
    | (n4636==2 & n4633==1 & inlist(n4634,25,26,27,29,30,31))
    );
tab Fail if Aca==2;


/****************************************************/
/*** HISTORY ***/
/****************************************************/

/* Execute historyXV.do without echoing it, then retain only the observations
   whose Sta2 is not missing.
   NOTE(review): Sta2 is presumably created by historyXV.do - confirm. */
run historyXV.do;
keep if Sta2!=.;
     
/****************************************************/
/*** SAMPLE SELECTION ***/
/****************************************************/

/* Active filters */
keep if sex_F==1;                                             /* males */
keep if abiM2_F!=. & abiR2_F!=. & abiM0_F!=. & abiR0_F!=.;    /* ability observed at age 16 and at age 7 */
drop if Exa==.;                                               /* no education information */
drop if econ4==2 | inlist(econ5,3,4) | inlist(econ6,3,4);     /* drop if self-employed at some point in time */

/* StaM and WagM are only referenced by the disabled filters below */
drop StaM WagM;

/* Disabled filters, kept for reference */
*drop if StaM==1;                                /* no status information */
*drop if WagM>2;                                 /* no wage information */
*drop if YT_F==.;                                /* no parental income */
*drop if YT_F==0;                                /* recoded to 0 but no parental income */

/* further selection on status in PANEL DATA section */

/****************************************************/
/*** SKILLS ***/
/****************************************************/

/* Step 1: z-score every test score, (x - mean)/sd, into <name>sd.
   -summarize- already ignores missing values, so no -if- is needed. */
foreach abi of varlist abiM2_F abiR2_F abiM0_F abiR0_F {;
    qui su `abi';
    qui ge `abi'sd=(`abi'-r(mean))/r(sd);
};

/* Step 2: take the first principal component (standardized) of the maths and
   reading z-scores: Abi2 from the *2_F pair, Abi0 from the *0_F pair. */
pca abiM2_Fsd abiR2_Fsd;
score Abi2;

pca abiM0_Fsd abiR0_Fsd;
score Abi0;

/****************************************************/
/*** SELECT VARIABLES ***/
/****************************************************/

* do graphicsXV;

/* Retain the identifier, the wide Sta/Sch/Qua/Exp/Wag variable families, the two
   ability scores and YT_F; every other variable is discarded. */
keep serial Sta* Sch* Qua* Exp* Wag* Abi2 Abi0 YT_F;

/****************************************************/
/*** PANEL DATA ***/
/****************************************************/

/* prepare data in panel form */

/*** RESHAPE ***/
/* Wide -> long: one row per serial and period Prd; rows without a status are discarded. */
reshape long Sta Sch Qua Exp Wag, i(serial) j(Prd);
keep if Sta!=.;

/* Write the list of distinct serials still in the sample to a separate file,
   then return to the panel data. */
preserve;
keep serial;
bysort serial: keep if _n==1;
save C:/supermario/Stata/dynamic/individuals,replace;
restore;

/****************************************************/
/*** TRANSITIONS ***/
/****************************************************/        

/* Typ classifies each person-period by the move from the previous row of the
   same serial (rows ordered by Prd) to the current row. It is 0 unless one of
   the rules below matches.
   Reading of the codes, taken from the rule comments: Sta 1 = S (school),
   2 = W, 3 = U; Sch = school stage; Qua = qualification held.
   Within each serial the first row has no predecessor, so Sta[_n-1] and
   Sch[_n-1] are missing there; rules 5 and 6 use that to pick up the entry
   row (S0). */

/*** TYPE ***/
cap drop Typ;       
qui ge Typ=0;    

/* NOTE(review): this sort looks redundant - every bysort below re-sorts by serial Prd. */
sort Prd;

/* labor market moves */
bysort serial (Prd): replace Typ=1  if Sta==2 & Sta[_n-1]==2;                                  /* tr W -> W */
bysort serial (Prd): replace Typ=2  if Sta==2 & Sta[_n-1]==3;                                  /* tr U -> W */
bysort serial (Prd): replace Typ=3  if Sta==3 & Sta[_n-1]==3;                                  /* tr U -> U */
bysort serial (Prd): replace Typ=4  if Sta==3 & Sta[_n-1]==2;                                  /* tr W -> U */

/* first row of a serial (no previous status) */
bysort serial (Prd): replace Typ=5  if Sta==1 & Sta[_n-1]==. & Sch==1 & Sch[_n-1]==. & Qua==0; /* tr S0 -> S1 */       
bysort serial (Prd): replace Typ=6  if Sta==3 & Sta[_n-1]==. & Sch==0 & Sch[_n-1]==. & Qua==0; /* tr S0 -> U */

/* leaving school stage 1, split by whether qualification 1 was obtained */
bysort serial (Prd): replace Typ=7  if Sta==1 & Sta[_n-1]==1 & Sch==2 & Sch[_n-1]==1 & Qua==1; /* tr S1 -> S2 | Q1=1 */       
bysort serial (Prd): replace Typ=8  if Sta==2 & Sta[_n-1]==1 & Sch==0 & Sch[_n-1]==1 & Qua==1; /* tr S1 -> W | Q1=1 */ 
bysort serial (Prd): replace Typ=9  if Sta==2 & Sta[_n-1]==1 & Sch==0 & Sch[_n-1]==1 & Qua==0; /* tr S1 -> W | Q1=0 */ 
bysort serial (Prd): replace Typ=10 if Sta==3 & Sta[_n-1]==1 & Sch==0 & Sch[_n-1]==1 & Qua==1; /* tr S1 -> U | Q1=1 */ 
bysort serial (Prd): replace Typ=11 if Sta==3 & Sta[_n-1]==1 & Sch==0 & Sch[_n-1]==1 & Qua==0; /* tr S1 -> U | Q1=0 */ 

/* staying in or leaving school stage 2, split by whether qualification 2 was obtained */
bysort serial (Prd): replace Typ=12 if Sta==1 & Sta[_n-1]==1 & Sch==2 & Sch[_n-1]==2 & Qua==1; /* tr S2 -> S2 */       
bysort serial (Prd): replace Typ=13 if Sta==1 & Sta[_n-1]==1 & Sch==3 & Sch[_n-1]==2 & Qua==2; /* tr S2 -> S3 | Q2=1*/       
bysort serial (Prd): replace Typ=14 if Sta==2 & Sta[_n-1]==1 & Sch==0 & Sch[_n-1]==2 & Qua==2; /* tr S2 -> W | Q2=1 */ 
bysort serial (Prd): replace Typ=15 if Sta==2 & Sta[_n-1]==1 & Sch==0 & Sch[_n-1]==2 & Qua==1; /* tr S2 -> W | Q2=0 */ 
bysort serial (Prd): replace Typ=16 if Sta==3 & Sta[_n-1]==1 & Sch==0 & Sch[_n-1]==2 & Qua==2; /* tr S2 -> U | Q2=1 */ 
bysort serial (Prd): replace Typ=17 if Sta==3 & Sta[_n-1]==1 & Sch==0 & Sch[_n-1]==2 & Qua==1; /* tr S2 -> U | Q2=0 */ 

/* staying in or leaving school stage 3 */
bysort serial (Prd): replace Typ=18 if Sta==1 & Sta[_n-1]==1 & Sch==3 & Sch[_n-1]==3 & Qua==2; /* tr S3 -> S3 */       
bysort serial (Prd): replace Typ=19 if Sta==2 & Sta[_n-1]==1 & Sch==0 & Sch[_n-1]==3 & Qua==3; /* tr S3 -> W */       
bysort serial (Prd): replace Typ=20 if Sta==3 & Sta[_n-1]==1 & Sch==0 & Sch[_n-1]==3 & Qua==3; /* tr S3 -> U */       
                                                                                          
/*** ID ***/
/* id : one value per serial, equal to the row number of that person's first
   (lowest Prd) record once the data are ordered by serial and Prd.
   The sort is spelled out here so the result does not depend on the row order
   left behind by earlier commands, nor on how -sort serial- orders rows that
   share a serial (Stata leaves the order of ties undefined unless -stable-
   is given). */
sort serial Prd;
ge id=_n;
by serial: replace id=id[1];

* drop serial;
order id Abi* YT_F Prd Sta Sch Qua Typ Wag Exp;

/****************************************************/     
/*** LOG ***/
/****************************************************/        

/* Write descriptive output to versionXV.log. Any log that is already open is
   closed first; -cap- suppresses the error when none is open. */
cap log close;
log using versionXV.log,replace;  

/* describe the variables in memory */
de,number;

/* cross-tabulate period against status (with row percentages), school stage,
   qualification and transition type */
tab Prd Sta,row; 
tab Prd Sch;
tab Prd Qua;
tab Prd Typ;

/* wage summary per period, leaving out wages equal to 0 */
bysort Prd: su Wag if Wag!=0;

/* wage summary by qualification for periods 8, 19 and 27, restricted to
   Sta==2 with a non-zero wage; the full data are restored afterwards */
preserve;
keep if inlist(Prd,8,19,27) & Sta==2 & Wag!=0; 
bysort Prd Qua: su Wag;
restore;

/* summaries of YT_F and the ability scores */
su YT_F; 
sum Abi*;
sort id Prd;

cap log close;
              
/****************************************************/     
/*** SAVE DATA FILE ***/
/****************************************************/        

/* Order the panel by person and period and write it to versionXV.dta,
   overwriting any existing file. -saveold- is used instead of -save-;
   presumably an older .dta format is needed downstream - confirm. */
sort id Prd;
saveold versionXV.dta,replace;               
                                                                                                                       
