
/* Build the person-by-period panel edyr by match-merging inc (id, cm, yr, age
   only) with hdc and hgc on id and cm. */
data edyr;
	merge inc (keep = id cm yr age) hdc hgc;
	by id cm;
run;

/* Within each id (rows read in id/yr order): from the first row where hdc is
   4 or more, set hdc to 4 on that row and on every later row of the same id.
   Rows before that point keep their original hdc. */
data edyr;
	set edyr;
	by id yr;
	retain hdc_carry;
	if first.id then hdc_carry = .;		/* start each person with nothing carried */
	if hdc >= 4 then hdc_carry = 4;
	if not (hdc_carry = .) then hdc = hdc_carry;
	drop hdc_carry;
run;

/* One row per interview that has a date, tagged with yr. Interviews dated
   before September are assigned to the previous calendar year. */
data edatt;
	set interview (where=(date ne .));
	by id date;
	/* (month(date) < 9) evaluates to 1 or 0, so this subtracts one year
	   for January-August interviews */
	yr = year(date) - (month(date) < 9);
	keep id yr cv_enrollstat;
run;

/* Keep only the first row of each id/yr group. */
data edatt;
	set edatt;
	by id yr;
	if not first.yr then delete;
run;

/* Bring in ColAttend and Nmon from hist_edatt. When cv_enrollstat is 9, 10 or
   11 and Nmon is below 12, colattend is set to 1.
   NOTE(review): a missing Nmon also satisfies Nmon<12 in SAS, so rows without
   a hist_edatt match are overridden too - confirm this is intended. */
data edatt;
	merge edatt hist_edatt (keep = id yr ColAttend Nmon);
	by id yr;
	if cv_enrollstat in (9,10,11) and Nmon<12 then colattend = 1;
	drop cv_enrollstat Nmon;
run;

/* Add the edatt variables and three ed_acadyr variables to edyr by id and yr.
   Only id/yr rows that exist in edyr are kept. */
data edyr;
	merge edyr (in=in_edyr)
		  edatt
		  ed_acadyr (keep=id yr grade_attended grade_progression days_suspended);
	by id yr;
	if not in_edyr then delete;
run;

/* edgap: every edyr row plus the colattend and hgc values of the row before
   it (lag1_*) and of the next two rows (lead1_*, lead2_*).
   The leads come from merging edyr, without a BY statement, against copies of
   itself that start at observation 2 and 3. The lags come from LAG(), which
   is called on every row so its queue stays aligned. A neighbour's values are
   blanked whenever that neighbour belongs to a different id. */
data edgap;
	merge edyr
		  edyr (firstobs = 2 keep=id colattend hgc
			rename=(id=lead1_id colattend=lead1_colattend hgc=lead1_hgc))
		  edyr (firstobs = 3 keep=id colattend hgc
			rename=(id=lead2_id colattend=lead2_colattend hgc=lead2_hgc));

	lag1_id = lag(id);
	lag1_colattend = lag(colattend);
	lag1_hgc = lag(hgc);

	if lag1_id ne id then call missing(lag1_colattend, lag1_hgc);
	if lead1_id ne id then call missing(lead1_colattend, lead1_hgc);
	if lead2_id ne id then call missing(lead2_colattend, lead2_hgc);

	drop lag1_id lead1_id lead2_id;
run;

/* gap1: fill in hgc for rows where it is missing, the previous row's hgc is
   12 or more, and the next row's hgc is positive.
     - next <= previous          : reuse the previous value
     - next =  previous + 1      : add 1 unless the previous colattend is below
                                   .75 and also below this row's colattend
     - next >= previous + 2      : previous value + 1
   Output has id, yr and the filled hgc only. */
data gap1;
	set edgap;
	where hgc=. and lag1_hgc>=12 and lead1_hgc>0;

	select;
		when (lag1_hgc>=lead1_hgc) hgc = lag1_hgc;
		when (lag1_hgc=lead1_hgc-1) do;
			if lag1_colattend>=.75 or not (lag1_colattend<colattend)
				then hgc = lag1_hgc + 1;
			else hgc = lag1_hgc;
		end;
		when (lag1_hgc<=lead1_hgc-2) hgc = lag1_hgc + 1;
		otherwise;	/* no rule applies: hgc stays missing */
	end;

	keep id yr hgc;
run;

/* gap2: fill in hgc for a run of two consecutive missing rows. A row
   qualifies when its own hgc and the next row's hgc are missing, the previous
   row's hgc is 12 or more, and the hgc two rows ahead is positive.
   hgc1 is the value for the qualifying row (yr) and hgc2 the value for the
   following row (yr + 1). Both rows are written directly with OUTPUT, so the
   result has id, yr and hgc in the same layout as gap1. This replaces the
   earlier PROC TRANSPOSE round trip, which recovered the year offset from the
   transposed variable name via an implicit character-to-numeric conversion. */
data gap2;
	set edgap;
	where hgc=. and lag1_hgc>=12 and lead1_hgc=. and lead2_hgc>0;

	/* --- first gap row, and both rows when the total change fixes them --- */
	if lag1_hgc>=lead2_hgc then do;
		/* no increase across the gap: carry the earlier value through */
		hgc1 = lag1_hgc;
		hgc2 = lag1_hgc;
	end;
	else if (lead2_hgc-lag1_hgc) in (1,2) then do;
		/* only hgc1 is decided here; hgc2 is settled further down */
		if lag1_colattend>=.75 then hgc1 = lag1_hgc + 1;
			else if lag1_colattend<colattend then hgc1 = lag1_hgc;
			else if lag1_colattend<lead1_colattend then hgc1 = lag1_hgc;
			else hgc1 = lag1_hgc+1;
	end;
	else if (lead2_hgc-lag1_hgc)>=3 then do;
		hgc1 = lag1_hgc + 1;
		hgc2 = lag1_hgc + 2;
	end;

	/* --- second gap row, when not already set above --- */
	if hgc2 = . then do;
		if hgc1 = lead2_hgc then hgc2 = hgc1;
		else if (lead2_hgc-hgc1)=1 then do;
			if colattend>=.75 then hgc2 = hgc1 + 1;
			else if colattend<lead1_colattend then hgc2 = hgc1;
			else hgc2 = hgc1+1;
		end;
		else if (lead2_hgc-hgc1)=2 then hgc2 = hgc1 + 1;
	end;

	/* one output row per gap year: (id, yr, hgc1) then (id, yr+1, hgc2) */
	hgc = hgc1;
	output;
	yr = yr + 1;
	hgc = hgc2;
	output;

	keep id yr hgc;
run;
	
/* edyr2: edyr with the filled-in hgc values from gap1 and gap2 applied.
   In a match-merge, a same-named variable read from a data set listed later
   replaces the value read earlier, so for id/yr rows present in gap1 or gap2
   their hgc takes the place of edyr's. The order of the data sets matters. */
data edyr2;
	merge edyr gap1 gap2;
	by id yr;
run;

/* hgc9: for each id, the first yr (and age) at which hgc moves from 8 on the
   previous row to 9, with age between 14.5 and 17.5 and no hgc value between
   0 and 9 (exclusive) on that row or any later-yr row. */

/* Pass 1 - read each id from latest yr to earliest. hgcLT9 turns to 1 at the
   first row with 0 < hgc < 9 and stays 1 for all earlier years. */
proc sort data=edyr2 out=hgc9;
	by id descending yr;
run;
data hgc9;
	set hgc9;
	by id descending yr;
	retain hgcLT9;
	if first.id then hgcLT9 = (0<hgc<9);
	else if 0<hgc<9 then hgcLT9 = 1;
run;

/* Pass 2 - back in id/yr order, keep only the qualifying 8 -> 9 rows.
   LAG() is called before any row is deleted so it sees every row. */
proc sort data=hgc9;
	by id yr;
run;
data hgc9 (keep=id yr age rename=(age=agehgc9 yr=yrhgc9));
	set hgc9;
	by id yr;
	laghgc = lag(hgc);
	if first.id then delete;			/* no previous row within this id */
	if hgcLT9 ne 0 then delete;
	if not (laghgc = 8 and hgc = 9) then delete;
	if not (14.5<=age<=17.5) then delete;
run;

/* Pass 3 - one row per id: the first qualifying row. */
data hgc9;
	set hgc9;
	by id agehgc9;
	if not first.id then delete;
run;


/* prom: rows of edyr2 for ids that appear in hgc9, from yrhgc9 onward, with
   hgc adjusted against hdc and age truncated to a whole number. */
data prom;
	merge edyr2 hgc9 (in = has_hgc9 keep = id yrhgc9);
	by id;

	if not (has_hgc9 and yr>=yrhgc9) then delete;

	/* hgc/hdc adjustments, applied one after another in this order */
	if hgc=12 and 0<=hdc<=1 then hgc = 11;
	if hgc=16 and 0<=hdc<=3 then hgc = 15;
	if hgc>=16 and hdc>=4 then hgc = 16;

	age = floor(age);

	keep id yr hgc hdc age;
run;

/* From the first row of an id where hgc equals 16, hold hgc at 16 on that row
   and on every later row of the same id. */
data prom;
	set prom;
	by id age;
	retain hgc_carry;
	if first.id then hgc_carry = .;
	if hgc = 16 then hgc_carry = 16;
	if not (hgc_carry = .) then hgc = hgc_carry;
	drop hgc_carry;
run;
/* Cross-tabulate hgc by hdc, including missing values. */
proc freq data = prom;
	tables hgc*hdc / missing;
run;

/* EdPromotion: 1 when the following year's hgc is greater than this row's
   hgc, otherwise 0. Rows are read from latest yr to earliest so that LAG()
   returns the following year's value. It is left missing on the latest row of
   each id and whenever either hgc value is missing. */
proc sort data = prom;
	by id descending yr;
run;
data prom;
	set prom;
	by id descending yr;
	nexthgc = lag(hgc);
	if first.id or hgc=. or nexthgc=. then EdPromotion = .;
	else EdPromotion = (nexthgc > hgc);
run;
proc sort data = prom;
	by id yr;
run;

/* Split prom into prom2 (drprec = 0) and drpsamp (drprec > 0).
   drprec starts at 0 for each id and is retained, so once any rule below
   fires, that row and every later row of the id go to drpsamp. The first
   matching rule on a row sets the code:
     1  age >= 25
     2  last row of the id
     3  EdPromotion missing
     4  hgc >= 16 and hdc <= 3
     5  1 <= hgc <= 15 and hdc >= 4
     6  1 <= hgc <= 11 and hdc = 2
   NOTE(review): hdc<=3 in rule 4 is also true when hdc is missing, whereas the
   hgc adjustments earlier in the program use 0<=hdc<=3 - confirm intended. */
data prom2 (where=(drprec=0)) drpsamp (where=(drprec>0));
	set prom;
	by id yr;
	retain drprec;
	if first.id then drprec = 0;
	select;
		when (age>=25) drprec = 1;
		when (last.id) drprec = 2;
		when (EdPromotion = .) drprec = 3;
		when (hgc>=16 and hdc<=3) drprec = 4;
		when (1<=hgc<=15 and hdc>=4) drprec = 5;
		when (1<=hgc<=11 and hdc=2) drprec = 6;
		otherwise;	/* no rule fired: keep the retained value */
	end;
run;

/* Keep the first dropped row per id, i.e. the row where a rule first fired. */
data drpsamp;
	set drpsamp (keep=id yr age drprec);
	by id yr;
	if not first.id then delete;
run;
proc freq data = drpsamp;
	tables drprec age;
run;

/* Frequency checks on the retained sample. */
proc freq data = prom2;
	tables hgc*hdc / missing;
run;
proc freq data = prom2;
	tables hgc*nexthgc / missing;
run;

/* Remove the helper variables from prom2 and blank EdPromotion on rows where
   hgc is 16 or more, then tabulate the result. */
data prom2;
	set prom2 (drop=drprec nexthgc hdc);
	if hgc>=16 then call missing(EdPromotion);
run;
proc freq data = prom2;
	tables hgc*EdPromotion / missing;
run;


/* hist_ed: the yearly variables from edyr2 combined by id and yr with
   Edpromotion and hgc from prom2. prom2's hgc is renamed to hgcProm so it
   sits alongside edyr2's hgc instead of replacing it. There is no IN= filter,
   so id/yr rows found in either data set are kept. */
data hist_ed;
	merge edyr2 (keep = id yr age hgc grade_attended grade_progression days_suspended)
	prom2 (keep = id yr hgc Edpromotion rename=(hgc=hgcProm));
	by id yr;
run;

/* Delete the intermediate data sets. PROC DATASETS uses RUN-group processing:
   RUN executes the DELETE but leaves the procedure active, so QUIT is needed
   to end it here instead of relying on whatever step happens to come next. */
proc datasets;
	delete edyr edyr2 hgc9 edgap gap1 gap2 prom prom2;
run;
quit;

