% % Binned scatter plots 
% clear all
% VAR_SET=130; 
% EndYear=2013;
% StartYear=1986;
% load ../Data/msa_data_jae;
% Suffix ='';
% 
% my_config_data_periphery;
% load VAR_DATA
% Start from a clean workspace with no open figures, then load the saved data.
clear all;
close all;
% Alternative input that was used previously: ../Data/msa_data_jae.mat
load('../Data/VAR_DATA.mat');

% Panel dimensions, read off dlog_pop.  Deliberately left unsuppressed so the
% two sizes are echoed to the command window.
[T,N] = size(dlog_pop)

% Sample window (inclusive on both ends).  Assigned after the load so these
% values win over any same-named variables the .mat file may contain.
EndYear=2014;
StartYear=1986;
%%
% Binned scatter: demeaned start-up job creation rate against the demeaned
% start-up Bartik series, saved to ../Graphs/Bartik_Startup_binned.eps.
%
% Reads from the workspace: T, N, StartYear, EndYear, vYear, vZit_Bartik_jc,
% vZit_Bartik, vjob_creation_rate_births, vlog_emp_pop.
% Leaves behind Bartik_Overall_demeaned, Overall_Epop_demeaned (column means
% removed only) and Winsor_Cutoff, which the overall-Bartik section reuses.
close all

% ---- Step 1: remove column means (nanmean along dimension 1) ----
Bartik_Startup_demeaned = vZit_Bartik_jc -ones(T,1)*nanmean(vZit_Bartik_jc,1);
Bartik_Overall_demeaned = vZit_Bartik -ones(T,1)*nanmean(vZit_Bartik,1);

Startup_JC_demeaned = vjob_creation_rate_births -ones(T,1)*nanmean(vjob_creation_rate_births,1);
Overall_Epop_demeaned = vlog_emp_pop -ones(T,1)*nanmean(vlog_emp_pop,1);

% ---- Step 2: remove row means (nanmean along dimension 2) ----
Bartik_Startup_demeaned = Bartik_Startup_demeaned - nanmean(Bartik_Startup_demeaned,2)*ones(1,N);
Startup_JC_demeaned = Startup_JC_demeaned - nanmean(Startup_JC_demeaned,2)*ones(1,N);
% scatter(Bartik_Startup_demeaned(:) , Startup_JC_demeaned(:))

% ---- Step 3: winsorize both series inside the sample window ----
% Winsor_Cutoff is handed to prctile, so 1/2 clips at the 0.5th and 99.5th
% percentiles.
Winsor_Cutoff=1/2;

% Rows whose year lies in [StartYear, EndYear].
InSample = vYear>=StartYear & vYear<=EndYear;

% Stack the selected rows into one column vector (column-major order) and
% clip each tail to the corresponding percentile of the unclipped data.
Winsorize_Startup_JC_demeaned = reshape(Startup_JC_demeaned(InSample,:), [], 1);
Startup_JC_bounds = prctile(Winsorize_Startup_JC_demeaned, [Winsor_Cutoff, 100-Winsor_Cutoff]);
Winsorize_Startup_JC_demeaned(Winsorize_Startup_JC_demeaned<Startup_JC_bounds(1)) = Startup_JC_bounds(1);
Winsorize_Startup_JC_demeaned(Winsorize_Startup_JC_demeaned>Startup_JC_bounds(2)) = Startup_JC_bounds(2);

Winsorize_Bartik_Startup_demeaned = reshape(Bartik_Startup_demeaned(InSample,:), [], 1);
Bartik_Startup_bounds = prctile(Winsorize_Bartik_Startup_demeaned, [Winsor_Cutoff, 100-Winsor_Cutoff]);
Winsorize_Bartik_Startup_demeaned(Winsorize_Bartik_Startup_demeaned<Bartik_Startup_bounds(1)) = Bartik_Startup_bounds(1);
Winsorize_Bartik_Startup_demeaned(Winsorize_Bartik_Startup_demeaned>Bartik_Startup_bounds(2)) = Bartik_Startup_bounds(2);

% ---- Step 4: regression of Y on [X, 1] and F-statistic for the slope ----
YY= Winsorize_Startup_JC_demeaned;
% Size the constant from the data actually selected instead of
% (EndYear-StartYear+1)*N, which only matches when vYear contains every year
% of the window exactly once.
NumObs = size(YY,1);
XX=[Winsorize_Bartik_Startup_demeaned,  ones(NumObs,1)];
beta=XX\YY;              % beta(1) = slope, beta(2) = intercept

XX_res=ones(NumObs,1);   % restricted model: intercept only
res_beta=XX_res\YY;

RSS_res = sum( (YY - XX_res * res_beta).^2 ) ;
RSS = sum( (YY - XX * beta).^2 ) ;

% One restriction in the numerator, NumObs-2 residual degrees of freedom.
F= (RSS_res - RSS) / (RSS / (NumObs - 2));

% my_rank_estimate is defined outside this file; presumably a rank-based
% alternative to OLS returning [slope; intercept] -- TODO confirm.
rank_beta = my_rank_estimate(XX,YY);

% ---- Step 5: bin X with histcounts and average Y inside each bin ----
% Use the bin index returned by histcounts so that every observation enters
% exactly one bin mean and the means agree with the counts in BinSize
% (testing EDGES(nn)<=x<=EDGES(nn+1) would count edge values twice).
[BinSize,EDGES,BinIdx] = histcounts(Winsorize_Bartik_Startup_demeaned);
Binned_Y=NaN(length(EDGES)-1,1);
for nn=1:length(EDGES)-1
    Binned_Y(nn) = mean(Winsorize_Startup_JC_demeaned(BinIdx==nn));
end
BinCenters = (EDGES(1:end-1) + EDGES(2:end))/2;

% ---- Step 6: plot ----
% Marker area is the number of observations in the bin.
scatter(BinCenters, Binned_Y, BinSize)
hold on;
% NOTE(review): the fitted line below uses rank_beta although the handle is
% named hndl_ols and the title reports the OLS F-statistic; the overall-Bartik
% figure draws the OLS line instead.  Confirm which is intended.
hndl_ols=plot(BinCenters, rank_beta(2)+BinCenters*rank_beta(1), 'LineWidth', 3);
% Remember the limits before adding the 45-degree line so that the reference
% line does not stretch the axes; they are restored below.
old_xlim=xlim;
old_ylim=ylim;
plot(xlim, xlim,'--k')

grid on;

title(num2str(F, 'F-stat = %1.2f'));
ylabel('\Delta job creation rate (%)');
xlabel('start-up job creation Bartik (%)');
set(findall(gcf,'type','axes'),'fontsize',14);

axis([old_xlim old_ylim]);
print('-depsc', '../Graphs/Bartik_Startup_binned.eps');
%%
% Binned scatter: demeaned vlog_emp_pop series against the demeaned overall
% Bartik series, saved to ../Graphs/Bartik_Overall_binned.eps.
%
% Relies on variables created earlier in the script: N, StartYear, EndYear,
% vYear, Winsor_Cutoff, and Bartik_Overall_demeaned / Overall_Epop_demeaned
% with their column means already removed.
close all

% ---- Step 1: finish the demeaning by removing row means ----
Bartik_Overall_demeaned = Bartik_Overall_demeaned - nanmean(Bartik_Overall_demeaned,2)*ones(1,N);
Overall_Epop_demeaned = Overall_Epop_demeaned - nanmean(Overall_Epop_demeaned,2)*ones(1,N);
% scatter(Bartik_Overall_demeaned(:) , Overall_Epop_demeaned(:))

% ---- Step 2: winsorize both series inside the sample window ----
% Winsor_Cutoff is inherited from the preceding section; alternatives that
% were tried here:
% Winsor_Cutoff=0.1;
% Winsor_Cutoff=1/2;

% Rows whose year lies in [StartYear, EndYear].
InSample = vYear>=StartYear & vYear<=EndYear;

% Stack the selected rows into one column vector (column-major order) and
% clip each tail to the corresponding percentile of the unclipped data.
Winsorize_Overall_Epop_demeaned = reshape(Overall_Epop_demeaned(InSample,:), [], 1);
Overall_Epop_bounds = prctile(Winsorize_Overall_Epop_demeaned, [Winsor_Cutoff, 100-Winsor_Cutoff]);
Winsorize_Overall_Epop_demeaned(Winsorize_Overall_Epop_demeaned<Overall_Epop_bounds(1)) = Overall_Epop_bounds(1);
Winsorize_Overall_Epop_demeaned(Winsorize_Overall_Epop_demeaned>Overall_Epop_bounds(2)) = Overall_Epop_bounds(2);

Winsorize_Bartik_Overall_demeaned = reshape(Bartik_Overall_demeaned(InSample,:), [], 1);
Bartik_Overall_bounds = prctile(Winsorize_Bartik_Overall_demeaned, [Winsor_Cutoff, 100-Winsor_Cutoff]);
Winsorize_Bartik_Overall_demeaned(Winsorize_Bartik_Overall_demeaned<Bartik_Overall_bounds(1)) = Bartik_Overall_bounds(1);
Winsorize_Bartik_Overall_demeaned(Winsorize_Bartik_Overall_demeaned>Bartik_Overall_bounds(2)) = Bartik_Overall_bounds(2);

% ---- Step 3: regression of Y on [X, 1] and F-statistic for the slope ----
YY= Winsorize_Overall_Epop_demeaned;
% Size the constant from the data actually selected instead of
% (EndYear-StartYear+1)*N, which only matches when vYear contains every year
% of the window exactly once.
NumObs = size(YY,1);
XX=[Winsorize_Bartik_Overall_demeaned,  ones(NumObs,1)];
beta=XX\YY;              % beta(1) = slope, beta(2) = intercept

XX_res=ones(NumObs,1);   % restricted model: intercept only
res_beta=XX_res\YY;

RSS_res = sum( (YY - XX_res * res_beta).^2 ) ;
RSS = sum( (YY - XX * beta).^2 ) ;

% One restriction in the numerator, NumObs-2 residual degrees of freedom.
F= (RSS_res - RSS) / (RSS / (NumObs - 2));

% my_rank_estimate is defined outside this file; presumably a rank-based
% alternative to OLS -- TODO confirm.
% NOTE(review): rank_beta is computed but the line plotted below uses the OLS
% beta, whereas the start-up figure plots rank_beta.  Confirm which is intended.
rank_beta = my_rank_estimate(XX,YY);

% ---- Step 4: bin X with histcounts and average Y inside each bin ----
% Use the bin index returned by histcounts so that every observation enters
% exactly one bin mean and the means agree with the counts in BinSize
% (testing EDGES(nn)<=x<=EDGES(nn+1) would count edge values twice).
[BinSize,EDGES,BinIdx] = histcounts(Winsorize_Bartik_Overall_demeaned);
Binned_Y=NaN(length(EDGES)-1,1);
for nn=1:length(EDGES)-1
    Binned_Y(nn) = mean(Winsorize_Overall_Epop_demeaned(BinIdx==nn));
end
BinCenters = (EDGES(1:end-1) + EDGES(2:end))/2;

% ---- Step 5: plot ----
% Marker area is the number of observations in the bin.
scatter(BinCenters, Binned_Y, BinSize)
hold on
hndl_ols=plot(BinCenters, beta(2)+BinCenters*beta(1), 'LineWidth', 3);
grid on;
% Remember the limits before adding the 45-degree line so that the reference
% line does not stretch the axes; they are restored below.
old_xlim=xlim;
old_ylim=ylim;
plot(xlim, xlim,'--k')

title(num2str(F, 'F-stat = %1.2f'));
ylabel('employment to population ratio (%)');
xlabel('overall labor demand Bartik (%)');
set(findall(gcf,'type','axes'),'fontsize',14);

axis([old_xlim old_ylim]);
print('-depsc', '../Graphs/Bartik_Overall_binned.eps');
% = vZit_Bartik_jc -ones(T,1)*nanmean(vZit_Bartik_jc,1);
% Bartik_Overall_demaned 