% First attempt to compute a panel VAR

% pause(60*10) 

% Reset the workspace before anything is defined.
clear variables structures

% ---- Run settings (assigned before the data load, as in the original order) ----
% VAR_SET selects the variable set: 3 is our default from the previous version,
% 1 includes house prices, 2 & 20 include migration, 4 has patents.
VAR_SET   = 13;
StartYear = 1986;   % first year of the estimation window
EndYear   = 2013;   % last year of the estimation window

% ---- Data source ----
% (alternative kept for reference: load MergedMDPFirmChar)
load('D:\MSAs\msa_data');

% Assigned after the load, so a same-named variable in the .mat file cannot override it.
PostFix = '';

% Project configuration script; presumably reads the settings above and
% prepares the panel variables used below (T, N, vYear, ...) -- confirm.
my_config_data_periphery;

%% Net migration vs. population growth: per-MSA and pooled fits
close all

% Remove each MSA's (column's) time-series mean, skipping NaNs.
% T, N, vYear and the input series are expected in the workspace
% (presumably provided by the data load / my_config_data_periphery -- confirm).
v_mig_demeaned    = v_migrant_rate_exm - ones(T,1)*nanmean(v_migrant_rate_exm,1);
dlog_pop_demeaned = 100*(dlog_pop - ones(T,1)*nanmean(dlog_pop,1));

Bartik_Startup_demeaned = vZit_Bartik_jc - ones(T,1)*nanmean(vZit_Bartik_jc,1);
Bartik_Overall_demeaned = vZit_Bartik - ones(T,1)*nanmean(vZit_Bartik,1);

Startup_JC_demeaned   = vjob_creation_rate_births - ones(T,1)*nanmean(vjob_creation_rate_births,1);
Overall_Epop_demeaned = vlog_emp_pop - ones(T,1)*nanmean(vlog_emp_pop,1);

% Rows inside the estimation window. Computed once, and used to size the
% intercept column, so the regressions no longer assume that vYear holds
% every year of [StartYear, EndYear] exactly once.
SampleRows   = vYear>=StartYear & vYear<=EndYear;
InterceptCol = ones(nnz(SampleRows),1);

% Per-MSA statistics, one row per MSA:
%   col 1   Pearson correlation  (migration rate, population growth)
%   col 2   Spearman correlation (same pair)
%   col 3:4 OLS [slope, intercept] of migration on demeaned population growth
%   col 5:6 my_rank_estimate output for the same regression
%           (used below as [slope, intercept])
CorrByMSA=NaN(N,6);
for nn=1:N
    mig_nn = v_mig_demeaned(SampleRows,nn);
    pop_nn = dlog_pop(SampleRows,nn);          % raw series, as in the original call
    X_nn   = [dlog_pop_demeaned(SampleRows,nn), InterceptCol];

    CorrByMSA(nn,1)   = corr(mig_nn, pop_nn);
    CorrByMSA(nn,2)   = corr(mig_nn, pop_nn, 'type', 'Spearman');
    CorrByMSA(nn,3:4) = X_nn\mig_nn;
    rank_beta         = my_rank_estimate(X_nn, mig_nn);
    CorrByMSA(nn,5:6) = rank_beta';
end

% --- Figure: distribution of per-MSA slopes (histogram: OLS, curve: rank) ---
hndl_hist=histogram(CorrByMSA(:,3),20, 'Normalization', 'pdf');
hold all
grid on;
[F,XI]=ksdensity(CorrByMSA(:,5),  'Function', 'pdf');
hndl_density=plot(XI,F, 'LineWidth', 3);
legend({'OLS';'linear rank-regression'}, 'Location', 'Best')
legend('boxoff')
set(findall(gcf,'type','axes'),'fontsize',14);
xlabel('slope');
ylabel('density across MSAs');
print('-depsc', 'Graphs/Migration_slopes.eps');

% --- Figure: distribution of per-MSA correlations ---
figure;
hndl_hist=histogram(CorrByMSA(:,1),20, 'Normalization', 'pdf');
hold all
grid on;
[F,XI]=ksdensity(CorrByMSA(:,2),  'Function', 'pdf', 'support', [-1,1.05]);
hndl_density=plot(XI,F, 'LineWidth', 3);
legend({'Pearson''s';'Spearman''s'}, 'Location', 'Best')
legend('boxoff')
set(findall(gcf,'type','axes'),'fontsize',14);
print('-depsc', 'Graphs/Migration_corrs.eps');

% --- Figure: pooled scatter with fitted lines ---
figure;
% dlog_pop_demeaned = dlog_pop_demeaned-nanmean(dlog_pop_demeaned,2)*ones(1,N);
% v_mig_demeaned = v_mig_demeaned-nanmean(v_mig_demeaned,2)*ones(1,N);

% Stack the in-sample MSA columns into one long vector (column-major order).
PooledX = reshape(dlog_pop_demeaned(SampleRows,:), [], 1);
XX = [PooledX, ones(size(PooledX))];
YY = reshape(v_mig_demeaned(SampleRows,:), [], 1);
beta=XX\YY;

% Squared correlation of fitted and actual values; left unsuppressed so it
% is echoed to the command window.
corr(XX*beta, YY).^2

% NOTE(review): the scatter and the fitted lines are drawn over ALL rows of
% the panel, while the fits above use only the StartYear..EndYear rows.
AllX = dlog_pop_demeaned(:);
scatter(AllX, v_mig_demeaned(:));
hold all
hndl_ols=plot(AllX, beta(2)+AllX*beta(1), 'LineWidth', 3);

rank_beta = my_rank_estimate(XX,YY);
hndl_45=plot(xlim, xlim, '--k');
hndl_rank=plot(AllX, rank_beta(2)+AllX*rank_beta(1), 'LineWidth', 3);
% Dashed lines: cross-MSA mean of the OLS coefficients and cross-MSA median
% of the rank-regression coefficients, in the colour of the matching pooled fit.
plot(AllX, mean(CorrByMSA(:,4))+AllX*mean(CorrByMSA(:,3)), 'LineWidth', 3, 'Color', hndl_ols.Color, 'LineStyle', '--')
plot(AllX, median(CorrByMSA(:,6))+AllX*median(CorrByMSA(:,5)), 'LineWidth', 3, 'Color', hndl_rank.Color, 'LineStyle', '--')
legend([hndl_45;hndl_ols; hndl_rank], {'45 degree line'; ['OLS: Slope=' num2str(beta(1), '%1.2f')];['Linear rank regression: Slope=' num2str(rank_beta(1), '%1.2f')]}, 'Location', 'Best')
grid on;
ylabel('net migration rate (%)');
xlabel('population growth (%)');
set(findall(gcf,'type','axes'),'fontsize',14);

print('-depsc', 'Graphs/Migration_scatter.eps');
% corr(XX*beta, YY, 'type', 'Spearman').^2
% corr(XX*rank_beta, YY, 'type', 'Spearman').^2
% 
% corr(XX*beta, YY, 'type', 'Pearson').^2
% corr(XX*rank_beta, YY, 'type', 'Pearson').^2

%% Time-series plots for the MSAs closest to each population decile
close all

% Cross-section of population in the base year.
% NOTE(review): 1986 is hard-coded; it equals StartYear as set at the top of
% the script -- confirm whether it should follow StartYear.
vpop_1986 = vpop(vYear==1986,:);

% 0th, 10th, ..., 100th percentile of base-year population, and for each one
% the index of the MSA whose base-year population is closest to it.
TargetPrctiles = prctile(vpop_1986, 0:10:100);
TargetIDX = NaN(11,1);
for ii=1:11
    [~,TargetIDX(ii)]=min( abs(vpop_1986-TargetPrctiles(ii)));
    msa_ii    = TargetIDX(ii);
    msa_title = regexprep(cMSA{msa_ii}, 'Metropolitan Statistical Area', '');
    pct_tag   = num2str( 10*(ii-1) );    % percentile label used in the file names

    % --- demeaned net migration rate vs. demeaned population growth ---
    figure;
    hndl=plot(vYear, [v_mig_demeaned(:,msa_ii), dlog_pop_demeaned(:,msa_ii)],'LineWidth',3);
    set(hndl, {'LineStyle'}, {'-';'-.'});
    ylabel('demeaned rate (%)');
    legend({'net migration rate'; 'population growth'}, 'Location', 'Best')
    set(findall(gcf,'type','axes'),'fontsize',14);
    grid on;
    title(msa_title);
    print('-depsc', ['Graphs/Migration_ts_p' pct_tag '.eps']);

    % --- population and the two employment series, plotted as log10 ---
    figure;
    hndl=plot(vYear, log10([vpop(:,msa_ii),vemp(:,msa_ii), vZit_ageall_emp_msa(:,msa_ii)]),'LineWidth',3);
    set(hndl, {'LineStyle'}, {'--';'-';'-.'});
    ylabel('Thousands (log-scale)');
    legend({'population';'CBP employment'; 'BDS employment'}, 'Location', 'Best')
    set(findall(gcf,'type','axes'),'fontsize',14);
    % Relabel the log10 ticks with levels divided by 1000. The tick positions
    % are written back together with the labels so that YTick becomes manual:
    % otherwise MATLAB may recompute the ticks (e.g. when the title is added
    % or the figure is resized for printing) and the fixed labels would then
    % sit on the wrong positions.
    yticks_log10 = get(gca, 'YTick');
    set(gca, 'YTick', yticks_log10, ...
             'YTickLabel', num2str(round(50*(10.^yticks_log10'))/50/1000, '%1.0f'))
    grid on;
    title(msa_title);
    print('-depsc', ['Graphs/Emp_ts_p' pct_tag '.eps']);

    % --- start-up job creation and firm entry/exit rates ---
    figure;
    hndl=plot(vYear, ([vjob_creation_rate_births_lvl(:,msa_ii), vjob_creation_rate_births(:,msa_ii),vfirm_entry_rate(:,msa_ii),vfirm_exit_rate(:,msa_ii)]),'LineWidth',3);
    set(hndl, {'LineStyle'}, {'-';'-';'--';'-.'});
    ylabel('%');
    legend({'job creation rate (level)'; '\Delta job creation rate';'firm entry rate'; 'firm exit rate'}, 'Location', 'Best')
    set(findall(gcf,'type','axes'),'fontsize',14);
%     set(gca, 'YTickLabel', num2str(round(50*(10.^get(gca, 'YTick')'))/50/1000, '%1.0f'))
    grid on;
    title(msa_title);
    print('-depsc', ['Graphs/Startups_ts_p' pct_tag '.eps']);
end
close all

%% Start-up job creation vs. start-up Bartik: pooled fit, F-stat and plots
close all

% Remove each year's cross-MSA mean (row mean, NaNs skipped) from the two
% series, in addition to the MSA-level demeaning applied when they were built.
Bartik_Startup_demeaned = Bartik_Startup_demeaned - nanmean(Bartik_Startup_demeaned,2)*ones(1,N);
Startup_JC_demeaned = Startup_JC_demeaned - nanmean(Startup_JC_demeaned,2)*ones(1,N);
% scatter(Bartik_Startup_demeaned(:) , Startup_JC_demeaned(:))

% Winsorization cut-off, passed to prctile as a percentile: values below the
% 0.5th and above the 99.5th percentile are clipped to those percentiles.
Winsor_Cutoff=1/2;

% Rows inside the estimation window (computed once for the whole cell).
SampleRows = vYear>=StartYear & vYear<=EndYear;

% Stack in-sample observations column-major and winsorize both tails.
% The cut-offs are taken from the un-clipped vector, as in the original code.
raw_JC = reshape(Startup_JC_demeaned(SampleRows,:), [], 1);
lo_JC  = prctile(raw_JC, Winsor_Cutoff);
hi_JC  = prctile(raw_JC, 100-Winsor_Cutoff);
Winsorize_Startup_JC_demeaned = raw_JC;
Winsorize_Startup_JC_demeaned(raw_JC<lo_JC) = lo_JC;
Winsorize_Startup_JC_demeaned(raw_JC>hi_JC) = hi_JC;

raw_Bartik = reshape(Bartik_Startup_demeaned(SampleRows,:), [], 1);
lo_Bartik  = prctile(raw_Bartik, Winsor_Cutoff);
hi_Bartik  = prctile(raw_Bartik, 100-Winsor_Cutoff);
Winsorize_Bartik_Startup_demeaned = raw_Bartik;
Winsorize_Bartik_Startup_demeaned(raw_Bartik<lo_Bartik) = lo_Bartik;
Winsorize_Bartik_Startup_demeaned(raw_Bartik>hi_Bartik) = hi_Bartik;

% Unrestricted model: [slope; intercept]. Restricted model: intercept only.
XX=[Winsorize_Bartik_Startup_demeaned, ones(size(Winsorize_Bartik_Startup_demeaned))];
YY= Winsorize_Startup_JC_demeaned;
beta=XX\YY;

XX_res=ones(size(YY));
res_beta=XX_res\YY;

RSS_res = sum( (YY - XX_res * res_beta).^2 ) ;
RSS = sum( (YY - XX * beta).^2 ) ;

% F-statistic for the single restriction slope = 0 (two estimated
% parameters in the unrestricted model). Left unsuppressed so it is echoed.
F= (RSS_res - RSS) / (RSS / (size(YY ,1) - 2))

% Squared correlation of fitted and actual values (echoed as well).
corr(XX*beta, YY).^2

% --- Figure: raw scatter with OLS line ---
scatter( Winsorize_Bartik_Startup_demeaned ,Winsorize_Startup_JC_demeaned);
hold all
hndl_ols=plot(Winsorize_Bartik_Startup_demeaned, beta(2)+Winsorize_Bartik_Startup_demeaned*beta(1), 'LineWidth', 3);

rank_beta = my_rank_estimate(XX,YY);

% hndl_ols=plot(Winsorize_Bartik_Startup_demeaned, rank_beta(2)+Winsorize_Bartik_Startup_demeaned*rank_beta(1), 'LineWidth', 3);

% Remember the data-driven limits so the 45-degree line cannot rescale the axes.
old_xlim=xlim;
old_ylim=ylim;
plot(xlim, xlim,'--k')
grid on;

title(num2str(F, 'F-stat = %1.2f'));
ylabel('\Delta job creation rate (%)');
xlabel('start-up job creation Bartik (%)');
set(findall(gcf,'type','axes'),'fontsize',14);

axis([old_xlim old_ylim]);
% NOTE(review): this cell writes to '../Graphs/' while every other print in
% the script writes to 'Graphs/' -- confirm which directory is intended.
print('-depsc', '../Graphs/Bartik_Startup.eps');

% --- Figure: binned scatter (marker area = number of observations in bin) ---
% Use the bin index returned by histcounts so the bin means are computed on
% exactly the observations counted in BinSize. The previous closed-interval
% test (EDGES(nn)<=x & x<=EDGES(nn+1)) put values lying on an interior edge
% into two neighbouring bins.
[BinSize,EDGES,BinIdx] = histcounts(Winsorize_Bartik_Startup_demeaned);
Binned_Y=NaN(length(EDGES)-1,1);
for nn=1:length(EDGES)-1
    Binned_Y(nn) = mean(Winsorize_Startup_JC_demeaned(BinIdx==nn));
end
BinCenters = (EDGES(1:end-1) + EDGES(2:end))/2;
NonEmpty   = BinSize>0;    % empty bins have no mean and a zero marker size

close all;
scatter( BinCenters(NonEmpty), Binned_Y(NonEmpty), BinSize(NonEmpty))
hold all;
% NOTE(review): the line drawn here uses rank_beta although the handle is
% named hndl_ols and the title reports the OLS-based F-stat; the parallel
% "overall" cell draws beta -- confirm which fit is intended.
hndl_ols=plot(BinCenters, rank_beta(2)+BinCenters*rank_beta(1), 'LineWidth', 3);
old_xlim=xlim;
old_ylim=ylim;
plot(xlim, xlim,'--k')

grid on;

title(num2str(F, 'F-stat = %1.2f'));
ylabel('\Delta job creation rate (%)');
xlabel('start-up job creation Bartik (%)');
set(findall(gcf,'type','axes'),'fontsize',14);

axis([old_xlim old_ylim]);
print('-depsc', '../Graphs/Bartik_Startup_binned.eps');
%% Employment-to-population vs. overall Bartik: pooled fit, F-stat and plots
close all

% Remove each year's cross-MSA mean (row mean, NaNs skipped) from the two
% series, in addition to the MSA-level demeaning applied when they were built.
Bartik_Overall_demeaned = Bartik_Overall_demeaned - nanmean(Bartik_Overall_demeaned,2)*ones(1,N);
Overall_Epop_demeaned = Overall_Epop_demeaned - nanmean(Overall_Epop_demeaned,2)*ones(1,N);
% scatter(Bartik_Overall_demeaned(:) , Overall_Epop_demeaned(:))

% Winsor_Cutoff is NOT reassigned here (both candidate lines are commented
% out), so the value already in the workspace from the preceding cell is used.
% Winsor_Cutoff=0.1;

% Winsor_Cutoff=1/2;

% Rows inside the estimation window (computed once for the whole cell).
SampleRows = vYear>=StartYear & vYear<=EndYear;

% Stack in-sample observations column-major and clip both tails at the
% Winsor_Cutoff / (100-Winsor_Cutoff) percentiles of the un-clipped vector.
raw_Epop = reshape(Overall_Epop_demeaned(SampleRows,:), [], 1);
lo_Epop  = prctile(raw_Epop, Winsor_Cutoff);
hi_Epop  = prctile(raw_Epop, 100-Winsor_Cutoff);
Winsorize_Overall_Epop_demeaned = raw_Epop;
Winsorize_Overall_Epop_demeaned(raw_Epop<lo_Epop) = lo_Epop;
Winsorize_Overall_Epop_demeaned(raw_Epop>hi_Epop) = hi_Epop;

raw_Bartik = reshape(Bartik_Overall_demeaned(SampleRows,:), [], 1);
lo_Bartik  = prctile(raw_Bartik, Winsor_Cutoff);
hi_Bartik  = prctile(raw_Bartik, 100-Winsor_Cutoff);
Winsorize_Bartik_Overall_demeaned = raw_Bartik;
Winsorize_Bartik_Overall_demeaned(raw_Bartik<lo_Bartik) = lo_Bartik;
Winsorize_Bartik_Overall_demeaned(raw_Bartik>hi_Bartik) = hi_Bartik;

% Unrestricted model: [slope; intercept]. Restricted model: intercept only.
XX=[Winsorize_Bartik_Overall_demeaned, ones(size(Winsorize_Bartik_Overall_demeaned))];
YY= Winsorize_Overall_Epop_demeaned;
beta=XX\YY;

XX_res=ones(size(YY));
res_beta=XX_res\YY;

RSS_res = sum( (YY - XX_res * res_beta).^2 ) ;
RSS = sum( (YY - XX * beta).^2 ) ;

% F-statistic for the single restriction slope = 0 (two estimated
% parameters in the unrestricted model). Left unsuppressed so it is echoed.
F= (RSS_res - RSS) / (RSS / (size(YY ,1) - 2))

% --- Figure: raw scatter with OLS line ---
scatter( Winsorize_Bartik_Overall_demeaned ,Winsorize_Overall_Epop_demeaned);
hold all
hndl_ols=plot(Winsorize_Bartik_Overall_demeaned, beta(2)+Winsorize_Bartik_Overall_demeaned*beta(1), 'LineWidth', 3);

% NOTE(review): rank_beta is computed but not plotted in this cell.
rank_beta = my_rank_estimate(XX,YY);

% hndl_ols=plot(Winsorize_Bartik_Overall_demeaned, rank_beta(2)+Winsorize_Bartik_Overall_demeaned*rank_beta(1), 'LineWidth', 3);

% Remember the data-driven limits so the 45-degree line cannot rescale the axes.
old_xlim=xlim;
old_ylim=ylim;
plot(xlim, xlim,'--k')

title(num2str(F, 'F-stat = %1.2f'));

grid on;
ylabel('employment to population ratio (%)');
xlabel('overall labor demand Bartik (%)');
set(findall(gcf,'type','axes'),'fontsize',14);

axis([old_xlim old_ylim]);
print('-depsc', 'Graphs/Bartik_Overall.eps');

% --- Figure: binned scatter (marker area = number of observations in bin) ---
% Use the bin index returned by histcounts so the bin means are computed on
% exactly the observations counted in BinSize. The previous closed-interval
% test (EDGES(nn)<=x & x<=EDGES(nn+1)) put values lying on an interior edge
% into two neighbouring bins.
[BinSize,EDGES,BinIdx] = histcounts(Winsorize_Bartik_Overall_demeaned);
Binned_Y=NaN(length(EDGES)-1,1);
for nn=1:length(EDGES)-1
    Binned_Y(nn) = mean(Winsorize_Overall_Epop_demeaned(BinIdx==nn));
end
BinCenters = (EDGES(1:end-1) + EDGES(2:end))/2;
NonEmpty   = BinSize>0;    % empty bins have no mean and a zero marker size

close all;
scatter( BinCenters(NonEmpty), Binned_Y(NonEmpty), BinSize(NonEmpty))
hold all
hndl_ols=plot(BinCenters, beta(2)+BinCenters*beta(1), 'LineWidth', 3);
grid on;
old_xlim=xlim;
old_ylim=ylim;
plot(xlim, xlim,'--k')

title(num2str(F, 'F-stat = %1.2f'));
ylabel('employment to population ratio (%)');
xlabel('overall labor demand Bartik (%)');
set(findall(gcf,'type','axes'),'fontsize',14);

axis([old_xlim old_ylim]);
print('-depsc', 'Graphs/Bartik_Overall_binned.eps');
% = vZit_Bartik_jc -ones(T,1)*nanmean(vZit_Bartik_jc,1);
% Bartik_Overall_demaned 