%% Build the MSA panel data sets and the descriptive MSA tables
% For each value of RESTRICT_TO_WHARTON this script
%   1. reads ../Data/Merged_Data.csv and turns every column into a T-by-N
%      workspace variable named after the column header,
%   2. derives wage growth, job creation and migration rates,
%   3. looks up the MSA names in ../Data/MSAs.csv,
%   4. saves the whole workspace to a .mat file, and (unrestricted run only)
%      writes two LaTeX table fragments to ../Tables.
% NOTE: the entire workspace is written by SAVE, so every variable that is
% alive at that point ends up in the .mat file.
clear all;
current_pwd=pwd;
% cd E:\MSAs


%% loop: first the unrestricted dataset, then the Wharton-restricted sample

for RESTRICT_TO_WHARTON=[0 1] % need to always run "0" before "1" or "-1" to make sure the uncensored version of the data is there. 
    [numdata,textdata]=xlsread('../Data/Merged_Data.csv');

    % Only a single header row of text is expected in the CSV.
    if size(textdata, 1)>1
        error('data file header off? too many text rows');
    end

    N = length(unique(numdata(:,strcmp('msa_id', textdata))));
    T = length(unique(numdata(:,strcmp('year', textdata))));
    % One T-by-N page per CSV column.
    % NOTE(review): this assumes a balanced panel whose rows are sorted by
    % MSA and, within MSA, by year -- confirm against the data export.
    numdata=reshape(numdata, T, N,  []);
    if RESTRICT_TO_WHARTON==1
            % Use the variables left in the workspace by the previous
            % (unrestricted) pass to drop MSAs whose 1980 Wharton land index
            % is missing.
            DELETE_IDX=isnan(v_whartonlandindex(vYear==1980,:));
            numdata(:,DELETE_IDX,:)=[];
            N=nnz(~DELETE_IDX);
    end

    % Create one workspace variable per CSV column, named after its header.
    % ASSIGNIN targets the base workspace, so this script is expected to be
    % run from the command line (not from inside a function).
    for mm=1:size(numdata,3)
        assignin('base', textdata{mm}, numdata(:,:,mm));
    end

%     vRegion= v_Northeast + v_South * 2 + v_West * 3 + v_Midwest *4;
    
    vMSA = unique(msa_id);
    vYear = unique(year);

    vemp = vZit_msa_emp;
    vpop = v_pop;

    % There is a problem with Cape May county in the CBP for 1995 and 1996:
    % treat the -1 entries of MSA 36140 as missing and replace the missing
    % entries by that MSA's mean wage growth over the remaining years.
    v_simple_wage_growth(v_simple_wage_growth(:,vMSA==36140)==-1,vMSA==36140)=NaN;
    v_simple_wage_growth(isnan(v_simple_wage_growth(:,vMSA==36140)),vMSA==36140)=nanmean(v_simple_wage_growth( :, vMSA==36140));
    dlog_wage=v_simple_wage_growth;
    clear v_simple_wage_growth;
    
    % fill in missing observations of age-0 job creation from the births series
    vZit_age0_job_ctn_msa(isnan(vZit_age0_job_ctn_msa))=vZit_age0_job_ctn_births_msa(isnan(vZit_age0_job_ctn_msa));
    % level and first difference of age-0 job creation, in percent of the
    % all-ages denominator (first row of the difference is NaN)
    vjob_creation_rate_births = 100 * vZit_age0_job_ctn_msa ./ vZit_ageall_denom_msa;
    vjob_creation_growth_births = 100 * [NaN(1,size(vZit_age0_job_ctn_msa,2)); diff(vZit_age0_job_ctn_msa,1)] ./ vZit_ageall_denom_msa;



    % net in-migration (returns) in percent of the previous year's population
    v_migrant_rate_rtn = 100*(v_returns_in-v_returns_out)./[NaN(1, size(v_returns_in,2)); vpop(1:end-1,:)];
    % exemptions: fall back to the "year2" series where the main one is missing
    v_exemptions_all_out=v_exemptions_out;
    v_exemptions_all_out(isnan(v_exemptions_out))=v_year2exemptions_out(isnan(v_exemptions_out));
    v_exemptions_all_in=v_exemptions_in;
    v_exemptions_all_in(isnan(v_exemptions_in))=v_year2exemptions_in(isnan(v_exemptions_in));

    % net in-migration (exemptions) in percent of the previous year's population
    v_migrant_rate_exm = 100*(v_exemptions_all_in-v_exemptions_all_out)./[NaN(1, size(v_returns_in,2)); vpop(1:end-1,:)];

    clear vZit_msa_emp msa_id v_pop v_sic_wage_msa
    %% get corresponding msa names
    [numdata,textdata]=xlsread('../Data/MSAs.csv');

    % JJ(k) is the row of the names file that holds vMSA(k), so cMSA is
    % aligned with the columns of the data matrices regardless of how the
    % names file is sorted. (FIND on the comparison matrix would return the
    % matches in file order instead.)
    % NOTE(review): numdata is expected to be a single column of MSA ids.
    [tmp_found,JJ]=ismember(vMSA(:), numdata(:));
    % Every MSA must be found, and exactly N file entries may match
    % (i.e. no duplicated ids in the names file).
    if ~all(tmp_found) || nnz(ismember(numdata(:), vMSA(:)))~=N
        error('Mapping to MSA names is not working');
    end
    clear tmp_found; % keep helper out of the saved workspace
    cMSA=textdata(JJ+1,2); % +1 skips the header row of the text block

    switch RESTRICT_TO_WHARTON
        case 0
            save ../Data/msa_data_jae


                % --- descriptive tables (tmp_* variables are cleared below) ---
                [tmp_aa,tmp_bb]=xlsread('../Data/MSA Land Area 2010.xlsx');
                tmp_MSA_id=tmp_aa(:,1);
                tmp_MSA_area=tmp_aa(:,3)/2589988; %m^2 to miles^2

                tmp_area=zeros(N,1);
                for mm=1:N
                    tmp_match=(tmp_MSA_id==vMSA(mm));
                    if nnz(tmp_match)~=1
                        error('msa_data:areaLookup', ...
                            'Expected exactly one land-area entry for MSA %d, found %d.', ...
                            vMSA(mm), nnz(tmp_match));
                    end
                    tmp_area(mm,1)= tmp_MSA_area(tmp_match) ;
                end
                % population per square mile, T-by-N
                tmp_density=vpop(1:T,:)./(ones(T,1)*tmp_area') ;

                tmp_Initial_Density = num2str(round(tmp_density(1,:))', '& %1.0f');

                % age-0 firms in percent of the average number of firms in the
                % current and the previous year; the first row is NaN (no lag)
                tmp_firm_entry_rate=100*[NaN(1,N); 2*vZit_age0_firms_msa(2:end,:)./(vZit_ageall_firms_msa(2:end,:)+vZit_ageall_firms_msa(1:end-1,:))];

                % "Initial" uses row 2 (first non-NaN row), "Final" uses row end-1
                tmp_Initial_firm_entry_rate = num2str(round(10*tmp_firm_entry_rate(2,:))'/10, '& %1.1f');
                tmp_Final_firm_entry_rate = num2str(round(10*tmp_firm_entry_rate(end-1,:))'/10, '& %1.1f');

                % jobs created by age-0 firms per age-0 firm
                tmp_Initial_firm_size = num2str(round(10*vZit_age0_job_ctn_msa(2,:)./vZit_age0_firms_msa(2,:))'/10, '& %1.1f');
                tmp_Final_firm_size = num2str(round(10*vZit_age0_job_ctn_msa(end-1,:)./vZit_age0_firms_msa(end-1,:))'/10, '& %1.1f');


                % one LaTeX table row per MSA
                TableMSA=[char(regexprep(cMSA, 'Metropolitan Statistical Area', '')) ...
                     tmp_Initial_Density, tmp_Initial_firm_entry_rate, tmp_Final_firm_entry_rate, tmp_Initial_firm_size, tmp_Final_firm_size, ...
                     repmat(' \\', length(cMSA),1)];

                % cross-MSA distribution of the same five columns
                TableDistribution=[char({'Minimum';'25th percentile'; 'Median'; '75th percentile'; 'Maximum'}) ...
                     num2str((prctile(tmp_density(1,:),[0 25 50 75 100]))', '& %1.0f'), ...
                     num2str((prctile(tmp_firm_entry_rate(2,:),[0 25 50 75 100]))', '& %1.1f'), ...
                     num2str((prctile(tmp_firm_entry_rate(end-1,:),[0 25 50 75 100]))', '& %1.1f'), ...
                     num2str((prctile(vZit_age0_job_ctn_msa(2,:)./vZit_age0_firms_msa(2,:),[0 25 50 75 100]))', '& %1.1f'), ...
                     num2str((prctile(vZit_age0_job_ctn_msa(end-1,:)./vZit_age0_firms_msa(end-1,:),[0 25 50 75 100]))', '& %1.1f') ...
                     repmat(' \\', 5,1)];

                clear tmp_*;

            CellTable=cellstr(TableMSA);
            my_fid=fopen(['../Tables/MSA_List.tex'],'w');
            if my_fid<0
                error('Could not open ../Tables/MSA_List.tex for writing');
            end
            fprintf(my_fid, '%s\n',CellTable{:});
            fclose(my_fid);

            CellTable=cellstr(TableDistribution);
            my_fid=fopen(['../Tables/MSA_List_distribution.tex'],'w');
            if my_fid<0
                error('Could not open ../Tables/MSA_List_distribution.tex for writing');
            end
            fprintf(my_fid, '%s\n',CellTable{:});
            fclose(my_fid);

        case 1
            save ../Data/msa_data_wharton_jae
    end
end
%%
cd(current_pwd);