% date: 20210105
% author: Simon Kwok

% purpose:	to compute interval estimate of bubbles from calls and/or puts (see Jarrow and Kwok (2021), Proposition 3)

% input variables: 
% yr 		list of years
% pow 		maximum power for local polynomial regression (denoted p in Appendix A1)
% step 		step size for discretizing the strike price dimension (denoted \Delta s in Appendix A1)
 
% output variables (as inputs in bubcp_trade.m):
% bubout	struct containing variables related to bubbles
% dataout	struct containing raw data series
% setout	struct containing environmental variables and input parameters

% input datasets:
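% allopt_01Janyyyyto31Decyyyy_count.csv   (one file per year yyyy in yr; second column = number of option rows per day)
% allopt_01Janyyyyto31Decyyyy.csv         (one file per year yyyy in yr; 13 comma-separated columns, one row per option)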

% required functions:
% anticonv_call.m       constrained least squares estimation of call prices
% anticonv_put.m        constrained least squares estimation of put prices
% lpoly.m               local polynomial regression
% nw_cov.m              Newey-West variance

% example:
% yr = {'1996','1997','1998','1999','2000','2001','2002','2003','2004','2005','2006','2007','2008','2009','2010','2011','2012','2013','2014','2015'};
% pow = 2;	
% step = 5;
% [bubout,dataout,setout]=bubcp(yr,pow,step);

function [bubout,dataout,setout]=bubcp(yr,pow,step)
% BUBCP  Interval estimates of bubbles from call and put option prices.
%
% Inputs:
%   yr    cell array of year strings, e.g. {'1996','1997'}; one count file
%         and one option data file are read per year
%   pow   polynomial order passed to lpoly
%   step  spacing of the strike grid on which the estimated CDF is evaluated
%
% Outputs:
%   bubout   struct of bubble estimates, standard errors, bias terms and
%            diagnostics; each field is an nperiod-by-mntau array whose
%            columns index the maturities shared by calls and puts on a day
%   dataout  struct holding the raw per-day series read from the files
%   setout   struct holding the settings used for the run
%
% Side effects: switches off the MATLAB:nearlySingularMatrix and
% MATLAB:SingularMatrix warnings (not restored on exit) and prints the
% elapsed estimation time via tic/toc.

filetype = 'allopt';

warning('off','MATLAB:nearlySingularMatrix')
warning('off','MATLAB:SingularMatrix')

% constrained least squares parameters
nint = 1000;        % maximum number of iterations
precis = 10^(-6);   % precision

% local polynomial regression parameters
hnumsd = 3;         % initial bandwidth h0 = hnumsd * mean strike spacing
opth = 1;           % use optimal local bandwidth (see Appendix A1)

modelname='bubcp';

nyr = length(yr);
nperiod = 0;

%% import data
for j=1:nyr

    % Import count data: the number of newline bytes (minus the header
    % line) gives the number of days in this year's file.
    filename = strcat(filetype,'_01Jan',yr{j},'to31Dec',yr{j},'_','count','.csv');
    fd=fopen(filename);
    if fd < 0
        error('bubcp:fileOpen','Cannot open count file %s.',filename);
    end
    q=fread(fd,inf,'*uchar');
    fclose(fd);
    nlines = sum(q==10) - 1;

    source = fopen(filename);
    if source < 0
        error('bubcp:fileOpen','Cannot open count file %s.',filename);
    end
    head = textscan(source,'%s %s',1,'delimiter',',');   %#ok<NASGU> skip header row
    c = textscan(source,'%s %f','delimiter',',');
    fclose(source);     % the original left this handle open
    nkcnt = c{2};       % number of option rows for each day

    % Import call and put data
    filename = strcat(filetype,'_01Jan',yr{j},'to31Dec',yr{j},'.csv');
    source = fopen(filename);
    if source < 0
        error('bubcp:fileOpen','Cannot open data file %s.',filename);
    end
    head = textscan(source,'%s %s %s %s %s %s %s %s %s %s %s %s %s',1,'delimiter',',');   %#ok<NASGU> skip header row

    for i=1:nlines
        t = nperiod+i;
        % read exactly the rows belonging to day i
        c = textscan(source,'%s %s %s %f %f %f %f %f %f %f %f %f %f',nkcnt(i),'delimiter',',');
        dateraw{t} = c{1}';
        cp_flag{t} = c{2}';
        exdateraw{t} = c{3}';
        tauday{t} = c{4}';
        X{t} = c{5}';
        s{t} = c{6}';
        dyma{t} = c{7}';
        tr{t} = c{8}';
        money{t} = c{9}';
        oprice{t} = c{10}';
        volume{t} = c{11}';
        iv{t} = c{12}';
        deltachk{t} = c{13}';

        cp{t}=strcmp(cp_flag{t},'C');     % cp = 1 if call,   cp = 0 if put
        ncall(t) = sum(cp{t});

        tau{t} = tauday{t}/365;   % time to maturity
        sout(t) = s{t}(1);

        da(t,1)=datenum(dateraw{t}(1),'ddmmmyyyy');
    end
    fclose(source);

    nperiod = nperiod + nlines;
end

%% interval estimation

tic
mntau = 10;         % an integer > max number of different tau's on a day

nkc = zeros(nperiod,mntau);
nkp = zeros(nperiod,mntau);
sumvolc = zeros(nperiod,mntau);
sumvolp = zeros(nperiod,mntau);

otmc = zeros(nperiod,mntau);
call1 = zeros(nperiod,mntau);
sbub_qcdfp = zeros(nperiod,mntau);
sbub_qcdfc = zeros(nperiod,mntau);
sbub_qcdf = zeros(nperiod,mntau);

sbub_qcdfp_se = zeros(nperiod,mntau);
sbub_qcdfc_se = zeros(nperiod,mntau);
sbub_qcdf_se = zeros(nperiod,mntau);

qcdfp_bias = zeros(nperiod,mntau);
qcdfc_bias = zeros(nperiod,mntau);
qcdf_bias = zeros(nperiod,mntau);
qcdf_A_lb = zeros(nperiod,mntau);
qcdf_A_ub = zeros(nperiod,mntau);
qcdf_Ap_lb = zeros(nperiod,mntau);
qcdf_Ap_ub = zeros(nperiod,mntau);
qcdf_Ac_lb = zeros(nperiod,mntau);
qcdf_Ac_ub = zeros(nperiod,mntau);
qcdf_B1 = zeros(nperiod,mntau);
qcdf_B21 = zeros(nperiod,mntau);
qcdf_B22 = zeros(nperiod,mntau);
qcdf_B23 = zeros(nperiod,mntau);
qcdf_B3 = zeros(nperiod,mntau);

Bcbub_lb = zeros(nperiod,mntau);
Bcbub_ub = zeros(nperiod,mntau);

scene = zeros(nperiod,mntau);
qcdfp_lb = zeros(nperiod,mntau);
qcdfp_ub = zeros(nperiod,mntau);
qcdfc_lb = zeros(nperiod,mntau);
qcdfc_ub = zeros(nperiod,mntau);
lp = zeros(nperiod,mntau);
up = zeros(nperiod,mntau);
lc = zeros(nperiod,mntau);
uc = zeros(nperiod,mntau);

for t=1:nperiod
    % only maturities quoted for both puts and calls are used
    ptaulist = unique(tau{t}(~cp{t}));
    ctaulist = unique(tau{t}(cp{t}));
    taulist = intersect(ptaulist,ctaulist);
    ntau = length(taulist);

    for j=1:ntau
        vput = ((tau{t}==taulist(j)) & ~cp{t});
        np = sum(vput);
        put = oprice{t}(vput);
        pk = X{t}(vput);
        dk = pk(2:np)-pk(1:np-1);
        volp = volume{t}(vput);

        vcall = ((tau{t}==taulist(j)) & cp{t});
        nc = sum(vcall);
        call = oprice{t}(vcall);
        ck = X{t}(vcall);
        dck = ck(2:nc)-ck(1:nc-1);
        volc = volume{t}(vcall);

        vx = step:step:max(pk(end),ck(end))+step-1;     % grid of strikes
        % put grid: from the last grid point <= pk(1) to the first >= pk(end)
        pk1 = vx(vx<=pk(1));
        pk2 = vx(vx>=pk(end));
        xpk = pk1(end):step:pk2(1);
        nxp = length(xpk);

        dis = exp(-tr{t}(1)*taulist(j));   % discount factor

        % estimation using puts
        g = anticonv_put(nint, precis, pk', put', dis);    % inputs/output as column

        qcdfp = zeros(1,nxp);
        qcdfp_se_pt = zeros(1,nxp);
        hx0 = mean(dk)*hnumsd;
        for i=1:nxp
            [bg,bg_se,hpopt0] = lpoly(xpk(i),pk',g,pow,opth,hx0);    % inputs/output as column
            if opth == 3
                [bg,bg_se] = lpoly(xpk(i),pk',g,pow,0,hpopt0);
            end

            % second coefficient of the local fit, undiscounted
            qcdfp_out = bg(2)*dis^(-1);
            qcdfp_se_pt_out = bg_se(2)*dis^(-1);

            % make Q and se valid: clamp to [0,1], stay at 1 once reached,
            % and report zero s.e. at the clamped boundaries
            qcdfp(i) = min(1,max(0,qcdfp_out));
            if i > 1 && qcdfp(i-1) == 1
                qcdfp(i) = 1;
            end
            if qcdfp(i) == 0 || qcdfp(i) == 1
                qcdfp_se_pt(i) = 0;
            else
                qcdfp_se_pt(i) = qcdfp_se_pt_out;
            end
        end

        % mean and s.e. estimates using puts
        rngp = max(qcdfp)-min(qcdfp);       % range covered by the put-based CDF
        qcdfp_norm = (qcdfp - min(qcdfp))/rngp;
        qcdfp_mu = (nansum(1-qcdfp_norm)*step + xpk(1))*dis;

        mp = ceil(nxp^0.25);                % lag count passed to nw_cov
        wp = ones(1,nxp) * step*dis/rngp;
        adjp = (dis*step*sum(qcdfp) - (put(end) - put(1)))/rngp^2;
        wp(1) = adjp + dis*step/rngp;
        wp(end) = -adjp + dis*step/rngp;

        qcdfp_pt = qcdfp .* wp;
        qcdfp_norm_se_pt = qcdfp_se_pt .* wp;
        qcdfp_se = sqrt(nansum(qcdfp_norm_se_pt.^2) + 2*nw_cov(qcdfp_pt,mp)) ;

        % estimation using calls
        gc = anticonv_call(nint, precis, ck', call', dis);    % inputs/output as column
        % call grid: from the last grid point <= ck(1) to the first >= ck(end)
        ck1 = vx(vx<=ck(1));
        ck2 = vx(vx>=ck(end));
        xck = ck1(end):step:ck2(1);
        nxc = length(xck);

        qcdfc = zeros(1,nxc);
        qcdfc_se_pt = zeros(1,nxc);
        hx0 = mean(dck)*hnumsd;
        for i=1:nxc
            [bgc,bgc_se,hcopt0] = lpoly(xck(i),ck',gc,pow,opth,hx0);    % inputs/output as column
            if opth == 3
                [bgc,bgc_se] = lpoly(xck(i),ck',gc,pow,0,hcopt0);
            end

            % for calls the CDF is one plus the undiscounted second coefficient
            qcdfc_out = 1 + bgc(2)*dis^(-1);
            qcdfc_se_pt_out = bgc_se(2)*dis^(-1);

            % make Q and se valid (same rules as for puts)
            qcdfc(i) = min(1,max(0,qcdfc_out));
            if i > 1 && qcdfc(i-1) == 1
                qcdfc(i) = 1;
            end
            if qcdfc(i) == 0 || qcdfc(i) == 1
                qcdfc_se_pt(i) = 0;
            else
                qcdfc_se_pt(i) = qcdfc_se_pt_out;
            end
        end

        % mean and s.e. estimates using calls
        rngc = max(qcdfc)-min(qcdfc);       % range covered by the call-based CDF
        qcdfc_norm = (qcdfc - min(qcdfc))/rngc;
        qcdfc_mu = (nansum(1-qcdfc_norm)*step + xck(1))*dis ;

        mc = ceil(nxc^0.25);                % lag count passed to nw_cov
        wc = ones(1,nxc)*step*dis/rngc;
        adjc = (dis*step*sum(qcdfc) - (call(end) - call(1) + dis*(ck(end)-ck(1))))/rngc^2;
        wc(1) = adjc + dis*step/rngc;
        wc(end) = -adjc + dis*step/rngc;

        qcdfc_pt = qcdfc .* wc;
        qcdfc_norm_se_pt = qcdfc_se_pt .* wc;

        qcdfc_se = sqrt(nansum(qcdfc_norm_se_pt.^2) + 2*nw_cov(qcdfc_pt,mc));

        % combine put and call estimates, weighted by number of strikes
        % (updated on 20210108)
        n = nc + np;
        qcdf_mu = np/n*qcdfp_mu + nc/n*qcdfc_mu;
        qcdf_se = sqrt((np/n*qcdfp_se)^2 + (nc/n*qcdfc_se)^2);

        % bias: locate where each strike range starts/ends inside the other
        lc_p = find(pk>=ck(1),1,'first');
        up_c = find(ck<=pk(end),1,'last');
        lp_c = find(ck>=pk(1),1,'first');
        uc_p = find(pk<=ck(end),1,'last');

        % fall back to the nearest strike on the other side when the
        % ranges do not overlap
        if isempty(lc_p)
            lc_p = find(pk<=ck(1),1,'last');
        end
        if isempty(uc_p)
            uc_p = find(pk>=ck(end),1,'first');
        end
        if isempty(up_c)
            up_c = find(ck>=pk(end),1,'first');
        end
        if isempty(lp_c)
            lp_c = find(ck<=pk(1),1,'last');
        end

        B1p = dis * min(qcdfp) * (pk(end) - pk(1)) / rngp;
        B1c = dis * min(qcdfc) * (ck(end) - ck(1)) / rngc;

        B2p = (1/rngp-1)*(put(end) - put(1));
        B2c = (1/rngc-1)*(dis*(ck(end)-ck(1)) + call(end) - call(1));

        A_lb = -put(1);
        A_ub = call(end);
        Ap_lb = -put(1);
        Ap_ub = call(up_c);
        Ac_lb = -put(lc_p);
        Ac_ub = call(end);

        % compute bias for combined put and call estimate
        B1 = (np*B1p + nc*B1c)/n;
        B21 = (np*B2p + nc*B2c)/n;
        B3 = np/n*dis*(ck(end)-pk(end));

        % scene encodes how the put strike range [pk(1),pk(end)] and the
        % call strike range [ck(1),ck(end)] are positioned relative to
        % each other
        if pk(1) <= ck(1) && pk(end) <= ck(end) && ck(1) <= pk(end)
            % puts start lower, calls end higher, ranges overlap.
            % Upper-tail term uses call(end) - call(up_c), matching the
            % same term in scene 4; the original call(up_c) - call(up_c)
            % was identically zero.
            B22 = np/n*(dis*(ck(end)-pk(end)) + call(end) - call(up_c));
            B23 = nc/n*(put(lc_p) - put(1));
            scene(t,j) = 1;

        elseif ck(1) <= pk(1) && ck(end) <= pk(end) && pk(1) <= ck(end)
            % calls start lower, puts end higher, ranges overlap
            B22 = -np/n*(put(end) - put(uc_p));
            B23 = -nc/n*(dis*(pk(1)-ck(1)) + call(lp_c) - call(1));
            scene(t,j) = 2;

        elseif pk(1) <= ck(1) && ck(end) <= pk(end)
            % call range nested inside put range
            B22 = nc/n*(put(lc_p) - put(1));
            B23 = -np/n*(put(end) - put(uc_p));
            scene(t,j) = 3;

        elseif ck(1) <= pk(1) && pk(end) <= ck(end)
            % put range nested inside call range
            B22 = -nc/n*(dis*(pk(1)-ck(1)) + call(lp_c) - call(1));
            B23 = np/n*(dis*(ck(end)-pk(end)) + call(end) - call(up_c));
            scene(t,j) = 4;

        elseif pk(end) < ck(1)
            % disjoint ranges, puts entirely below calls
            B22 = nc/n*(put(end) - put(1));
            B23 = np/n*(dis*(ck(end)-ck(1)) + call(end) - call(1));
            A_ub = call(end) + dis;
            scene(t,j) = 5;
        else
            % remaining case (calls entirely below puts): no bias formula
            % is defined here, so mark the terms as NaN instead of leaving
            % them undefined or carrying over the previous iteration's values
            B22 = NaN;
            B23 = NaN;
            scene(t,j) = 6;
        end

        qcdfp_bias(t,j) = B1p - B2p;
        qcdfc_bias(t,j) = B1c - B2c;
        qcdf_bias(t,j) = B1 - B21 + B22 + B23 - B3;
        qcdf_A_lb(t,j) = A_lb;
        qcdf_A_ub(t,j) = A_ub;
        qcdf_Ap_lb(t,j) = Ap_lb;
        qcdf_Ap_ub(t,j) = Ap_ub;
        qcdf_Ac_lb(t,j) = Ac_lb;
        qcdf_Ac_ub(t,j) = Ac_ub;
        qcdf_B1(t,j) = B1;
        qcdf_B21(t,j) = B21;
        qcdf_B22(t,j) = B22;
        qcdf_B23(t,j) = B23;
        qcdf_B3(t,j) = B3;

        % compute bubble
        otmc(t,j) = call(end);
        call1(t,j) = call(1);

        sbub_qcdfp(t,j) = sout(t) - qcdfp_mu;
        sbub_qcdfc(t,j) = sout(t) - qcdfc_mu;
        sbub_qcdf(t,j) = sout(t) - qcdf_mu;

        sbub_qcdfp_se(t,j) = qcdfp_se;
        sbub_qcdfc_se(t,j) = qcdfc_se;
        sbub_qcdf_se(t,j) = qcdf_se;

        % study the extent of truncation
        qcdfp_lb(t,j) = min(qcdfp);
        qcdfp_ub(t,j) = max(qcdfp);
        qcdfc_lb(t,j) = min(qcdfc);
        qcdfc_ub(t,j) = max(qcdfc);

        lp(t,j) = pk(1);
        up(t,j) = pk(end);
        lc(t,j) = ck(1);
        uc(t,j) = ck(end);

        nkp(t,j) = np;
        nkc(t,j) = nc;

        sumvolc(t,j) = sum(volc);
        sumvolp(t,j) = sum(volp);

        Bcbub_lb(t,j) = -(1/rngc-1)*call(1);
        Bcbub_ub(t,j) = -(1/rngc-1)*call(end);
    end

end

toc

%% output variables

bubout.otmc = otmc;
bubout.call1 = call1;
bubout.sbub_qcdfp = sbub_qcdfp;
bubout.sbub_qcdfc = sbub_qcdfc;
bubout.sbub_qcdf = sbub_qcdf;
bubout.sbub_qcdfp_se = sbub_qcdfp_se;
bubout.sbub_qcdfc_se = sbub_qcdfc_se;
bubout.sbub_qcdf_se = sbub_qcdf_se;

bubout.qcdfp_bias = qcdfp_bias;
bubout.qcdfc_bias = qcdfc_bias;
bubout.qcdf_bias = qcdf_bias;
bubout.qcdf_A_lb = qcdf_A_lb;
bubout.qcdf_A_ub = qcdf_A_ub;
bubout.qcdf_Ap_lb = qcdf_Ap_lb;
bubout.qcdf_Ap_ub = qcdf_Ap_ub;
bubout.qcdf_Ac_lb = qcdf_Ac_lb;
bubout.qcdf_Ac_ub = qcdf_Ac_ub;
bubout.qcdf_B1 = qcdf_B1;
bubout.qcdf_B21 = qcdf_B21;
bubout.qcdf_B22 = qcdf_B22;
bubout.qcdf_B23 = qcdf_B23;
bubout.qcdf_B3 = qcdf_B3;

bubout.Bcbub_lb = Bcbub_lb;
bubout.Bcbub_ub = Bcbub_ub;

bubout.scene = scene;
bubout.qcdfp_lb = qcdfp_lb;
bubout.qcdfp_ub = qcdfp_ub;
bubout.qcdfc_lb = qcdfc_lb;
bubout.qcdfc_ub = qcdfc_ub;
bubout.lp = lp;
bubout.up = up;
bubout.lc = lc;
bubout.uc = uc;

bubout.nkc = nkc;
bubout.nkp = nkp;
bubout.sumvolc = sumvolc;
bubout.sumvolp = sumvolp;

setout.filetype = filetype;
setout.modelname = modelname;
setout.yr = yr;
setout.pow = pow;
setout.step = step;
setout.opth = opth;
setout.hnumsd = hnumsd;
setout.nperiod = nperiod;

dataout.sout = sout;
dataout.oprice = oprice;
dataout.cp = cp;
dataout.X = X;
dataout.tau = tau;
dataout.tr = tr;
dataout.dyma = dyma;
dataout.da = da;

end

