function [gmm2 gmmb se] = mullahyiv(Y,X,Z,gmm0)
%% Calculates GMM point estimator and standard error
%% for a two-way exponential regression model on a (balanced) n-by-m panel data set
%% using instrumental variables based on Mullahy (1997 RESTAT) and performs a subsequent bias correction
% INPUT:
% Y    = n-by-m array of outcomes;
% X    = d-dimensional cell of n-by-m matrices of regressors;
% Z    = q-dimensional cell of n-by-m matrices of instruments; Z must include exogenous X;
% gmm0 = starting value for the optimization (one entry per regressor).
%        X, Z and gmm0 may be given in row or column orientation.
% OUTPUT:
% gmm2 = two-step GMM point estimate (column vector);
% gmmb = bias-corrected point estimate, gmm2 minus the bias returned by Bias;
% se   = standard errors: square roots of the diagonal of the asymptotic
%        covariance matrix returned by Bias at gmm2.
%
% NOTES:
% This algorithm estimates two-way FE. Partitioned-inverse formulae are used to speed up computation.
% Warnings are switched off while the estimator runs; the caller's warning
% state is restored when the function returns (normally or through an error).

warnState = warning;                                   % remember the caller's warning settings
warning('off','all');
restoreWarnings = onCleanup(@() warning(warnState));   % runs when this workspace is cleared

% The local functions index X{k}/Z{k} and count entries along the first
% dimension, and Newton adds a column direction to the parameter vector,
% so bring everything into column orientation once here.
X = X(:); Z = Z(:); gmm0 = gmm0(:);

q = numel(Z); % number of moment conditions

% Minimization of GMM problem by Newton's method, starting at gmm0
% one-step: identity weight matrix
gmm1 = Newton(@QuadraticForm,gmm0,Y,X,Z,eye(q));
[~, invV] = Bias(Y,X,Z,gmm1);          % weight matrix evaluated at the one-step estimate
% two-step: weight matrix from the one-step estimate
gmm2 = Newton(@QuadraticForm,gmm1,Y,X,Z,invV);
[bias, ~, asyvar] = Bias(Y,X,Z,gmm2);  % bias and covariance evaluated at the two-step estimate

gmmb = gmm2-bias;
se = sqrt(diag(asyvar));







function [criterion score Hessian H] = QuadraticForm(theta,Y,X,Z,invV)
% GMM objective for a given common parameter theta.
% The fixed effects are first recomputed by FEmax (started from zeros) for
% this theta; the moment vector and its Jacobian are then evaluated at them.
% INPUT:
% theta = d-by-1 common parameter;
% Y     = N-by-T outcomes;
% X     = cell of d N-by-T regressor matrices;
% Z     = cell of q N-by-T instrument matrices;
% invV  = q-by-q weight matrix (supplied by the caller).
% OUTPUT:
% criterion = -S'*invV*S/2, with S the q-by-1 vector of sample moments;
% score     = -H'*invV*S;
% Hessian   = -H'*invV*H;
% H         = q-by-d Jacobian matrix of the moments.
[N, T] = size(Y); % sample size
d = numel(X);     % number of regressors
q = numel(Z);     % number of moment conditions

[alpha, gamma] = FEmax(Y,X,theta,zeros(N,1),zeros(T,1));

A = alpha*ones(1,T) ;
G = ones(N,1)*gamma';
index = zeros(N,T); for k=1:d, index = index+X{k}*theta(k); end % linear index
lambda = exp(index+A+G); V = Y./lambda;

% moment vector
S = zeros(q,1); for k=1:q, S(k) = mean(mean(Z{k}.*(V-1))); end

% Additive row/column-mean part of a matrix (row mean + column mean - grand
% mean), obtained as M minus its two-way demeaned residual.
fit2 = @(M) M-((M-mean(mean(M)))-(mean(M,2)*ones(1,T)-mean(mean(M)))-(ones(N,1)*mean(M,1)-mean(mean(M))));

% The projection of V.*X{j} does not depend on the instrument, so compute
% it once per regressor rather than once per (instrument, regressor) pair.
PVX = cell(d,1);
for j=1:d, PVX{j} = fit2(V.*X{j}); end

% Jacobian matrix
H = zeros(q,d);
for k=1:q
    Xi = fit2(Z{k});
    for j=1:d
        H(k,j) = mean(mean(V.*(Z{k}.*X{j}-Xi.*PVX{j})));
    end
end
H = -H;

% The weight matrix is not estimated here: the caller passes invV in.

% objective function
criterion = -S'*invV*S/2;
score     = -H'*invV*S  ;
Hessian   = -H'*invV*H  ;


%% Bias calculation
function [bias, invV, asyvar] = Bias(Y,X,Z,theta)
% Bias term, weight matrix and asymptotic covariance at a given theta.
% The fixed effects are recomputed by FEmax (started from zeros) for this theta.
% INPUT:
% theta = d-by-1 common parameter;
% Y     = N-by-T outcomes;
% X     = cell of d N-by-T regressor matrices;
% Z     = cell of q N-by-T instrument matrices.
% OUTPUT:
% bias   = -inv(H'*invV*H)*(H'*invV*b), with b the q-by-1 moment bias and
%          H the q-by-d Jacobian of the moments;
% invV   = inverse of the q-by-q matrix Var built from the two-way demeaned
%          instruments and (V-1).^2;
% asyvar = inv(H'*invV*H)/(N*T).
[N, T] = size(Y); % sample size
d = numel(X);     % number of regressors
q = numel(Z);     % number of moment conditions

[alpha, gamma] = FEmax(Y,X,theta,zeros(N,1),zeros(T,1));

A = alpha*ones(1,T) ;
G = ones(N,1)*gamma';
index = zeros(N,T); for k=1:d, index = index+X{k}*theta(k); end % linear index
lambda = exp(index+A+G); V = Y./lambda;

U  = V-1;
U2 = U.^2;

% Two-way demeaned residual of a matrix (row, column and grand mean removed).
resid2 = @(M) (M-mean(mean(M)))-(mean(M,2)*ones(1,T)-mean(mean(M)))-(ones(N,1)*mean(M,1)-mean(mean(M)));

% Residuals of the instruments are needed for b, H and Var: compute each once.
QZ = cell(q,1);
for k=1:q, QZ{k} = resid2(Z{k}); end

% Row/column-mean part of V.*X{j}; independent of the instrument index.
PVX = cell(d,1);
for j=1:d
    VX = V.*X{j};
    PVX{j} = VX-resid2(VX);
end

% Bias and Jacobian
b = zeros(q,1);
H = zeros(q,d);
for k=1:q
    Q  = QZ{k};
    Xi = Z{k}-Q;

    UVQ = U.*V.*Q;
    VQ  = V.*Q;

    % terms built from averages along the second dimension, scaled by 1/T below
    B1 = -mean(mean(UVQ,2));
    B2 =  mean(mean(U2,2).*mean(VQ,2))/2;

    % terms built from averages along the first dimension, scaled by 1/N below
    D1 = -mean(mean(UVQ,1));
    D2 =  mean(mean(U2,1).*mean(VQ,1))/2;

    b(k) = (B1+B2)/T + (D1+D2)/N;

    for j=1:d
        H(k,j) = mean(mean(V.*(Z{k}.*X{j}-Xi.*PVX{j})));
    end
end
H = -H;

% Var is symmetric by construction: fill the upper triangle and mirror it.
Var = zeros(q,q);
for k=1:q
    for j=k:q
        v = mean(mean((QZ{k}.*QZ{j}).*U2));
        Var(k,j) = v;
        Var(j,k) = v;
    end
end

invV = inv(Var);

% inv(H'*invV*H) is needed for both outputs; form it once.
W = inv(H'*invV*H);

% bias
bias = -W*(H'*invV*b);

asyvar = W/(N*T);




%% FE estimation for given value of common parameter
function [L S H IH] = FE(Y,X,theta,alpha,gamma)
% Evaluates the fixed-effect problem at (alpha, gamma) for a given theta.
% The first element of alpha is treated as fixed: its score entry and the
% matching row/column of the Jacobian are dropped.
% INPUT:
% Y = N-by-T outcomes; X = cell of N-by-T regressors; theta = common parameter;
% alpha = N-by-1 and gamma = T-by-1 effects entering the exponent additively.
% OUTPUT:
% L  = -0.5 times the squared norm of S;
% S  = (N-1+T)-by-1 stacked scores [s_a; s_g];
% H  = (N-1+T)-by-(N-1+T) matrix [H_aa, H_ag; H_ga, H_gg];
% IH = inverse of H obtained through the partitioned-inverse formula.
nRow  = size(Y,1);
nCol  = size(Y,2);
nReg  = size(X,1);           % number of regressors
scale = sqrt(nRow*nCol);

% exponential mean and ratio of outcome to mean
index = zeros(nRow,nCol);
for r = 1:nReg
    index = index + X{r}*theta(r);
end
lambda = exp(index + repmat(alpha,1,nCol) + repmat(gamma',nRow,1));
V = Y./lambda;

% scores; the entry belonging to alpha(1) is left out
rowScore = sum(V-1,2)/scale;
s_a = rowScore(2:end);
s_g = sum(V-1,1)'/scale;
S = [s_a; s_g];

L = -.5*(s_a'*s_a + s_g'*s_g);

% blocks of H, again without the row/column belonging to alpha(1)
rowDiag = sum(-V,2)/scale;
H_aa = diag(rowDiag(2:end));
H_gg = diag(sum(-V,1))/scale;
H_ag = -V(2:end,:)/scale;
H_ga = H_ag';
H = [H_aa, H_ag; H_ga, H_gg];

% the diagonal blocks are diagonal matrices, so their inverses are elementwise
IH_aa = diag(1./diag(H_aa));
IH_gg = diag(1./diag(H_gg));

% partitioned inverse: block-diagonal inverse Schur complements times the
% matrix of off-diagonal corrections
schurA = H_aa - H_ag*IH_gg*H_ga;
schurG = H_gg - H_ga*IH_aa*H_ag;
block1 = blkdiag(inv(schurA), inv(schurG));
block2 = [eye(nRow-1), -H_ag*IH_gg; -H_ga*IH_aa, eye(nCol)];

IH = block1*block2;

function [alpha gamma f condition it]=FEmax(Y,X,theta,alpha,gamma)
% Maximises the FE criterion over the effects (alpha, gamma) for a given
% theta, by Newton steps with step halving. alpha(1) is never moved from its
% starting value (its direction entry is set to zero).
% INPUT:
% Y, X, theta = data and common parameter, passed through to FE;
% alpha, gamma = starting values (N-by-1 and T-by-1).
% OUTPUT:
% alpha, gamma = effects at the last accepted step;
% f            = FE criterion at those effects;
% condition    = 1 while the last accepted step and criterion gain both
%                exceeded tol, 0 once either fell below it;
% it           = number of outer iterations performed.
[N, ~] = size(Y);
tol=1e-3; maxit=100; smalleststep=.5^20;
it=1; condition=1; improvement=1;
[f, g, ~, IH] = FE(Y,X,theta,alpha,gamma);
while it<=maxit && condition==1 && improvement==1
    d = [0; -IH*g];               % leading zero keeps alpha(1) fixed
    step=1; improvement=0;
    while step>=smalleststep && improvement==0
        n_alpha = alpha+step*d(  1 : N);
        n_gamma = gamma+step*d(N+1:end);
        [ff, gg, ~, IHH] = FE(Y,X,theta,n_alpha,n_gamma);
        if (ff-f)/abs(f)>=-1e-5
            improvement=1;
            condition = sqrt(step*step*(d'*d))>tol && (ff-f)>tol;
            alpha=n_alpha;
            gamma=n_gamma;
            % Carry the inverse Jacobian of the accepted point forward, so
            % the next direction is a Newton step at the current effects
            % rather than one based on the starting point.
            f=ff; g=gg; IH=IHH;
        else
            step=step/2;
        end
    end
    it=it+1;
end
it=it-1;



%% Newton algorithm used for optimization
function [x condition it J]=Newton(FUN,x,varargin) % varargout
% Maximises FUN, starting at x, by the Newton-Raphson method with step halving.
% INPUT:
% FUN      = function (handle or name) called as
%            [f, g, H, J] = feval(FUN, x, varargin{:}), returning the
%            criterion f, its gradient g, its Hessian H and a fourth output J
%            that is passed back unchanged;
% x        = starting value;
% varargin = extra arguments forwarded to FUN.
% OUTPUT:
% x         = point reached at the last accepted step;
% condition = 1 while the last accepted step and criterion gain both
%             exceeded tol, 0 once either fell below it;
% it        = number of outer iterations performed;
% J         = fourth output of FUN at the returned x.
tol=1e-5; maxit=100; smalleststep=.5^20;
it=1; condition=1; improvement=1;
[f, g, H, J] = feval(FUN,x,varargin{:}); %varargout
while it<=maxit && condition==1 && improvement==1
    [s1, s2] = size(H);
    if s1==s2 && s2>1
        d = -(H\g);      % solve the linear system instead of forming inv(H)
    else
        d = -g./H;
    end
    step=1; improvement=0;
    while step>=smalleststep && improvement==0
        [ff, gg, HH, JJ] = feval(FUN,x+step*d,varargin{:}); %varargout
        bounded = all(isfinite(HH(:)));   % reject points with NaN/Inf in the Hessian
        if (ff-f)/abs(f)>=-1e-5 && bounded
            improvement=1;
            condition = sqrt(step*step*(d'*d))>tol && (ff-f)>tol;
            x=x+step*d; f=ff; g=gg; H=HH; J=JJ;
        else
            step=step/2;
        end
    end
    it=it+1;
end
it=it-1;