###################################################################################################
###  Monte Carlo Simulation Code for "Estimation of Average Treatment Effects Using Panel Data  ###
###  when Treatment Effect Heterogeneity Depends on Unobserved Fixed Effects"                   ###
###################################################################################################

####################
##  DGPs 3 and 4  ##
####################

####loading packages####
library(MASS); library(plm)
library(AER); library(systemfit)

###time periods###
## NOTE: `T` masks R's built-in shorthand for TRUE once this script is loaded,
## so always spell out TRUE/FALSE in any code that runs alongside it.
T <- 2

###coefficient values###
## alpha*: treatment-assignment equation; beta0*/beta1*: untreated/treated
## outcome equations; gamma0/gamma1: loadings on the fixed effect C in the
## untreated/treated outcome equations; mu_*: means of X, W and C.
alpha0 <- -2; alpha1 <- -1; alpha2 <- 1; alpha3 <- 1
beta00 <- 2; beta01 <- 1; beta02 <- -0.5
beta10 <- 1; beta11 <- 2; beta12 <- 0.5
gamma0 <- 1
gamma1 <- 0.1 ##set gamma1 <- 0.1 and 0.05 for DGPs 3 and 4, respectively##
mu_x1 <- 1; mu_x2 <- 1.5; mu_w1 <- 1; mu_w2 <- 1.5; mu_c <- 1

##True Values of ATE##
ATE1_true <- (beta10-beta00) + (beta11-beta01)*mu_x1 + (beta12-beta02)*mu_w1 + (gamma1-gamma0)*mu_c
ATE2_true <- (beta10-beta00) + (beta11-beta01)*mu_x2 + (beta12-beta02)*mu_w2 + (gamma1-gamma0)*mu_c

##initial parameters values##
## Layout of the 9-element GMM parameter vector (true values used as start):
##   par[1], par[2] : beta11, beta12  (treated-regime slopes on X, W)
##   par[3], par[4] : beta01, beta02  (untreated-regime slopes on X, W)
##   par[5]         : beta10 - (gamma1/gamma0)*beta00
##   par[6]         : gamma1/gamma0   (ratio of fixed-effect loadings)
##   par[7]         : beta00 - (gamma0/gamma1)*beta10
##   par[8], par[9] : ATE in period 1 and period 2
## par[6] is written as the ratio so that the start value stays correct when
## gamma0 is not 1 (with gamma0 = 1 it equals gamma1).
initial_par <- c(
  beta11, beta12, beta01, beta02,
  beta10 - (gamma1/gamma0)*beta00,
  gamma1/gamma0,
  beta00 - (gamma0/gamma1)*beta10,
  ATE1_true, ATE2_true
)

###Monte Carlo Simulation Function###

## Monte_carlo(N, Rep): run `Rep` Monte Carlo replications with `N` units
## observed over two periods and summarise seven estimators of the period-1
## and period-2 average treatment effects (ATE).
##
## Arguments:
##   N   : number of cross-sectional units per replication.
##   Rep : number of replications.
##
## Relies on file-level objects: T (must be 2), alpha0..alpha3,
## beta00..beta12, gamma0, gamma1, mu_x1, mu_x2, mu_w1, mu_w2, mu_c,
## ATE1_true, ATE2_true and initial_par, plus mvrnorm() (MASS) and
## plm()/pdata.frame() (plm).
##
## Returns an unnamed list of three numeric vectors of length 14:
##   [[1]] mean bias, [[2]] standard deviation, [[3]] RMSE.
## Within each vector the order is
##   OLS, FE, GMM(Z), GMM(X), GMM(W), GMM(X,W), GMM(X,W,Z)
## for period 1, followed by the same seven estimators for period 2.
##
## NOTE: optim() is run with its defaults and its convergence code is not
## inspected.
Monte_carlo <- function(N, Rep) {

  ## k x k correlation matrix with unit diagonal and common off-diagonal rho
  equicorr <- function(k, rho) {
    m <- matrix(rho, k, k)
    diag(m) <- 1
    m
  }

  ## Row-wise mean of M over the periods flagged in the 0/1 matrix
  ## `in_regime`; units with no flagged period get 0.
  regime_mean <- function(M, in_regime) {
    n_obs <- rowSums(in_regime)
    has_obs <- n_obs > 0
    avg <- numeric(nrow(M))
    avg[has_obs] <- rowSums(in_regime * M)[has_obs] / n_obs[has_obs]
    avg
  }

  ###storage for estimates (one row per replication)###
  gmm_labels <- c("z", "x", "w", "xw", "xwz")
  gmm1 <- matrix(NA_real_, Rep, length(gmm_labels),
                 dimnames = list(NULL, gmm_labels))
  gmm2 <- gmm1
  OLS1 <- numeric(Rep); OLS2 <- numeric(Rep)
  FE1 <- numeric(Rep); FE2 <- numeric(Rep)

  ##distribution parameters##
  ## (U0_t1, U1_t1, U0_t2, U1_t2): columns 1,3 are U0 and 2,4 are U1 below
  m_U0U1 <- rep(0, 4)
  sigma_U0U1 <- matrix(c(
    1.0, 0.5, 0.3, 0.2,
    0.5, 1.0, 0.2, 0.3,
    0.3, 0.2, 1.0, 0.5,
    0.2, 0.3, 0.5, 1.0
  ), 4, 4, byrow = TRUE)
  m_UD <- rep(0, 2)
  sigma_UD <- equicorr(2, 0.3)
  m_XC <- c(mu_x1, mu_x2, mu_c)
  sigma_XC <- equicorr(3, 0.3)
  m_W <- c(mu_w1, mu_w2)
  sigma_W <- equicorr(2, 0.3)

  ##repetition begins##
  for (r in seq_len(Rep)) {

    ##error components##
    ## The order of the mvrnorm() calls fixes how the RNG stream is consumed.
    U0U1 <- mvrnorm(N, m_U0U1, sigma_U0U1)
    U0 <- U0U1[, c(1, 3)]
    U1 <- U0U1[, c(2, 4)]
    UD <- mvrnorm(N, m_UD, sigma_UD)
    UW <- mvrnorm(N, m_UD, sigma_UD)

    ##exogenous variables and fixed effects##
    XC <- mvrnorm(N, m_XC, sigma_XC)
    X <- XC[, 1:2]
    C <- XC[, 3]
    W <- mvrnorm(N, m_W, sigma_W)
    C_mat <- matrix(C, N, T)  # fixed effect repeated in every period

    ##instrumental variables##
    Z <- C_mat + UW

    ##treatment assignment##
    ## C (length N) recycles down each column, i.e. C[i] is added to row i.
    D <- (alpha0 + alpha1 * X + alpha2 * W + alpha3 * C + UD > 0) * 1

    ##potential and observed outcomes##
    Y1 <- matrix(beta10, N, T) + beta11 * X + beta12 * W + gamma1 * C_mat + U1
    Y0 <- matrix(beta00, N, T) + beta01 * X + beta02 * W + gamma0 * C_mat + U0
    Y <- D * Y1 + (1 - D) * Y0

    ###Proposed Estimation###

    ##transformation##
    ## Unit-level means over treated (1) and untreated (0) periods. D * Y
    ## equals D * Y1 and (1 - D) * Y equals (1 - D) * Y0 because D is 0/1,
    ## so only observed outcomes are needed.
    Y1_bar <- regime_mean(Y, D)
    X1_bar <- regime_mean(X, D)
    W1_bar <- regime_mean(W, D)
    Y0_bar <- regime_mean(Y, 1 - D)
    X0_bar <- regime_mean(X, 1 - D)
    W0_bar <- regime_mean(W, 1 - D)

    ## Deviations from the regime means (length-N vectors recycle by row).
    Y1_dash <- Y - Y1_bar; Y0_dash <- Y - Y0_bar
    X1_dash <- X - X1_bar; X0_dash <- X - X0_bar
    W1_dash <- W - W1_bar; W0_dash <- W - W0_bar

    ## D1_mover: treated observations of units with at least one untreated
    ## period; D0_mover: untreated observations of units with at least one
    ## treated period.
    n_treated <- rowSums(D)
    D1_mover <- D * (n_treated != T)
    D0_mover <- (1 - D) * (n_treated != 0)

    ##GMM moments##
    ## Returns the N x q matrix of unit-level moment contributions (period
    ## averages) for parameter vector `par`. `instruments` is a list of N x T
    ## matrices used, after a constant, as instruments in the two mover
    ## equations. Column order: 4 within-regime slope moments, treated-mover
    ## moments, untreated-mover moments, period-1 ATE, period-2 ATE.
    unit_moments <- function(par, instruments) {
      within1 <- Y1_dash - X1_dash * par[1] - W1_dash * par[2]
      within0 <- Y0_dash - X0_dash * par[3] - W0_dash * par[4]
      ETA0 <- Y0_bar - X0_bar * par[3] - W0_bar * par[4]
      ETA1 <- Y1_bar - X1_bar * par[1] - W1_bar * par[2]
      ## residuals of the treated / untreated outcome equations
      e1 <- Y - par[5] - X * par[1] - W * par[2] - par[6] * ETA0
      e0 <- Y - par[7] - X * par[3] - W * par[4] - (1 / par[6]) * ETA1
      ## treated obs: observed minus fitted untreated outcome;
      ## untreated obs: fitted treated minus observed outcome
      effect <- D * e0 - (1 - D) * e1

      iv <- c(list(1), instruments)
      mover1 <- lapply(iv, function(z) rowMeans(D1_mover * z * e1))
      mover0 <- lapply(iv, function(z) rowMeans(D0_mover * z * e0))

      cbind(
        rowMeans(D * X1_dash * within1),
        rowMeans(D * W1_dash * within1),
        rowMeans((1 - D) * X0_dash * within0),
        rowMeans((1 - D) * W0_dash * within0),
        do.call(cbind, mover1),
        do.call(cbind, mover0),
        effect[, 1] - par[8],
        effect[, 2] - par[9]
      )
    }

    ## Two-step GMM for one instrument set; returns c(ATE1_hat, ATE2_hat).
    two_step_gmm <- function(instruments) {
      g_bar <- function(par) colMeans(unit_moments(par, instruments))

      ##First step: identity weighting##
      step1 <- optim(par = initial_par, fn = function(par) sum(g_bar(par)^2))

      ##2nd step: weight = inverse second-moment matrix at step-1 estimate##
      G <- unit_moments(step1$par, instruments)
      weight <- solve(crossprod(G) / N)
      step2 <- optim(par = step1$par, fn = function(par) {
        g <- g_bar(par)
        drop(g %*% weight %*% g)  # scalar objective for optim()
      })
      step2$par[8:9]
    }

    ##Estimation of ATE##
    instrument_sets <- list(
      z = list(Z), x = list(X), w = list(W),
      xw = list(X, W), xwz = list(X, W, Z)
    )
    ate_hat <- vapply(instrument_sets, two_step_gmm, numeric(2))
    gmm1[r, ] <- ate_hat[1, ]
    gmm2[r, ] <- ate_hat[2, ]

    ##creating panel data##
    ## Long format, rows ordered (unit 1, t=1), (unit 1, t=2), (unit 2, t=1)...
    ## Covariates are centred on their period-specific cross-sectional mean.
    panel <- data.frame(
      id = rep(seq_len(N), each = T),
      t = rep(seq_len(T), times = N),
      y = as.vector(t(Y)),
      d = as.vector(t(D)),
      x_mdif = as.vector(t(sweep(X, 2, colMeans(X)))),
      w_mdif = as.vector(t(sweep(W, 2, colMeans(W))))
    )
    panel$t2 <- as.numeric(panel$t == 2)
    pdata <- pdata.frame(panel, index = c("id", "t"))

    ## Coefficients are looked up by name, so a dropped term raises an error
    ## instead of silently shifting positions.
    ###Pooling OLS Estimation###
    result_OLS <- plm(y ~ d * t2 + d * x_mdif + d * w_mdif,
                      data = pdata, model = "pooling")
    b_ols <- coef(result_OLS)
    OLS1[r] <- b_ols[["d"]]
    OLS2[r] <- b_ols[["d"]] + b_ols[["d:t2"]]

    ###FE Estimation###
    result_FE <- plm(y ~ d * t2 + d * x_mdif + d * w_mdif,
                     data = pdata, model = "within")
    b_fe <- coef(result_FE)
    FE1[r] <- b_fe[["d"]]
    FE2[r] <- b_fe[["d"]] + b_fe[["d:t2"]]
    ##repetition ends##
  }

  ###summary statistics###
  est1 <- cbind(OLS1, FE1, gmm1)
  est2 <- cbind(OLS2, FE2, gmm2)
  bias1 <- est1 - ATE1_true
  bias2 <- est2 - ATE2_true

  list(
    ###Mean bias###
    unname(c(colMeans(bias1), colMeans(bias2))),
    ###SD###
    unname(c(apply(est1, 2, sd), apply(est2, 2, sd))),
    ###RMSE###
    unname(sqrt(c(colMeans(bias1^2), colMeans(bias2^2))))
  )
}


###Results###
## Fix the RNG seed so the three result tables are reproducible across runs.
set.seed(12345)
## print() explicitly: bare top-level calls show nothing when this file is run
## through source() with its default echo = FALSE.
for (n_units in c(200, 500, 800)) {
  print(Monte_carlo(n_units, 1000))
}
