###################################################################################################
###  Monte Carlo Simulation Code for "Estimation of Average Treatment Effects Using Panel Data  ###
###  when Treatment Effect Heterogeneity Depends on Unobserved Fixed Effects"                   ###
###################################################################################################

###################################
##  Models 1 and 2 in Appendix B ##
###################################

####loading packages####
library(MASS); library(plm)
library(AER); library(systemfit)

###coefficient values###
## Treatment-assignment index: alpha0 is the intercept, alpha1 multiplies X
## and alpha2 multiplies W inside Monte_carlo. alpha3 is defined here but is
## not referenced by the code in this file section.
alpha0 <- -2
alpha1 <- -1
alpha2 <- 1
alpha3 <- 1

## Untreated potential outcome Y0: intercept, time-trend, X and W coefficients.
beta00 <- 2
beta0T <- 0.1
beta01 <- 1
beta02 <- -0.5

## Treated potential outcome Y1: intercept, time-trend, X and W coefficients.
beta10 <- 1
beta1T <- 0.15
beta11 <- 2
beta12 <- 0.5

## Coefficients on the fixed effect C in Y0 (gamma0) and Y1 (gamma1).
gamma0 <- 1
gamma1 <- 1 ##set gamma1 <- 1 and gamma1 <- 3 for Models 1 and 2, respectively

## Means entering the true ATE (mu_x1, mu_w1, mu_c). mu_x2 and mu_w2 are
## defined here but are not referenced by the code in this file section.
mu_x1 <- 1
mu_x2 <- 1.5
mu_w1 <- 1
mu_w2 <- 1.5
mu_c <- 1

###Monte Carlo Simulation Function###

## Run the Monte Carlo experiment for one (N, T) design.
##
## Arguments:
##   N    number of cross-sectional units (at least 2)
##   T    number of time periods (at least 2). NOTE: inside this function `T`
##        is this number, not the logical shorthand, so TRUE/FALSE are always
##        spelled out below.
##   Rep  number of Monte Carlo replications
##
## Reads the file-level coefficient values (alpha0-alpha2, beta*, gamma0,
## gamma1, mu_x1, mu_w1, mu_c) and needs MASS (mvrnorm) and plm
## (pdata.frame, plm) to be attached.
##
## Returns an unnamed list of three numeric vectors of length 7 -- mean bias,
## SD and RMSE, each averaged over the T periods -- with the estimators ordered
## as OLS, FE, GMM(Z), GMM(X), GMM(W), GMM(X,W), GMM(X,W,Z).
Monte_carlo <- function(N,T,Rep){
  ## fail early with a clear message instead of an obscure subscript error
  stopifnot(N >= 2, T >= 2, Rep >= 1)

  periods <- seq_len(T)
  rho <- 0.3               ## AR(1) coefficient shared by every error process
  var0 <- 1/(1 - rho^2)    ## stationary variance for unit innovations (= 1/0.91)

  ##True Values of ATE##
  ATE_true <- (beta10-beta00) + (beta1T-beta0T)*periods + (beta11-beta01)*mu_x1 + (beta12-beta02)*mu_w1 + (gamma1-gamma0)*mu_c

  ##initial parameter values##
  ## par[1:3] : time-trend, X and W coefficients of the treated equation
  ## par[4:6] : time-trend, X and W coefficients of the untreated equation
  ## par[7]   : intercept of the treated-mover equation
  ## par[8]   : loading on ETA0 (its reciprocal loads on ETA1), started at gamma1
  ## par[9]   : intercept of the untreated-mover equation
  ## par[9+t] : ATE in period t, t = 1..T
  initial_par <- c(beta1T,beta11,beta12,beta0T,beta01,beta02,beta10-(gamma1/gamma0)*beta00,gamma1,beta00-(gamma0/gamma1)*beta10,ATE_true)

  ###storage for estimates (one row per replication, one column per period)###
  ATE_hat_z <- matrix(0,Rep,T); ATE_hat_x <- matrix(0,Rep,T); ATE_hat_w <- matrix(0,Rep,T)
  ATE_hat_xw <- matrix(0,Rep,T); ATE_hat_xwz <- matrix(0,Rep,T)
  OLS <- matrix(0,Rep,T); FE <- matrix(0,Rep,T)

  ##preliminary##
  TT <- matrix(periods,N,T,byrow=TRUE)    ## N x T matrix whose every row is 1..T

  m_v0v1 <- rep(0,2)
  sigma_v0v1 <- matrix(c(1,0.5,0.5,1),2,2)
  m_u0u1 <- rep(0,2)
  sigma_u0u1 <- sigma_v0v1*var0           ## covariance of the pre-sample draw

  ## N x T panel of independent AR(1) paths with N(0,1) innovations.
  ## rnorm() takes a standard deviation, so the pre-sample draw uses
  ## sqrt(var0); passing the variance itself would over-disperse it.
  ar1_panel <- function(){
    path <- matrix(0,N,T)
    state <- rnorm(N,0,sqrt(var0))
    for(s in periods){
      state <- rho*state + rnorm(N,0,1)
      path[,s] <- state
    }
    path
  }

  ## Row mean of M over the periods selected by the 0/1 matrix `mask`;
  ## units with no selected period get 0 (numerator is 0, denominator forced to 1).
  masked_row_mean <- function(M,mask){
    rowSums(mask*M)/pmax(rowSums(mask),1)
  }

  ##replication begins##
  for(r in seq_len(Rep)){

    ##error components##
    u_state <- mvrnorm(N,m_u0u1,sigma_u0u1)
    u0 <- matrix(0,N,T); u1 <- matrix(0,N,T)
    for(s in periods){
      u_state <- rho*u_state + mvrnorm(N,m_v0v1,sigma_v0v1)
      u0[,s] <- u_state[,1]; u1[,s] <- u_state[,2]
    }
    eta1 <- ar1_panel()
    eta2 <- ar1_panel()
    uD <- ar1_panel()

    ##exogenous variables and fixed effects##
    C <- rnorm(N,1,1)
    C_mat <- matrix(C,N,T)                ## C[i] repeated across the T columns
    X <- 0.7 + 0.3*C_mat + eta1
    W <- 1 + eta2

    ##instrumental variable##
    Z <- C_mat + ar1_panel()

    ##treatment assignment##
    D <- (alpha0 + alpha1*X + alpha2*W + C_mat + uD > 0)*1

    ##potential outcomes##
    Y1 <- beta10 + beta1T*TT + beta11*X + beta12*W + gamma1*C_mat + u1
    Y0 <- beta00 + beta0T*TT + beta01*X + beta02*W + gamma0*C_mat + u0

    ##observed outcome##
    Y <- D*Y1 + (1-D)*Y0

    ###Proposed Estimation###

    ##transformation: unit means over treated / untreated periods##
    ## D*Y equals D*Y1 and (1-D)*Y equals (1-D)*Y0, so only observables are used.
    Y1_bar <- masked_row_mean(Y,D);   Y0_bar <- masked_row_mean(Y,1-D)
    X1_bar <- masked_row_mean(X,D);   X0_bar <- masked_row_mean(X,1-D)
    W1_bar <- masked_row_mean(W,D);   W0_bar <- masked_row_mean(W,1-D)
    TT1_bar <- masked_row_mean(TT,D); TT0_bar <- masked_row_mean(TT,1-D)

    ## A length-N vector recycles down the columns, i.e. entry i hits row i.
    Y1_dash <- Y - Y1_bar; Y0_dash <- Y - Y0_bar
    X1_dash <- X - X1_bar; X0_dash <- X - X0_bar
    W1_dash <- W - W1_bar; W0_dash <- W - W0_bar
    ## Demean the time index TT, not the scalar number of periods T.
    TT1_dash <- TT - TT1_bar; TT0_dash <- TT - TT0_bar

    ## Movers: D for units that are not always treated, 1-D for units that
    ## are not never treated; zero rows otherwise.
    n_treated <- rowSums(D)
    D1_mover <- D*(n_treated != T)
    D0_mover <- (1-D)*(n_treated != 0)

    ##GMM moments##
    ## N x k matrix of per-unit moment contributions (time averages for the
    ## first blocks, period-specific columns for the ATE block).
    ## `instruments` is a list of N x T matrices that, together with a
    ## constant, instrument the two mover equations.
    unit_moments <- function(par,instruments){
      res1 <- Y1_dash - TT1_dash*par[1] - X1_dash*par[2] - W1_dash*par[3]
      res0 <- Y0_dash - TT0_dash*par[4] - X0_dash*par[5] - W0_dash*par[6]
      ETA0 <- Y0_bar - TT0_bar*par[4] - X0_bar*par[5] - W0_bar*par[6]
      ETA1 <- Y1_bar - TT1_bar*par[1] - X1_bar*par[2] - W1_bar*par[3]
      fit1 <- par[7] + TT*par[1] + X*par[2] + W*par[3] + par[8]*ETA0
      fit0 <- par[9] + TT*par[4] + X*par[5] + W*par[6] + (1/par[8])*ETA1
      e1 <- D1_mover*(Y - fit1)
      e0 <- D0_mover*(Y - fit0)
      ## constant first, then one column per instrument
      mover_block <- function(e){
        cbind(rowMeans(e), do.call(cbind, lapply(instruments, function(inst) rowMeans(inst*e))))
      }
      G_ate <- D*(Y - fit0) + (1-D)*(fit1 - Y) - matrix(par[9+periods],N,T,byrow=TRUE)
      cbind(
        rowMeans(D*X1_dash*res1), rowMeans(D*W1_dash*res1),
        rowMeans((1-D)*X0_dash*res0), rowMeans((1-D)*W0_dash*res0),
        mover_block(e1), mover_block(e0), G_ate
      )
    }

    ## Two-step GMM for one instrument set; returns the T estimated ATEs.
    ## optim() runs with its defaults (no method/control given) and its
    ## convergence code is not inspected.
    two_step_ate <- function(instruments){
      g_bar <- function(par) colMeans(unit_moments(par,instruments))

      ##First step: identity weighting##
      step1 <- optim(par=initial_par, fn=function(par) sum(g_bar(par)^2))

      ##Second step: weight by the inverse second-moment matrix##
      g1 <- unit_moments(step1$par,instruments)
      weight <- solve(crossprod(g1)/N)
      step2 <- optim(par=step1$par, fn=function(par){
        g <- g_bar(par)
        drop(crossprod(g, weight%*%g))
      })

      ## ATE_t lives in par[9+t], matching the ATE moment block above.
      step2$par[9+periods]
    }

    ##Estimation of ATE##
    ATE_hat_z[r,] <- two_step_ate(list(Z))
    ATE_hat_x[r,] <- two_step_ate(list(X))
    ATE_hat_w[r,] <- two_step_ate(list(W))
    ATE_hat_xw[r,] <- two_step_ate(list(X,W))
    ATE_hat_xwz[r,] <- two_step_ate(list(X,W,Z))

    ##creating panel data (unit-major: all periods of unit 1, then unit 2, ...)##
    time_long <- as.numeric(rep(periods, times=N))
    panel_df <- data.frame(
      id = as.numeric(rep(seq_len(N), each=T)),
      t = time_long,
      y = as.vector(t(Y)),
      d = as.vector(t(D)),
      x_mdif = as.vector(t(X - matrix(colMeans(X),N,T,byrow=TRUE))),  ## X minus its period mean
      w_mdif = as.vector(t(W - matrix(colMeans(W),N,T,byrow=TRUE))),  ## W minus its period mean
      t2 = time_long
    )
    pdata <- pdata.frame(panel_df, index=c("id","t"))

    ###Pooling OLS Estimation###
    result_OLS <- plm(y ~ d:factor(t) + t2 + d*x_mdif + d*w_mdif, data=pdata, model= "pooling")

    ###FE Estimation###
    result_FE <- plm(y ~ d:factor(t) + t2 + d*x_mdif + d*w_mdif, data=pdata, model= "within")

    ##result accumulation##
    ## Positional extraction as in the original script: the coefficient on d
    ## plus the T-1 d:factor(t) coefficients that follow the main effects.
    ## NOTE(review): assumes plm keeps this coefficient order and drops none.
    b_ols <- result_OLS$coefficients
    b_fe <- result_FE$coefficients
    OLS[r,] <- b_ols[[3]] + c(0, b_ols[5+seq_len(T-1)])
    FE[r,] <- b_fe[[2]] + c(0, b_fe[4+seq_len(T-1)])
    ##replication ends##
  }

  ## Per-estimator summary: mean bias, mean-over-periods SD and RMSE.
  summarise_estimator <- function(est){
    bias <- sweep(est,2,ATE_true)
    c(mean(bias), mean(apply(est,2,sd)), mean(sqrt(colMeans(bias^2))))
  }
  stats <- vapply(
    list(OLS,FE,ATE_hat_z,ATE_hat_x,ATE_hat_w,ATE_hat_xw,ATE_hat_xwz),
    summarise_estimator, numeric(3)
  )

  list(
    ###Mean bias###
    stats[1,],
    ###SD###
    stats[2,],
    ###RMSE###
    stats[3,]
  )
}


###Results###
## One run per (N, T) design, 1000 replications each. The calls stay as
## top-level expressions so each returned list is shown exactly as before.
## T = 10
Monte_carlo(N = 200, T = 10, Rep = 1000)
Monte_carlo(N = 500, T = 10, Rep = 1000)
Monte_carlo(N = 800, T = 10, Rep = 1000)
## T = 20
Monte_carlo(N = 200, T = 20, Rep = 1000)
Monte_carlo(N = 500, T = 20, Rep = 1000)
Monte_carlo(N = 800, T = 20, Rep = 1000)
## T = 30
Monte_carlo(N = 200, T = 30, Rep = 1000)
Monte_carlo(N = 500, T = 30, Rep = 1000)
Monte_carlo(N = 800, T = 30, Rep = 1000)
