#' Main function for implementing the IMPUTED ESTIMATOR. 
#'
#' @param formula regression formula, like Hour ~ log(HOURWAGE) + AGE + I(AGE^2)  | AGE + I(AGE^2) + decileWAGE. Here the endogenous independent variable is log(HOURWAGE), and the IV for it is decileWAGE.
#' @param data name of data
#' @param x.report The names of the variables in the formula, which will be reported in the return of this function. For example, c("log(HOURWAGE)", "AGE").
#' @param x.names Similar to x.report, but do not add math operators like log. For example, c("HOURWAGE", "AGE").
#' @param elsty.type A vector of 1 or 2 with the same length as x.report. elsty.type = 1 means elasticity = β / mean of weekly hours (e.g. log(HOURWAGE), NCHLT5) elsty.type = 2 means elasticity = β * (mean of x / mean of weekly hours) (e.g. SPWAGE)
#' @return List of param (parameter estimates), elsty (estimates of elasticities), vcov.atus (variance matrix), r.squared (R squared)
tusreg <- function(formula, data, x.report, x.names = x.report, elsty.type) {
  formula <- Formula::as.Formula(formula)
  stopifnot(length(formula)[1] == 1L, length(formula)[2] %in% 1:2)
  has_dot <- function(formula) inherits(try(terms(formula),
      silent = TRUE), "try-error")
  if (has_dot(formula)) {
    f1 <- formula(formula, rhs = 1)
    f2 <- formula(formula, lhs = 0, rhs = 2)
    if (!has_dot(f1) & has_dot(f2))
      formula <- Formula::as.Formula(f1, update(formula(formula,
            lhs = 0, rhs = 1), f2))
  }

  # formula for predicting daily hours
  if (length(formula)[2] == 1) {
    f.day <- formula(formula, lhs = 1, rhs = 1)
    f.xz <- formula(formula, lhs = 0, rhs = 1) # x part of the formula (no z)
  } else {
    # this is IV formula
    f.day <- formula(formula, lhs = 1, rhs = 2)
    f.xz <- formula(formula, lhs = 0, rhs = 1 : 2) # x and z part of the formula
  }

  # step 1: predict hours using IV
  # sanitizeX is to create a formula of Hour ~ Z for each day. This is because
  # the prediction data might contain levels that are absent in the estimation
  # data. In these cases, we will omit that factor in regression Hour ~ Z.
  sanitizeX <- function(day) {
    d <- list()
    d[[1]] <- data %>% filter(DAY == day) # responded on day
    d[[2]] <- data %>% filter(DAY != day) # not responded
    l <- list()
    tm.lb <- attr(terms(f.day), "term.labels")
    oldw <- getOption("warn")
    options(warn = -1)
    for (i in 1 : 2) {
      l[[i]] <- d[[i]] %>% 
        select(one_of(tm.lb)) %>% select_if(is.factor) %>% 
        # 'droplevels' is to remove the levels that did not appear in the data.
        mutate_all(droplevels) %>% 
        map( ~ levels(.))
    }
    options(warn = oldw)
    # The levels in the prediction data should be a subset of the estimation data 
    is.x.bad <- c()
    for (i in 1 : length(l[[1]])) {
      is.x.bad[i] <- ! (is.element(l[[2]][[i]], l[[1]][[i]]) %>% all())
    }
    bad.x.pos <- match(
      names(l[[1]])[is.x.bad], # names of bad x variables
      tm.lb
    )
    if (length(bad.x.pos) > 0) {
      f.day.new <- reformulate(
        drop.terms(terms(f.day), bad.x.pos) %>% attr(., 'term.labels'),
        response = 'Hour')
      return(f.day.new)
    } else {
      return(f.day)
    }
  }
  # Compute the var(U) from ivreg object. The ivreg$sigma seems to be wrong. So
  # I just use the formula to do this job.
  var.ivreg <- function(iv1) sum(iv1$residuals ^ 2) / iv1$df.residual

  # (a) Impute hours worked on each day; (b) Run within estimator.
  # 'b' = list(βhat{1}, ..., βhat{7})
  r.squared <- c(); b <- list(); var.u <- c(); rnt <- c()
  resd <- list() # track the projection residuals V
  sandwich.day <- list() # save the (weighted) sandwich variance of IV regression for each day
  for (day in 1 : 7) {
    # OLS for predicting hours
    f.day.b <- sanitizeX(day)
    d1 <- data %>% filter(DAY == day)
    lm.day <- lm(f.day.b, data = d1, weights = WT06)
    # HourHat1, ..., HourHat7 are imputed hours worked on day 1, ..., 7.
    data <- data %>% 
      mutate(!! paste0('HourHat', day) := predict(lm.day, data))
    # IV regression for one day ('ivreg' also works as OLS routine)
    iv.one.day <- ivreg(formula, data = d1, weights = WT06)
    resd[[day]] <- tibble(
      V = lm.day$residuals, U.day = iv.one.day$residuals,
      DAY = day, CASEID = d1$CASEID)
    r.squared[day] <- summary(iv.one.day)$r.squared # R squared of IV reg per day
    b[[day]] <- coef(iv.one.day)
    var.u[day] <- var.ivreg(iv.one.day)
    rnt[day] <- nrow(data) / nrow(d1)
    sandwich.day[[day]] <- pastrami(iv.one.day)$sandwich
  }
  resd <- bind_rows(resd)

  # WITHIN ESTIMATORS #
  # Some occupations or industries may be absent in some days. So the elements
  # of b list contain different variables. I need to find and keep only the
  # common ones first.
  # Coefficients names that appear in the IV regression of everyday
  cmn.vars <- b %>% map(names) %>% reduce(intersect)
  # Sum the estimated coefficients over the week
  b.wthn <- b %>% map(~ .[cmn.vars]) %>% reduce(`+`)
  # Sum the weighted sandwich variance over the week
  vcov.wthn <- sandwich.day %>% map(~ .[cmn.vars, cmn.vars]) %>% reduce(`+`)
  est.wthn <- list(
    r.squared = mean(r.squared), 
    b = b.wthn,
    vcov = vcov.wthn
    )

  # (b) Run 2SLS regression using imputed weekly hours
  data <- data %>% 
    mutate(
      HourWeek = data %>% select(starts_with('HourHat')) %>% pmap_dbl(sum) # pmap to sum across columns
      )
  formula.wk <- update(Formula::Formula(HourWeek ~ .), f.xz)
  iv.im <- ivreg(formula.wk, data = data, weights = WT06, x = TRUE)
  # Add the second step residuals to resd
  resd <- resd %>% left_join(data %>% select(CASEID) %>% mutate(U = iv.im$residuals), by = "CASEID")
  est.im <- list(b = coef(iv.im))

  # Pool estimator (treat the multiplied the daily hours as the weekly hours, and run 2SLS)
  data <- data %>% group_by(DAY) %>%
    mutate(
      r = nrow(data) / n(),
      HourWeighted = Hour * r
      ) %>% ungroup()
  formula.wght <-  update(Formula::Formula(HourWeighted ~ .), f.xz)
  # IV regression using day estimator
  iv.pool <- ivreg(formula.wght, data = data, weights = WT06, x = TRUE)
  est.pool <- list(r.squared = summary(iv.pool)$r.squared, b = coef(iv.pool))

  # Variance A (for imputed estimator only)
  est.im$vcov <- var.im(iv.im, resd)

  # Variance B (for pooled estimator)
  est.pool$vcov <- var.pool(iv.pool, resd, data %>% select(starts_with('HourHat')))
  est.pool$vcov_2SLS <- pastrami(iv.pool)$sandwich

  # Merge estimates and std err to a beautiful tibble
  bhat <- tibble(
    Variables = x.report,
    im = est.im$b[x.report],
    wthn = est.wthn$b[x.report],
    pool = est.pool$b[x.report],
    pool_2SLS = est.pool$b[x.report]
    )
  se <- tibble(
    Variables = x.report,
    im = est.im$vcov[x.report, x.report] %>% diag() %>% sqrt(),
    wthn = NA,
    pool = est.pool$vcov[x.report, x.report] %>% diag() %>% sqrt(),
    pool_2SLS = est.pool$vcov_2SLS[x.report, x.report] %>% diag() %>% sqrt()
    )
  param <- stack.tib(bhat, surround(se))

  # Create a similar table for elasticities
  means <- data %>% select(HourWeek, x.names) %>% summarize_all(mean)
  cat('Mean of relevant variables:\n')
#   print.data.frame(as.data.frame(means), digits = 3)
  print(slice(means, 1)$HourWeek, digits = 4)
  # Multiple βhat and se(β) by adj to get elasticity and their respective std err.
  adj <- rep(100 / means[['HourWeek']], length(x.names))
  pos.type2 <- which(elsty.type == 2)
  adj[pos.type2] <- adj[pos.type2] * unlist(means[x.names[pos.type2]])
  adj.fun <- function(tib) tib %>% select(Variables) %>% bind_cols(tib %>% select(-Variables) * adj)
  ehat <- adj.fun(bhat)
  ese <- adj.fun(se)
  elsty <- stack.tib(ehat, surround(ese, dgt = 2))

  # return
  list(
    param = param,
    elsty = elsty,
    vcov.atus = est.im$vcov[x.report, x.report],
    r.squared = tibble(Variables = "R squared", im = est.wthn$r.squared, pool = est.pool$r.squared)
  )
}

cpsreg <- function(formula, data, x.report, x.names = x.report, elsty.type) {
  # Regression formula
  formula <- Formula::as.Formula(formula)
  stopifnot(length(formula)[1] == 1L, length(formula)[2] %in% 1:2)
  has_dot <- function(formula) inherits(try(terms(formula),
      silent = TRUE), "try-error")
  if (has_dot(formula)) {
    f1 <- formula(formula, rhs = 1)
    f2 <- formula(formula, lhs = 0, rhs = 2)
    if (!has_dot(f1) & has_dot(f2))
      formula <- Formula::as.Formula(f1, update(formula(formula,
            lhs = 0, rhs = 1), f2))
  }
  iv.cps <- ivreg(formula, data = data, weights = WT06)
  est.cps <- list(
    r.squared = summary(iv.cps)$r.squared,
    b = coef(iv.cps),
    vcov = pastrami(iv.cps)$sandwich
    )

  # Merge estimates and std err to a beautiful tibble
  bhat <- tibble(
    Variables = x.report,
    cps = est.cps$b[x.report]
    )
  se <- tibble(
    Variables = x.report,
    cps = est.cps$vcov[x.report, x.report] %>% diag() %>% sqrt()
    )
  param <- stack.tib(bhat, surround(se))
  # Elasticities
  means <- data %>% select(HRSATRATE, x.names) %>% summarize_all(mean)
  # Multiple βhat and se(β) by adj to get elasticity and their respective std err.
  adj <- rep(100 / means[['HRSATRATE']], length(x.names))
  pos.type2 <- which(elsty.type == 2)
  adj[pos.type2] <- adj[pos.type2] * unlist(means[x.names[pos.type2]])
  adj.fun <- function(tib) tib %>% select(Variables) %>% bind_cols(tib %>% select(-Variables) * adj)
  ehat <- adj.fun(bhat)
  ese <- adj.fun(se)
  elsty <- stack.tib(ehat, surround(ese, dgt = 2))

  # return
  list(
    param = param,
    elsty = elsty,
    vcov = est.cps$vcov[x.report, x.report],
    r.squared = tibble(Variables = "R squared", cps = est.cps$r.squared)
  )
}

cpsreg2 <- function(formula, data, x.report, x.names = x.report, elsty.type) {
  # Regression formula
  formula <- Formula::as.Formula(formula)
  stopifnot(length(formula)[1] == 1L, length(formula)[2] %in% 1:2)
  has_dot <- function(formula) inherits(try(terms(formula),
      silent = TRUE), "try-error")
  if (has_dot(formula)) {
    f1 <- formula(formula, rhs = 1)
    f2 <- formula(formula, lhs = 0, rhs = 2)
    if (!has_dot(f1) & has_dot(f2))
      formula <- Formula::as.Formula(f1, update(formula(formula,
            lhs = 0, rhs = 1), f2))
  }
  iv.cps <- ivreg(formula, data = data, weights = ASECWT)
  est.cps <- list(
    r.squared = summary(iv.cps)$r.squared,
    b = coef(iv.cps),
    vcov = pastrami(iv.cps)$sandwich
    )

  # Merge estimates and std err to a beautiful tibble
  bhat <- tibble(
    Variables = x.report,
    cps = est.cps$b[x.report]
    )
  se <- tibble(
    Variables = x.report,
    cps = est.cps$vcov[x.report, x.report] %>% diag() %>% sqrt()
    )
  param <- stack.tib(bhat, surround(se))
  # Elasticities
  means <- data %>% select(HRSATRATE, x.names) %>% summarize_all(mean)
  # Multiple βhat and se(β) by adj to get elasticity and their respective std err.
  adj <- rep(100 / means[['HRSATRATE']], length(x.names))
  pos.type2 <- which(elsty.type == 2)
  adj[pos.type2] <- adj[pos.type2] * unlist(means[x.names[pos.type2]])
  adj.fun <- function(tib) tib %>% select(Variables) %>% bind_cols(tib %>% select(-Variables) * adj)
  ehat <- adj.fun(bhat)
  ese <- adj.fun(se)
  elsty <- stack.tib(ehat, surround(ese, dgt = 2))

  # return
  list(
    param = param,
    elsty = elsty,
    vcov = est.cps$vcov[x.report, x.report],
    r.squared = tibble(Variables = "R squared", cps = est.cps$r.squared)
  )
}


# Compute variance using the new formual allowing for HSK.
var.im <- function(iv.im, resd) {
  # INPUTS:
  # - 'iv.im': an ivreg object from the SECOND STEP of IV regression AFTER
  # IMPUTING weekly hours.
  # - resd has three columns 'U', 'V' and 'DAY', where 'U' is the residuals of IV
  # regression of \hat{H}^{w} on X{i} with IV Z{i}, and 'V' is the residuals of
  # linear regression of H{it} on Z{i} when 'DAY' = t.
  # OUTPUT >>
  # 'Omega' the vcov of imputed estimator using DTUS correlation matrix.
  Xhat <- model.matrix(iv.im, component = 'projected') # The xhat after the firste stage IV
  X <- iv.im$x$"regressors"
  Z <- iv.im$x$"instruments"
  if (is.null(Z)) {
    # Z will be NULL if there were no instruments. Let Z = X in this case.
    Z <- X
  }
  n <- nrow(Xhat)
  A.inv <- solve(crossprod(Xhat) / n) # This is inverse(B C^(-1) B')
  B <- crossprod(X, Z) / n
  C <- crossprod(Z, Z) / n
  ABC <- A.inv %*% B %*% solve(C) # A^(-1)BC^(-1) bread matrix.
  # Column V of resd is the residuals from Hours ~ Z; U is the residuals
  # from HoursWeekHat ~ X
  rnt <- resd %>% arrange(DAY) %>% group_by(DAY) %>% summarize(r = nrow(resd) / n()) %>% pull(r)
  DAY <- resd$DAY
  lambda <- matrix(0, nrow = ncol(ABC), ncol = ncol(ABC))
  for (d in 1 : 7) {
    l1 <- which(DAY == d)
    DV <- Diagonal(x = resd %>% filter(DAY == d) %>% pull(V))
    DU <- Diagonal(x = resd %>% filter(DAY == d) %>% pull(U))
    part1 <- rnt[d] * crossprod(Z[l1, ], (DV %*% DV) %*% Z[l1, ]) / length(l1)
    part2 <- 2 * crossprod(Z[l1, ], (DV %*% DU) %*% Z[l1, ]) / length(l1)
    part3 <- crossprod(Z[l1, ], (DU %*% DU) %*% Z[l1, ]) / length(l1)
    lambda <- lambda + (part1 + part2 + part3)
  }
  Omega <- tcrossprod(ABC %*% lambda, ABC) / nrow(resd)
  Omega
}

# Compaute variance of pooled estimator.
var.pool <- function(iv.pool, resd, data) {
  # INPUTS:
  # - 'iv.pool': an ivreg object from the pooled regression.
  # - resd has three columns 'U', 'V' and 'DAY', where 'U' is the residuals of IV
  # regression of \hat{H}^{w} on X{i} with IV Z{i}, and 'V' is the residuals of
  # linear regression of H{it} on Z{i} when 'DAY' = t.
  # - We need 'data' for Z'\alpha_{t} that HourHat{t}.
  # OUTPUT >>
  # 'Omega' the vcov of imputed estimator using DTUS correlation matrix.
  Xhat <- model.matrix(iv.pool, component = 'projected') # The xhat after the firste stage IV
  X <- iv.pool$x$"regressors"
  Z <- iv.pool$x$"instruments"
  if (is.null(Z)) {
    # Z will be NULL if there were no instruments. Let Z = X in this case.
    Z <- X
  }
  n <- nrow(Xhat)
  A.inv <- solve(crossprod(Xhat) / n) # This is inverse(B C^(-1) B')
  B <- crossprod(X, Z) / n
  C <- crossprod(Z, Z) / n
  ABC <- A.inv %*% B %*% solve(C) # A^(-1)BC^(-1) bread matrix.
  # Column V of resd is the residuals from Hours ~ Z; U is the residuals
  # from HoursWeekHat ~ X
  rnt <- resd %>% arrange(DAY) %>% group_by(DAY) %>% summarize(r = nrow(resd) / n()) %>% pull(r)
  DAY <- resd$DAY
  lambda <- matrix(0, nrow = ncol(ABC), ncol = ncol(ABC))
  DXBeta <- Diagonal(x = iv.pool$fitted.values) # n-by-n diagonal matrix consisting of fitted values of the last stage
  for (d in 1 : 7) {
    l1 <- which(DAY == d)
    DV <- Diagonal(x = resd %>% filter(DAY == d) %>% pull(V))
    DU <- Diagonal(x = resd %>% filter(DAY == d) %>% pull(U))
    DZAlpha <- Diagonal(x = data %>% pull(paste0("HourHat", d))) # n-by-n Diagonal matrix consisting of Z'alpha{t}
    part1 <- rnt[d] * crossprod(Z[l1, ], (DV %*% DV) %*% Z[l1, ]) / length(l1)
    part2 <- rnt[d] * crossprod(Z, (DZAlpha %*% DZAlpha) %*% Z) / n
    # There is only one part3 (instead of sum over 7 days). I add part3 after the loop over 7 days.
    part4 <- 2 * crossprod(Z, (DZAlpha %*% DXBeta) %*% Z) / n
    part5 <- 2 * crossprod(Z[l1, ], (DV %*% DU) %*% Z[l1, ]) / length(l1)
    lambda <- lambda + (part1 + part2 - part4 + part5)
  }
  part3 <- crossprod(Z, (DXBeta %*% DXBeta) %*% Z) / n
  lambda <- lambda + part3
  Omega <- tcrossprod(ABC %*% lambda, ABC) / nrow(resd)
  Omega
}

surround <- function(tib, lhs = '(', rhs = ')', dgt = 3) {
  nm <- tib %>% select_if(~ !is.character(.)) %>% names()
  m <- tib %>% select_if(~ !is.character(.)) %>% as.matrix() %>% round(., digits = dgt)
  m1 <- m %>% paste0(lhs, ., rhs)
  dim(m1) <- dim(m)
  m1 <- as_tibble(m1)
  names(m1) <- nm
  for (v in nm) {
    tib[[v]] <- m1[[v]]
  }
  # Replace NA back
  tib[tib == paste0(lhs, 'NA', rhs)] <- NA
  tib
}

stack.tib <- function(..., key = NULL) {
  arg <- list(...)
  if (is.null(key)) key <- names(arg[[1]])[1]
  if (names(arg[[1]])[1] != key) stop('The first column should be variables (key)!')
  if(arg %>% map(~ names(.)) %>% unique() %>% length() != 1) stop('The column names and ordering should be identical')
  nm <- names(arg[[1]])
  l <- list()
  for (i in 2 : ncol(arg[[1]])) {
    tmp <- arg %>% map(~  select(., 1, i)) %>% reduce(left_join, by = key)
    part <- tmp %>% select(-1) %>% t() %>% as_tibble() %>% gather()
    part['key'] <- tmp %>% pull(key) %>% map(~ c(., rep(NA, length(arg) - 1))) %>% unlist()
    names(part) <- c(key, nm[i])
    l[[i-1]] <- part
  }
  l[[1]] %>% select(key) %>% 
    bind_cols(.,
      l %>% map(~ select(., - key)) %>% bind_cols()
    )
}

# bind_cols2 works like bind_col, but it ignores the first column excepting for
# the first element
bind_cols2 <- function(...) {
  lst <- list(...)
  if (length(lst) == 1) lst <- lst[[1]]
  lst2 <- lst[-1]
  lst2 <- lst2 %>% map(~ .[, -1])
  bind_cols(lst[[1]], lst2)
}

appendname <- function(tib, name) {
  # Add name in front of the colname of tib, excepting for the first column.
  cn <- colnames(tib)
  tib %>% `colnames<-` (c(cn[1], paste0(name, cn[-1])))
}

addrow2 <- function(tib, .after) {
  # Add NA rows after the row index in .after
  .after <- .after + cumsum(rep(1, length(.after))) - 1
  for (i in .after) {
    tib <- add_row(tib, .after = i)
  }
  tib
}

hausman <- function(b.ef, b.if, v.ef, v.if) {
  # b.ef and b.if are efficient and inefficient estimators
  # v.ef and v.if are their covariance matrix
  h <- t(b.ef - b.if) %*% solve(v.if - v.ef) %*% (b.ef - b.if) %>% drop()
  p <- 1 - pchisq(h, df = length(b.ef))
  tibble(Variables = "p Value of Hausman test", pValue = p)
}


hausman2 <- function(cps, tus) {
  b.ef <- cps$param %>% drop_na() %>% pull(cps) %>% as.numeric()
  b.if <- tus$param %>% drop_na() %>% pull(im) %>% as.numeric()
  v.ef <- cps$vcov %>% as.matrix()
  v.if <- tus$vcov.atus %>% as.matrix()
  # hausman(b.ef, b.if, v.ef, v.if)
  hausman(b.ef[1], b.if[1], v.ef[1, 1], v.if[1, 1])
}

# A substitute of 'sandwich' function for 'ivreg' function. The sandwich
# formula reads
# sqrt(n) (βhat - β) -> N(0, A^(-1)BC^(-1) E(ZUU'Z')[A^(-1)BC^(-1)]'),
# where A = E(XhatXhat'], B = E(XZ'), C = E(ZZ').
# bread = A^(-1)BC^(-1)
# meat = E(ZUU'Z').
pastrami <- function(iv1) {
  # iv1 is the return from ivreg
  # The return is a list of variance of beta, bread and meat of the sandwich formula
  X <- model.matrix(iv1, component = 'regressors')
  Z <- model.matrix(iv1, component = 'instruments')
  # If 'iv1' is OLS as a special case, Z will be null. Replace it by X itself.
  if (is.null(Z)) {
    Z <- X
  }
  Xhat <- model.matrix(iv1, component = 'projected') # The xhat after the firste stage IV
  n <- nrow(X)
  A <- crossprod(Xhat) / n
  B <- crossprod(X, Z) / n
  C <- crossprod(Z) / n
  # We have toasted bread
  bread <- solve(A) %*% B %*% solve(C)
  # meat is below
  meat <- t(Z) %*% Diagonal(x = iv1$residuals ^ 2) %*% Z / iv1$df.residual
  # stack to make sandwich
  sandwich <- bread %*% meat %*% t(bread) / n
  list(sandwich = sandwich, bread = bread, meat = meat)
}
