# Data generation ---------------------------------------------------------

# Transformation function
h <- function(ystar,cp){
  
  # cp is a matrix with cut points in the second column,
  #  and the corresponding levels of y in the first. An
  #  observation is >= the level in column 1 if its latent
  #  value is >= the level in column 2. Being below cp[1,2]
  #  means the dependent variable is cp[1,1]-1.
  #
  # ystar is the latent index
  
  hstar <- numeric(length(ystar))+cp[1,1]-1
  for(i in 1:nrow(cp)) {
    # If the value of ystar that is passed on exceeds cp[,2]
    #  then overwrite the value of y with the associated 
    #  dependent value.
    hstar[ystar>=cp[i,2]] <- cp[i,1]
  }
  return(hstar)
  
}

icfe_dgp <- function(n = 100,
                     beta1 = 1, sigma = 2,
                     cp = cbind(c(2,3),c(60,70))){
  
  # Generates data from an interval-censored regression
  #  model with fixed effects and logit errors and
  #  homoskedasticity.
  #
  # n is the number of observations in the generated data
  #  set.
  # 
  # beta1 is the regression coefficient on the regressor
  #
  # sigma is the standard deviation of the error term
  #
  # cp is a matrix with cut points in the second column,
  #  and the corresponding levels of y in the first. An
  #  observation is >= the level in column 1 if its latent
  #  value is >= the level in column 2. Being below cp[1,2]
  #  means the dependent variable is cp[1,1]-1.
  #
  
  # Error terms
  ui1 = rlogis(n)
  ui2 = rlogis(n)
  
  # Covariates
  # Xi11 = rbinom(n,1,0.5) # regressor 1, period 1
  # Xi12 = rbinom(n,1,0.5) # regressor 1, period 2
  Xi11 = rnorm(n)
  Xi12 = rnorm(n)
  
  # Unobserved heterogeneity is related to first regressor
  alphai = 65 + rlogis(n) + (Xi11+Xi12)
  
  # Dependent variable: latent variable and observed
  yi1star = alphai + Xi11*beta1 - sigma*ui1
  yi2star = alphai + Xi12*beta1 - sigma*ui2
  
  # Apply censoring
  yi1 = h(yi1star,cp = cp)
  yi2 = h(yi2star,cp = cp)
  
  # Gather in a data frame
  return(data.frame(yi1,yi2,Xi11,Xi12,alphai,yi1star,yi2star))
  
}

icfe_dgp_exp <- function(n = 100,
                         beta1 = 1,
                         gamma0 = log(2), gamma1 = 0,
                         cp = cbind(c(2,3),c(60,70))){
  
  # Generate data from an interval-censored regression
  #  model with fixed effects, and exponential
  #  heteroskedasticity.
  
  # Error terms
  ui1 = rlogis(n)
  ui2 = rlogis(n)
  
  # Covariates
  Xi11 = rbinom(n,1,0.5) # regressor 1, period 1
  Xi12 = rbinom(n,1,0.5) # regressor 1, period 2
  Xi11 = rnorm(n)
  Xi12 = rnorm(n)
  
  sigma = exp(gamma0 + gamma1*(Xi11 + Xi12))
  
  # Unobserved heterogeneity is related to first regressor
  alphai = 65 + 1*rlogis(n) + 2*(Xi11+Xi12)*0.5
  
  # Dependent variable: latent variable and observed
  yi1star = alphai + Xi11*beta1 - sigma*ui1
  yi2star = alphai + Xi12*beta1 - sigma*ui2
  
  # Apply censoring
  yi1 = h(yi1star,cp = cp)
  yi2 = h(yi2star,cp = cp)
  
  # Gather in a data frame
  return(data.frame(yi1,yi2,Xi11,Xi12,alphai,yi1star,yi2star))
  
}



# Some helpfiles ----------------------------------------------------------



dichotomize <- function(x,df){
  # Takes the original data frame `df` and 
  #  `x` one line from the expanded
  #   cutpoint matrix (y1,y2,c(y2)-c(y1)) to generate 
  #   the converted data set.
  cut1 <- x[1] # cutoff value for first period
  cut2 <- x[2] # cutoff value for second period
  delta_c <- x[3] # difference between tupper cut points for latent 
  df_dich <- df %>% mutate(d1 = ifelse(yi1>=cut1,1,0),
                           d2 = ifelse(yi2>=cut2,1,0),
                           dbar = d1+d2,
                           cuts = paste(cut1,"-",cut2),
                           dc = delta_c,
                           dX = Xi12-Xi11) %>%
    filter(dbar==1)
}

expand_df <- function(df,cp){

  # Prep to feed to dichotomize
  cp_mat <- cbind(expand.grid(cp[,1],cp[,1]),expand.grid(cp[,2],cp[,2]))
  cp_mat <- cbind(cp_mat[,1:2],cp_mat[,3]-cp_mat[,4])
  names(cp_mat) <- c("cut1","cut2","dc")
  cp_mat <- as.matrix(cp_mat)
  
  # The following two lines generate a list of data frames 
  #   with one item for each combination. Then use ``Reduce``
  #   to combine them into one.
  df_list <- apply(cp_mat,1,dichotomize,df=df)
  final_df <- Reduce(rbind,df_list)
  
  return(final_df)
}

# Expand estimation --------------------------------------------------------------

icfe <- function(df,cp) {
  
  # df is a data frame that has variables
  # - yi1, the dependent variable in period 1
  # - yi2, the dependent variable in period 2
  # - Xi11, the regressor in period 1
  # - Xi12, the regressor in period 2
  #
  
  # Estimation proceeds in three steps.
  # 
  # 1. Generates a new data set that is approximately
  #      (J-1)^2 as big as the old one by copying
  #      the data set for each combination of points
  #      in cp. Generate D1 = 1(yi1>=cut1), 
  #      D2 = 1(yi2>=cut2), and dC = - (cp[cut2,2] - cp[cut2,1]).
  #      Also generate dX = Xi12-Xi11 and filter on D1+D2==1.
  final_df <- expand_df(df,cp) 
  #
  # 2. Run logit on augmented data, and extract 
  #      (b and sigma)-hat.
  
  # Run the logit
  results_logit <- glm(d2 ~ -1 + dX + dc,family = "binomial",data = final_df)
  
  # Finally, to make the results useful, we extract...
  sigma_hat <- 1/results_logit$coefficients["dc"]
  beta_hat <- sigma_hat * results_logit$coefficients["dX"]
  
  return(list(sigma_hat=sigma_hat,beta_hat=beta_hat))
  
}