# Start from an empty workspace: removes every object in the current
# environment (attached packages and options are not affected).
rm(list = ls())
library(tidyverse)
# NOTE(review): no lubridate or rlang function appears to be called in the
# visible part of this script -- confirm before removing either library call.
library(lubridate)
library(rlang)
# Project path definitions. The script below reads `essay1_data_processed`,
# presumably defined in this file -- confirm.
source(here::here("codes","essay1_paths.R"))

# Prepare POF Data ---------

# Load the processed POF file (2002, with IPCA codes attached)
pof_ibge <- essay1_data_processed %>%
  paste0("/ibge/pof2002_with_ipca_codes.rds") %>%
  readRDS()


# Regions -------------------------------------------------------------------
# One row per region processed below:
#   uf          numeric state code matched against the `uf` column of the POF
#   region      lower-case abbreviation used in the cpi_<region>.rds file names
#               and written to the `region` column of the output
#   max_estrato highest `estrato` kept for that state (rows with estrato in
#               1..max_estrato are retained)
# NOTE(review): the estrato ranges presumably select the strata of each
# state's metropolitan area -- confirm against the POF documentation.
region_specs <- data.frame(
  uf          = c(15, 23, 26, 29, 31, 33, 35, 41, 43),
  region      = c("pa", "ce", "pe", "ba", "mg", "rj", "sp", "pr", "rs"),
  max_estrato = c(9, 10, 10, 10, 10, 20, 20, 10, 10),
  stringsAsFactors = FALSE
)
regions_numeric <- region_specs$uf

# Helpers -------------------------------------------------------------------

# Add an income `decile` column (1-10) to the POF rows of one region.
#
# Deciles are computed over the distinct (uckey, renda_mensal_uc) pairs, so
# each unit counts once regardless of how many expenditure rows it has, and
# are then joined back onto every row by `uckey`.
#
# `na.rm = TRUE` belongs to quantile(): cut() silently ignores it, so passing
# it there leaves quantile() failing on any missing income. Units with a
# missing income end up with decile = NA.
add_income_deciles <- function(pof_region) {
  deciles <- pof_region %>%
    distinct(uckey, renda_mensal_uc) %>%
    mutate(
      decile = cut(
        renda_mensal_uc,
        breaks = quantile(renda_mensal_uc,
                          probs = seq(0, 1, by = 0.1),
                          na.rm = TRUE),
        labels = seq_len(10),
        include.lowest = TRUE
      ),
      decile = as.numeric(decile)
    ) %>%
    select(-renda_mensal_uc)

  pof_region %>%
    left_join(deciles, by = "uckey")
}

# Compute 7-digit expenditure weights for one region.
#
# Returns one row per (ipca, decile) with `weight` = share of that ipca code
# in the decile's total `vad`, plus one row per ipca with decile = 11 holding
# the share in the region's total `vad`. Columns: ipca, decile, weight, region.
compute_weights_7d <- function(pof_region, region_code) {
  weights_dec <- pof_region %>%
    # rows that could not be assigned to a decile have no decile-level weight;
    # they still enter the aggregate (decile 11) below
    filter(!is.na(decile)) %>%
    group_by(ipca, decile) %>%
    summarise(tot_exp_subitem = sum(vad, na.rm = TRUE)) %>%
    group_by(decile) %>%
    mutate(tot_exp_decile = sum(tot_exp_subitem, na.rm = TRUE)) %>%
    ungroup() %>%
    mutate(weight = tot_exp_subitem / tot_exp_decile)

  weights_ag <- pof_region %>%
    group_by(ipca) %>%
    summarise(tot_exp_subitem = sum(vad, na.rm = TRUE)) %>%
    ungroup() %>%
    mutate(tot_exp = sum(tot_exp_subitem, na.rm = TRUE),
           weight = tot_exp_subitem / tot_exp,
           decile = 11) # obs: decile 11 to indicate aggregate economy

  bind_rows(weights_dec, weights_ag) %>%
    select(-starts_with("tot")) %>%
    mutate(region = !!region_code)
}

# Full pipeline for one region: keep the ipca codes that have a level-7,
# non-missing entry in that region's CPI file, keep the region's POF rows,
# attach deciles and compute the weights.
#
# uf_code, region_code, max_estrato: one row of `region_specs`.
# pof: the POF data frame; data_dir: root of the processed-data directory.
compute_regional_weights <- function(uf_code, region_code, max_estrato,
                                     pof, data_dir) {
  cpi <- readRDS(paste0(data_dir, "/ibge/cpi_", region_code, ".rds"))

  # Keep only relevant ipca codes
  cpi_codes <- cpi %>%
    filter(level == 7, !is.na(cpi)) %>%
    distinct(ipca) %>%
    mutate(ipca = as.numeric(ipca))

  pof %>%
    semi_join(cpi_codes, by = "ipca") %>%
    # `!!` inlines the argument values so they can never be shadowed by a
    # column of the same name
    filter(uf == !!uf_code, between(estrato, 1, !!max_estrato)) %>%
    add_income_deciles() %>%
    compute_weights_7d(region_code)
}

# Compute Weights at 7 digits -------------------

# One table per region, kept in a named list instead of being scattered over
# individually assign()-ed variables and recovered by name pattern.
regional_weights <- Map(
  compute_regional_weights,
  region_specs$uf,
  region_specs$region,
  region_specs$max_estrato,
  MoreArgs = list(pof = pof_ibge, data_dir = essay1_data_processed)
)
names(regional_weights) <- region_specs$region

# Stack the regions in alphabetical order of their code so the row order of
# the combined table does not depend on the order of `region_specs`.
weights_ibge_7d <- regional_weights[order(names(regional_weights))] %>%
  bind_rows()

# Check if weights sum to 1 within every (decile, region) cell.
# Cells whose sum is missing are reported too: near() returns NA for them and
# filter() would otherwise drop them silently.
bad_weight_sums <- weights_ibge_7d %>%
  group_by(decile, region) %>%
  summarise(w = sum(weight)) %>%
  ungroup() %>%
  filter(is.na(w) | !near(w, 1))

# Explicit print(): a bare top-level expression is not shown when the script
# is run through source().
print(bad_weight_sums) # expected: zero obs

# Warn instead of stopping so the file is still written, as before.
if (nrow(bad_weight_sums) > 0) {
  warning(nrow(bad_weight_sums),
          " (decile, region) cell(s) have weights that do not sum to 1",
          call. = FALSE)
}

# Save ----
# Write the combined regional weights table to the processed-data folder
saveRDS(
  object = weights_ibge_7d,
  file = paste0(essay1_data_processed,
                "/ibge/regional_weights2002_ibge_7d_decile.rds")
)
