rm(list = ls())
library(tidyverse)
library(lubridate)
library(rlang)
source(here::here("codes","essay1_paths.R"))

# Prepare POF Data ---------

# Load the processed POF 2002 file that carries an `ipca` code column.
pof_ibge <- readRDS(
  file = paste0(essay1_data_processed, "/ibge/pof2002_with_ipca_codes.rds")
)

# Build the list of IPCA codes to keep: the distinct codes that the CPI data
# records at level 7, coerced to numeric before being used as a join key.
cpi <- readRDS(file = paste0(essay1_data_processed, "/ibge/cpi_br.rds"))
cpi_codes <- mutate(
  distinct(filter(cpi, level == 7), ipca),
  ipca = as.numeric(ipca)
)

# Filtering join: keeps only POF rows whose `ipca` appears in `cpi_codes`
# and adds no columns.
pof_ibge <- semi_join(pof_ibge, cpi_codes, by = "ipca")


# Obtain deciles

# One row per distinct (uckey, renda_mensal_uc) pair, binned into income
# deciles 1..10. The breaks are the unweighted 0%, 10%, ..., 100% quantiles of
# renda_mensal_uc taken over those distinct rows.
deciles <- pof_ibge %>%
  distinct(uckey, renda_mensal_uc) %>%
  mutate(
    decile = cut(
      renda_mensal_uc,
      # na.rm must be given to quantile(): cut() silently swallows it through
      # `...`, and quantile() stops with an error on NA input unless
      # na.rm = TRUE. Rows with NA income end up with decile = NA.
      breaks = quantile(
        renda_mensal_uc,
        probs = seq(0, 1, by = 0.1),
        na.rm = TRUE
      ),
      labels = seq_len(10),
      # Without this the minimum income would fall outside the first interval.
      include.lowest = TRUE
    ),
    # cut() returns a factor; its integer codes coincide with the labels 1..10.
    decile = as.numeric(decile)
  ) %>%
  select(-renda_mensal_uc)

# Join deciles

# NOTE(review): assumes each uckey has a single renda_mensal_uc value;
# otherwise `deciles` has several rows per uckey and this join duplicates
# POF rows -- confirm.
pof_ibge <- pof_ibge %>%
  left_join(deciles, by = "uckey")

# Join TvsNT classification

# Lookup table with two columns: the code in `official_class` (renamed to
# `ipca` so it can serve as the join key) and its `cnca` category.
ipca_TvsNT <- readRDS(
  file = paste0(essay1_data_raw, "/ipca_classifications_rev1999_2006.rds")
) %>%
  select(official_class, cnca) %>%
  rename(ipca = official_class)

# Left join: every POF row is kept; rows without a match get cnca = NA.
pof_ibge <- left_join(pof_ibge, ipca_TvsNT, by = "ipca")

# Compute Weights at 1 digit ---------------

# Weights 1 digit

# Per decile: share of each ipca_group in the decile's summed `vad`.
weights_ibge_1d_dec <- pof_ibge %>%
  group_by(ipca_group, decile) %>%
  summarise(tot_exp_group = sum(vad, na.rm = TRUE)) %>%
  group_by(decile) %>%
  mutate(
    tot_exp_decile = sum(tot_exp_group, na.rm = TRUE),
    weight = tot_exp_group / tot_exp_decile
  ) %>%
  ungroup()

# Whole sample: share of each ipca_group in the overall summed `vad`.
# summarise() over a single grouping variable already returns ungrouped data.
weights_ibge_1d_ag <- pof_ibge %>%
  group_by(ipca_group) %>%
  summarise(tot_exp_group = sum(vad, na.rm = TRUE)) %>%
  mutate(
    tot_exp = sum(tot_exp_group, na.rm = TRUE),
    weight = tot_exp_group / tot_exp,
    decile = 11 # obs: decile 11 to indicate aggregate economy
  )

# Stack both tables and drop the intermediate tot_* columns.
weights_ibge_1d <- bind_rows(weights_ibge_1d_dec, weights_ibge_1d_ag) %>%
  select(-starts_with("tot"))

# Check if weights sum to 1 (lists the deciles where they do not)
weights_ibge_1d %>%
  group_by(decile) %>%
  summarise(w = sum(weight)) %>%
  filter(!near(w, 1)) # Zero obs

# Compute Weights by TvsNT ---------------

# Weights

# Per decile: share of each `cnca` category in the decile's summed `vad`.
# Rows left with cnca = NA by the classification join form their own category.
weights_ibge_TvsNT_dec <- pof_ibge %>%
  group_by(decile, cnca) %>%
  summarise(tot_exp_cnca = sum(vad, na.rm = TRUE)) %>%
  group_by(decile) %>%
  mutate(tot_exp_decile = sum(tot_exp_cnca, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(weight = tot_exp_cnca / tot_exp_decile)

# Whole sample: share of each `cnca` category in the overall summed `vad`.
weights_ibge_TvsNT_ag <- pof_ibge %>%
  group_by(cnca) %>%
  summarise(tot_exp_cnca = sum(vad, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(
    tot_exp = sum(tot_exp_cnca, na.rm = TRUE),
    weight = tot_exp_cnca / tot_exp,
    decile = 11 # obs: decile 11 to indicate aggregate economy
  )

# Stack both tables and drop the intermediate tot_* columns.
weights_ibge_TvsNT <- bind_rows(
  weights_ibge_TvsNT_dec,
  weights_ibge_TvsNT_ag
) %>%
  select(-starts_with("tot"))

# Check if weights sum to 1
# Group by decile only: grouping by (decile, cnca) leaves one row per group,
# so the "sum" would just be each individual share and every row with a share
# different from 1 would be reported.
weights_ibge_TvsNT %>%
  group_by(decile) %>%
  summarise(w = sum(weight)) %>%
  filter(!near(w, 1)) # Zero obs


# Compute Weights at 7 digits -------------------

# Weights 7 digit

# Per decile: share of each `ipca` code in the decile's summed `vad`.
weights_ibge_7d_dec <- pof_ibge %>%
  group_by(ipca, decile) %>%
  summarise(tot_exp_subitem = sum(vad, na.rm = TRUE)) %>%
  group_by(decile) %>%
  mutate(
    tot_exp_decile = sum(tot_exp_subitem, na.rm = TRUE),
    weight = tot_exp_subitem / tot_exp_decile
  ) %>%
  ungroup()

# Whole sample: share of each `ipca` code in the overall summed `vad`.
# summarise() over a single grouping variable already returns ungrouped data.
weights_ibge_7d_ag <- pof_ibge %>%
  group_by(ipca) %>%
  summarise(tot_exp_subitem = sum(vad, na.rm = TRUE)) %>%
  mutate(
    tot_exp = sum(tot_exp_subitem, na.rm = TRUE),
    weight = tot_exp_subitem / tot_exp,
    decile = 11 # obs: decile 11 to indicate aggregate economy
  )

# Stack both tables and drop the intermediate tot_* columns.
weights_ibge_7d <- bind_rows(weights_ibge_7d_dec, weights_ibge_7d_ag) %>%
  select(-starts_with("tot"))

# Check if weights sum to 1 (lists the deciles where they do not)
weights_ibge_7d %>%
  group_by(decile) %>%
  summarise(w = sum(weight)) %>%
  filter(!near(w, 1)) # Zero obs

# Compute Weights at 7 digits by Household -------------------

# Per uckey: share of each `ipca` code in that uckey's summed `vad`.
# The per-uckey total is a temporary column (dropped by the final select), so
# it is named for what it holds rather than reusing the decile name.
weights_ibge_7d_hou <- pof_ibge %>%
  group_by(ipca, uckey) %>%
  summarise(tot_exp_subitem = sum(vad, na.rm = TRUE)) %>%
  group_by(uckey) %>%
  mutate(tot_exp_household = sum(tot_exp_subitem, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(weight = tot_exp_subitem / tot_exp_household) %>%
  select(-starts_with("tot"))

# Check if weights sum to 1
# A uckey whose total is 0 gets 0/0 = NaN weights; near(NaN, 1) is NA and
# filter() drops NA conditions, so test for NA explicitly or those households
# would pass the check unnoticed.
weights_ibge_7d_hou %>%
  group_by(uckey) %>%
  summarise(w = sum(weight)) %>%
  filter(is.na(w) | !near(w, 1)) # Zero obs

# Save ------
# Write each weight table to its own .rds file under the processed ibge folder.
saveRDS(
  object = weights_ibge_1d,
  file = paste0(essay1_data_processed, "/ibge/weights2002_ibge_1d_decile.rds")
)
saveRDS(
  object = weights_ibge_TvsNT,
  file = paste0(
    essay1_data_processed,
    "/ibge/weights2002_ibge_1d_decile_TvsNT.rds"
  )
)
saveRDS(
  object = weights_ibge_7d,
  file = paste0(essay1_data_processed, "/ibge/weights2002_ibge_7d_decile.rds")
)
saveRDS(
  object = weights_ibge_7d_hou,
  file = paste0(
    essay1_data_processed,
    "/ibge/weights2002_ibge_7d_household.rds"
  )
)
