rm(list = ls())
library(tidyverse)
library(readxl)
library(here)
library(microdadosBrasil)
source(here::here("codes","essay1_paths.R"))

# Read POF to SNIPC ------

# Load the IBGE POF -> SNIPC correspondence spreadsheet, skipping its first
# two rows.
tab_pof_to_snipc <- read_excel(
    path = paste0(essay1_data_raw,"/ibge/Tabela_de_correspondencia_POF_SNIPC 2002-2003.xls"),
    skip = 2
)

# The second column is renamed to `descricao`, and `cod_tab` is the numeric
# join key built from GRUPO1 and ITEM1 (GRUPO1 * 1000 + ITEM1).
tab_pof_to_snipc <- tab_pof_to_snipc %>%
    rename(descricao = 2) %>%
    mutate(cod_tab = GRUPO1 * 1000 + ITEM1)


# Read one POF expenditure file and attach the POF -> SNIPC correspondence.
#
# Arguments:
#   file_type  POF file type passed to read_POF() as `ft`. For
#              "caderneta_despesa" the product code is built from `grupo`;
#              for every other file type it is built from `quadro`.
#   ano        Survey year, passed to read_POF() as `i` and appended to the
#              "/ibge/POF" folder name under `essay1_data_raw`.
#   tab        Correspondence table to join on; must contain `cod_tab`,
#              `subitem`, `descricao` and `NOME1`. Defaults to the
#              script-level `tab_pof_to_snipc`.
#
# Returns a data frame with the columns uf, uckey, estrato, fator,
# fator_set, cod, codi, obtencao, renda_mon_anual, subitem, vad, ipca_desc
# and pof_desc.
pof_to_snipc_expenditures <- function(file_type, ano, tab = tab_pof_to_snipc) {
    # Only the column that prefixes the product code differs between file
    # types, so pick it once instead of duplicating the whole pipeline.
    prefix_col <- if (file_type == "caderneta_despesa") "grupo" else "quadro"

    read_POF(ft = file_type, i = ano,
             root_path = paste0(essay1_data_raw,"/ibge/POF",ano)) %>%
        mutate(
            # Annual deflated value times `fator` (presumably the sampling
            # expansion weight -- confirm against the POF documentation).
            vad = val_def_anual * fator,
            renda_mon_anual = renda * 12,
            # Composite numeric key combining uf, seq, dv, domcl, uc and
            # estrato.
            uckey = (uf * 1000000000) + (seq * 1000000) + (dv * 100000) +
                (domcl * 1000) + (uc * 100) + estrato,
            # `cod` keeps the prefix plus the item without its last two
            # digits (the key used by the correspondence table); `codi`
            # keeps the full item code.
            cod = .data[[prefix_col]] * 1000 + floor(item / 100),
            codi = .data[[prefix_col]] * 100000 + item
        ) %>%
        # Join against the table supplied by the caller rather than the
        # global object, so the `tab` argument is actually honoured.
        left_join(tab, by = c("cod" = "cod_tab")) %>%
        select(uf, uckey, estrato, fator, fator_set, cod, codi,
               obtencao, renda_mon_anual, subitem, vad,
               ipca_desc = descricao,
               pof_desc = NOME1)
}

# POF expenditure files to process. Each element is named after itself so
# that the list produced by map() below is keyed by file type.
file_types <- setNames(nm = c(
    "despesa_90dias",
    "despesa_12meses",
    "outras_despesas",
    "servico_domestico",
    "despesa_veiculo",
    "despesa_individual",
    "caderneta_despesa"
))

# One data frame per file type for the 2002 survey, each joined to the
# POF -> SNIPC correspondence table.
snipc_data_exp <- map(
    file_types,
    function(ft) {
        pof_to_snipc_expenditures(ft, ano = 2002, tab = tab_pof_to_snipc)
    }
)

# Parse INSS -----

# Survey year used by this and the following read_POF() calls.
ano <- 2002

# Re-read the servico_domestico file, this time valuing each record with
# val_def_anual_inss instead of val_def_anual, and store the result as an
# extra "inss" element of the expenditure list.
snipc_data_exp[["inss"]] <- read_POF(
    ft = "servico_domestico",
    i = ano,
    root_path = paste0(essay1_data_raw,"/ibge/POF",ano)
) %>%
    mutate(
        # Composite numeric key combining uf, seq, dv, domcl, uc and estrato.
        uckey = (uf * 1000000000) + (seq * 1000000) + (dv * 100000) +
            (domcl * 1000) + (uc * 100) + estrato,
        renda_mon_anual = renda * 12,
        vad = val_def_anual_inss * fator
    ) %>%
    mutate(
        # `cod` drops the last two digits of item; `codi` keeps them.
        cod = quadro * 1000 + floor(item / 100),
        codi = quadro * 100000 + item
    ) %>%
    left_join(tab_pof_to_snipc, by = c("cod" = "cod_tab")) %>%
    select(uf, uckey, estrato, fator, fator_set, cod, codi,
           obtencao, renda_mon_anual, subitem, vad,
           ipca_desc = descricao,
           pof_desc = NOME1)

# Parse Earnings -----

# Earnings records with the shared derived columns. `codi` has no
# counterpart in this file and is filled with NA.
earnings <- read_POF(
    ft = "rendimentos",
    i = ano,
    root_path = paste0(essay1_data_raw,"/ibge/POF",ano)
) %>%
    mutate(
        renda_mon_anual = renda * 12,
        uckey = (uf * 1000000000) + (seq * 1000000) + (dv * 100000) +
            (domcl * 1000) + (uc * 100) + estrato,
        codi = NA
    )

# Each deduction type becomes its own copy of the earnings table. `vad` is
# the deduction times `fator`, and `cod` is quadro * 1000 plus a fixed
# offset (500, 600 or 700) plus pos_ocup. Rows whose `vad` is not positive
# get cod = 0.

# ded_def_anual_IRPF deductions (offset 600).
irpf <- mutate(
    earnings,
    vad = ded_def_anual_IRPF * fator,
    cod = ifelse(vad > 0, quadro * 1000 + 600 + pos_ocup, 0)
)

# ded_def_anual_previ deductions (offset 500).
previ <- mutate(
    earnings,
    vad = ded_def_anual_previ * fator,
    cod = ifelse(vad > 0, quadro * 1000 + 500 + pos_ocup, 0)
)

# ded_def_anual_outras deductions (offset 700).
outras <- mutate(
    earnings,
    vad = ded_def_anual_outras * fator,
    cod = ifelse(vad > 0, quadro * 1000 + 700 + pos_ocup, 0)
)

# Parse other earnings -----
# Deductions recorded in the outros_rendimentos file. Here `cod` is built
# from quadro and item (item without its last two digits) and is 0 when
# `vad` is not positive.
other_earnings <- read_POF(
    ft = "outros_rendimentos",
    i = ano,
    root_path = paste0(essay1_data_raw,"/ibge/POF",ano)
)

other_earnings <- other_earnings %>%
    mutate(
        renda_mon_anual = renda * 12,
        uckey = (uf * 1000000000) + (seq * 1000000) + (dv * 100000) +
            (domcl * 1000) + (uc * 100) + estrato,
        codi = NA,
        vad = ded_def_anual * fator,
        cod = ifelse(vad > 0, quadro * 1000 + floor(item / 100), 0)
    )

# Stack every deduction table, attach the SNIPC correspondence and keep
# only rows that received a positive code.
snipc_data_earn <- bind_rows(irpf, previ, outras, other_earnings) %>%
    left_join(tab_pof_to_snipc, by = c("cod" = "cod_tab")) %>%
    filter(cod > 0) %>%
    select(uf, uckey, estrato, fator, fator_set, cod, codi,
           renda_mon_anual, subitem, vad,
           ipca_desc = descricao,
           pof_desc = NOME1)

# Collapse the list of expenditure tables into one data frame, then append
# the earnings-deduction rows.
pof_ipca <- bind_rows(
    bind_rows(snipc_data_exp),
    snipc_data_earn
)

# Drop products without a subitem correspondence, then derive the IPCA
# codes for the coarser categories by truncating the subitem code:
# ipca_group, ipca_subgroup and ipca_item discard its last 6, 5 and 3
# digits respectively. renda_mensal_uc converts the annualised income back
# to a monthly figure.
pof_ipca <- pof_ipca %>%
    filter(!is.na(subitem)) %>%
    mutate(
        ipca = subitem,
        ipca_group = floor(ipca / 1e6),
        ipca_subgroup = floor(ipca / 1e5),
        ipca_item = floor(ipca / 1e3),
        renda_mensal_uc = renda_mon_anual / 12
    )

# Persist the final table under the processed-data folder.
saveRDS(
    object = pof_ipca,
    file = paste0(essay1_data_processed,"/ibge/pof2002_with_ipca_codes.rds")
)
