rm(list = ls())
library(tidyverse)
library(lubridate)
library(here)
library(readxl)
source(here::here("codes","essay1_paths.R"))

# Region codes: each one is the suffix of an input workbook name and of the
# matching output .rds file.
regions <- c("br","pa","ce","pe","ba","mg","rj","sp","pr","rs")

# Monthly labels (as text) from 1999-08 through 2006-06: 83 values in total.
# The same labels are used for every region.
date_names <- as.character(
  seq.Date(from = as.Date("1999-08-01"), to = as.Date("2006-06-01"), by = "month")
)

# Month at which every rebased index equals 1.
base_date <- "2002-04-01"

for (j in regions) {

  # ---- Read Sidra table 655 ------------------------------------------------
  # Skip the first three spreadsheet rows and read "-" cells as missing.
  ipca_tab655 <- read_xlsx(
    paste0(essay1_data_raw,"/ibge/tab655_",j,".xlsx"),
    skip = 3,
    na = "-"
  )

  # Drop the first column; the column that is now first becomes `ipca`.
  ipca_tab655 <- select(ipca_tab655, -1)
  ipca_tab655 <- rename(ipca_tab655, ipca = 1)

  # Rows with no label in `ipca` are discarded.
  ipca_tab655 <- filter(ipca_tab655, !is.na(ipca))

  # The general index label is prefixed with "0." so that, like the other
  # labels, it can be split into a code part and a description part.
  ipca_tab655 <- mutate(
    ipca_tab655,
    ipca = ifelse(ipca == "Índice geral", "0.Índice geral", ipca)
  )

  # Split at the first separator only (default separator of `separate()`);
  # anything after it stays together in `ipca_desc`.
  ipca_tab655 <- separate(
    ipca_tab655,
    ipca,
    into = c("ipca", "ipca_desc"),
    extra = "merge"
  )

  # ---- Correct the column names --------------------------------------------
  # Columns 3 to 85 receive the 83 monthly labels.
  # NOTE(review): assumes every workbook has exactly these month columns, in
  # chronological order, right after the two label columns -- confirm.
  names(ipca_tab655)[3:85] <- date_names

  # ---- Wide to long: one row per (item, month) -----------------------------
  ipca_tab655 <- gather(
    ipca_tab655,
    key = "date",
    value = "ipca_inflation",
    -ipca, -ipca_desc
  )
  ipca_tab655 <- mutate(ipca_tab655, date = as.Date(date))

  # ---- Create price index --------------------------------------------------
  # Within each item code, in date order, chain the monthly growth factors
  # (1 + inflation / 100). The factor for 1999-08-01 is forced to 1, so the
  # index starts at 1 in that month. `level` is the number of characters in
  # the item code.
  ipca_tab655 <- ipca_tab655 %>%
    arrange(ipca, date) %>%
    group_by(ipca) %>%
    mutate(
      cpi = cumprod(ifelse(date == "1999-08-01", 1, 1 + ipca_inflation / 100))
    ) %>%
    mutate(level = nchar(ipca))

  # ---- Rebase --------------------------------------------------------------
  # Index value of each item in the base month.
  cpi_base <- select(
    filter(ipca_tab655, date == base_date),
    ipca,
    cpi_base = cpi
  )

  # Attach the base-month value to every row of the same item and divide, so
  # the rebased index equals 1 at `base_date`.
  ipca_tab655 <- left_join(ipca_tab655, cpi_base, by = "ipca")
  ipca_tab655 <- mutate(ipca_tab655, cpi_new_base = cpi / cpi_base)
  ipca_tab655 <- ungroup(ipca_tab655)

  # Keep the rebased series under the name `cpi`; the un-rebased series is
  # dropped, while its base-month value is kept as `cpi_base`.
  ipca_tab655 <- select(
    ipca_tab655,
    level, ipca, ipca_desc, date, ipca_inflation,
    cpi = cpi_new_base,
    cpi_base
  )

  # ---- Save ----------------------------------------------------------------
  saveRDS(
    ipca_tab655,
    paste0(essay1_data_processed,"/ibge/cpi_",j,".rds")
  )
}