rm(list = ls())
library(tidyverse)
library(here)
library(foreign)
library(readxl)
library(rlang)
source(here::here("codes","essay1_paths.R"))

# Read POF 98 --------

# List the raw DBF files.
# `pattern` is a regular expression: the dot is escaped and the match is
# anchored at the end of the name, so only files ending in ".DBF" are read
# (the bare ".DBF" also matched names such as "xDBF.bak").
pof98_dir <- paste0(essay1_data_raw, "/fipe")
pof98_dbfs <- list.files(pof98_dir, pattern = "\\.DBF$", full.names = TRUE)

# Fail early with a clear message; with no files, bind_rows() would return an
# empty table and the mutate() below would fail with "object not found".
if (length(pof98_dbfs) == 0) {
  stop("No .DBF files found in ", pof98_dir, call. = FALSE)
}

# Read every file; as.is = TRUE keeps character fields as character vectors
# instead of converting them to factors.
pof98 <- lapply(pof98_dbfs, read.dbf, as.is = TRUE)

# Bind rows
pof98 <- bind_rows(pof98)

# Transliterate latin1 text to ASCII and lower-case it.
# NOTE(review): rlang::as_character() (and its `encoding` argument) appears to
# be deprecated in recent rlang releases -- confirm it still behaves as
# expected with the installed version.
to_ascii_lower <- function(x) {
  tolower(iconv(as_character(x, encoding = "latin1"), from = "latin1", to = "ASCII//TRANSLIT"))
}

pof98 <- pof98 %>%
  mutate(
    PRODUTO = to_ascii_lower(PRODUTO),
    MARCA = to_ascii_lower(MARCA),
    TIPO = to_ascii_lower(TIPO)
  )

# Read List of Original Households --------

# Spreadsheet listing the 2200 households included in the original weights
# computation (sheet "Planilha1").
pof_original_dom <- read_excel(
  path = paste0(essay1_data_raw, "/fipe/POF 98-99 cadastro 2200.xlsx"),
  sheet = "Planilha1"
)

# Household codes taken from the `cod_dom` column of that spreadsheet
original_dom_codes <- pull(pof_original_dom, cod_dom)

# Keep only POF records whose COD_DOM is one of those household codes
pof98 <- filter(pof98, COD_DOM %in% original_dom_codes)

# Correspondence Table --------

# Read the correspondence table and keep only the rows with level == 6
ipcfipe_pof_correspondence <- readRDS(paste0(essay1_data_processed,"/fipe/ipcfipe_pof_correspondence.rds")) %>%
  filter(level == 6)

# Matchings -------

# n = number of correspondence rows sharing the same COD_PROD.
# The table is ungrouped again right away so that later filter/select/join
# steps do not silently operate on a tibble still grouped by COD_PROD.
ipcfipe_pof_correspondence <- ipcfipe_pof_correspondence %>%
  group_by(COD_PROD) %>%
  mutate(n = n()) %>%
  ungroup()

# 1 to 1 matchings

# Products that appear on exactly one correspondence row (n == 1), reduced to
# the COD_PROD -> codigo pair.
one_to_one <- ipcfipe_pof_correspondence %>%
  filter(n == 1) %>%
  distinct(COD_PROD, codigo)

# Keep the POF records whose COD_PROD has such a match and attach `codigo`.
# An inner join is the single-step form of semi_join() followed by
# left_join() on the same key.
pof_one_to_one <- inner_join(pof98, one_to_one, by = "COD_PROD")

# 1 to N matchings

# Products that appear on more than one correspondence row. `n` is a per-product
# row count (always >= 1), so `n > 1` selects the same rows as `n != 1`.
one_to_n <- ipcfipe_pof_correspondence %>%
  filter(n > 1) %>%
  select(codigo, ipc_desc, COD_PROD, COD_TIPO, COD_MAR)

# Adjustments before matching to COD_TIPO and COD_MARCA to avoid duplicates
#
# For a handful of products, every brand/type code other than the ones listed
# explicitly is replaced by 9999 (presumably the "other" bucket used in the
# correspondence table -- TODO confirm).
#
# NOTE(review): pof_one_to_one was built from pof98 before this step, so that
# subset still carries the unadjusted COD_MAR / COD_TIPO values.
# NOTE(review): with `!=`, a missing COD_MAR / COD_TIPO makes the test NA and
# ifelse() returns NA for that row, whereas the `%in%` test used for product
# 649 turns a missing COD_TIPO into 9999 -- confirm this asymmetry is intended.
pof98 <- pof98 %>% 
  mutate(
    # Product 648: keep brand 1632, recode every other brand to 9999
    COD_MAR = ifelse((COD_PROD == 648 & COD_MAR!=1632),9999,COD_MAR),
    # Product 630: keep type 1525, recode every other type to 9999
    COD_TIPO = ifelse((COD_PROD == 630 & COD_TIPO!=1525),9999,COD_TIPO),
    # Products 174, 175 and 176: keep type 1408, recode every other type to 9999
    COD_TIPO = ifelse((COD_PROD == 174 & COD_TIPO!=1408),9999,COD_TIPO),
    COD_TIPO = ifelse((COD_PROD == 175 & COD_TIPO!=1408),9999,COD_TIPO),
    COD_TIPO = ifelse((COD_PROD == 176 & COD_TIPO!=1408),9999,COD_TIPO),
    # Product 649: keep the five listed types, recode every other type to 9999
    COD_TIPO = ifelse((COD_PROD == 649 & !(COD_TIPO %in% c(1994,2219,2205,2031,2503))),9999,COD_TIPO),
    )

# match by COD_PROD and COD_TIPO

# Correspondence rows of 1-to-N products that specify a COD_TIPO
one_to_n_tipo <- one_to_n %>%
  filter(!is.na(COD_TIPO)) %>%
  select(COD_PROD, COD_TIPO, codigo)

# POF records matching on (COD_PROD, COD_TIPO), with `codigo` attached.
# An inner join gives the same result as semi_join() followed by left_join()
# on the same keys.
pof_one_to_n_tipo <- inner_join(
  pof98,
  one_to_n_tipo,
  by = c("COD_PROD", "COD_TIPO")
)

# match by COD_PROD and COD_MAR

# Correspondence rows of 1-to-N products that specify a COD_MAR
one_to_n_marca <- one_to_n %>%
  filter(!is.na(COD_MAR)) %>%
  select(COD_PROD, COD_MAR, codigo)

# POF records matching on (COD_PROD, COD_MAR), with `codigo` attached.
# An inner join gives the same result as semi_join() followed by left_join()
# on the same keys.
pof_one_to_n_marca <- inner_join(
  pof98,
  one_to_n_marca,
  by = c("COD_PROD", "COD_MAR")
)

# Bind all parts together
# Order: 1-to-1 matches, then brand-based matches, then type-based matches.
# NOTE(review): a record that matches both by brand and by type would appear
# twice in the stacked table -- confirm the adjustments above rule this out.
pof98_matched_ipc <- bind_rows(
  pof_one_to_one,
  pof_one_to_n_marca,
  pof_one_to_n_tipo
)

# Insert ipc codes
# Lookup table: `codigo` plus every column whose name starts with "ipc",
# one row per distinct combination.
ipc_codes <- ipcfipe_pof_correspondence %>%
  ungroup() %>%
  select(codigo, starts_with("ipc")) %>%
  distinct()

pof98_matched_ipc <- left_join(pof98_matched_ipc, ipc_codes, by = "codigo")

# Check non-matched products (think later what to do with these!)

# Product codes that received at least one match
cod_prods <- unique(pof98_matched_ipc[["COD_PROD"]])

# Distinct product / type / brand descriptions of the products with no match.
# This table is only built for inspection; it is not saved.
not_matched <- pof98 %>%
  filter(!(COD_PROD %in% cod_prods)) %>%
  distinct(COD_PROD, PRODUTO, TIPO, MARCA)

# Write the matched POF table to the processed-data folder
pof98_matched_ipc %>%
  saveRDS(file = paste0(essay1_data_processed, "/fipe/pof_fipe.rds"))
