rm(list = ls())
library(tidyverse)
library(here)
library(readxl)
library(lubridate)
library(magrittr)
source(here::here("essay1","codes","essay1_paths.R"))

# Cheap/expensive indicators, later matched on codigo, VFMARCA, VFESTAB
# and VFLOCAL.
cheap_expensive_indicators <- readRDS(
  paste0(essay1_data_processed, "/fipe/cheap_expensive_indicators.rds")
)

# Variety-level prices with the indicators attached. A left join is used, so
# price rows without a matching indicator row are kept with NA indicators.
rel_prices <- readRDS(
  paste0(essay1_data_processed, "/fipe/varieties_prices.rds")
) %>%
  left_join(
    cheap_expensive_indicators,
    by = c("codigo", "VFMARCA", "VFESTAB", "VFLOCAL")
  )

# High / low average price, median split ----
# Mean rel_price for every (codigo, date, high_p_median) cell, then one column
# per high_p_median value: `1` becomes p_high_median, `0` becomes p_low_median.
# summarise() drops one grouping level, so the result stays grouped.
rel_prices_median <- rel_prices %>%
  group_by(codigo, date, high_p_median) %>%
  summarise(rel_price = mean(rel_price)) %>%
  spread(key = high_p_median, value = rel_price) %>%
  rename(
    p_low_median = `0`,
    p_high_median = `1`
  )

# High / low average price, quartile split.
# Only rows whose high_p_quartile is 0 or 1 take part; any other value
# (including NA) is dropped before averaging. `1` becomes p_high_quartile and
# `0` becomes p_low_quartile.
rel_prices_quartile <- rel_prices %>%
  filter(high_p_quartile %in% c(0, 1)) %>%
  group_by(codigo, date, high_p_quartile) %>%
  summarise(rel_price = mean(rel_price)) %>%
  spread(key = high_p_quartile, value = rel_price) %>%
  rename(
    p_low_quartile = `0`,
    p_high_quartile = `1`
  )

# Put the median-based and quartile-based averages side by side, one row per
# (codigo, date) of the median table, and drop the leftover grouping.
rel_prices_high_low <- left_join(
  rel_prices_median,
  rel_prices_quartile,
  by = c("date", "codigo")
)
rel_prices_high_low <- ungroup(rel_prices_high_low)

# Variety weights ----
# Distinct (codigo, ipc) pairs of the correspondence table, ignoring entries
# that have no codigo.
ipcfipe_pof_correspondence <- readRDS(
  paste0(essay1_data_processed, "/fipe/ipcfipe_pof_correspondence.rds")
) %>%
  filter(!is.na(codigo)) %>%
  distinct(codigo, ipc)

# Weights with codigo attached through ipc, reshaped to one column per decile
# (columns are named decile_w_<decile value>).
weights_7d <- readRDS(
  paste0(essay1_data_processed, "/fipe/weights_fipe_7d_decile.rds")
) %>%
  left_join(ipcfipe_pof_correspondence, by = "ipc") %>%
  spread(key = decile, value = weight, sep = "_w_")

# Attach the weight columns to the high/low price table.
rel_prices_high_low <- left_join(
  rel_prices_high_low,
  weights_7d,
  by = "codigo"
)

# Weighted within-variety part ----
# Every column starting with "p_" is multiplied by decile_w_11 into a new
# "<column>_weighted" column; the weighted columns are then summed per date,
# skipping missing values.
# NOTE(review): decile_w_11 presumably holds the aggregate (all-household)
# weight -- confirm against the weights file.
# na.rm uses the TRUE constant: `T` is an ordinary variable that can be
# reassigned (e.g. by a sourced file), TRUE cannot.
w_rel_prices <- rel_prices_high_low %>%
  mutate_at(
    vars(starts_with("p_")),
    .funs = list(weighted = ~ . * decile_w_11)
  ) %>%
  group_by(date) %>%
  summarise_at(vars(ends_with("weighted")), ~ sum(., na.rm = TRUE))


# Categories with no varieties ----
# Index series restricted to 2001-05-01 .. 2004-04-01 (both ends included).
cpi <- readRDS(paste0(essay1_data_processed, "/fipe/ipc_fipe.rds")) %>%
  filter(between(date, as_date("2001-05-01"), as_date("2004-04-01")))

# Weight rows whose codigo never appears in the high/low price table, with the
# index series attached by (codigo, ipc).
non_variety_rel_prices <- weights_7d %>%
  anti_join(rel_prices_high_low, by = "codigo") %>%
  left_join(cpi, by = c("codigo", "ipc"))

# Weight check: total decile_w_11 per date. The result is not stored, so it is
# only visible when the value of this expression is printed.
non_variety_rel_prices %>%
  group_by(date) %>%
  mutate(w_check = sum(decile_w_11)) %>%
  select(w_check)

# Weighted part for categories without varieties: the same weighted value
# (cpi * decile_w_11) is used for all four high/low series, then summed per
# date, skipping missing values.
# NOTE(review): inside mutate(), `cpi` presumably resolves to a `cpi` column
# brought in by the join rather than to the `cpi` data frame -- confirm that
# the index file has a column with that name.
# na.rm uses the TRUE constant instead of the reassignable variable `T`.
w_rel_prices_nonv <- non_variety_rel_prices %>%
  mutate(
    p_low_median_weighted = cpi * decile_w_11,
    p_high_median_weighted = p_low_median_weighted,
    p_low_quartile_weighted = p_low_median_weighted,
    p_high_quartile_weighted = p_low_median_weighted
  ) %>%
  group_by(date) %>%
  summarise_at(vars(ends_with("weighted")), ~ sum(., na.rm = TRUE))

# Complete index: stack the variety and non-variety contributions and add them
# up per date, skipping missing values.
# na.rm uses the TRUE constant instead of the reassignable variable `T`.
w_rel_prices_complete <- bind_rows(w_rel_prices, w_rel_prices_nonv) %>%
  group_by(date) %>%
  summarise_at(vars(ends_with("weighted")), ~ sum(., na.rm = TRUE))


# x-axis breaks for the plots ----
# Quarterly breaks before and after 2002-04-01, plus that date itself. The
# pieces are built inside local() so only x_axis is created.
x_axis <- local({
  # pre-dev: every 3 months from 2001-05-01, not going past 2002-04-01
  pre_dev <- seq(
    from = as_date("2001-05-01"),
    to = as_date("2002-04-01"),
    by = "3 month"
  )
  # just-before-dev
  just_before_dev <- as_date("2002-04-01")
  # post-dev: every 3 months from 2002-07-01 through 2004-04-01
  post_dev <- seq(
    from = as_date("2002-07-01"),
    to = as_date("2004-04-01"),
    by = "3 month"
  )
  c(pre_dev, just_before_dev, post_dev)
})

# Month offsets -11..24 repeated for four series. No longer bound to the plot
# data; kept in case other code refers to x_axis_2.
x_axis_2 <- data.frame(time = rep(-11:24, 4))

# Long format: one row per (date, series), with `type` holding the series name
# and `within_p_index` its value.
# `time` is the number of calendar months between `date` and April 2002
# (0 in April 2002, negative before, positive after). It is derived from each
# row's own date instead of being bound positionally from x_axis_2, which only
# worked when the table had exactly 144 rows in type-then-date order. For
# monthly dates from 2001-05 to 2004-04 this reproduces -11..24.
data_plot <- w_rel_prices_complete %>%
  gather(key = type, value = within_p_index, -date) %>%
  mutate(
    time = as.integer((year(date) - 2002) * 12 + (month(date) - 4))
  )

# Components shared by both figures: one line per series over time, a vertical
# marker at 2002-04-01, x-axis breaks taken from x_axis and labelled as
# "Mon YYYY", classic theme with the legend at the bottom.
within_index_layers <- list(
  geom_line(aes(x = date, y = within_p_index, color = type)),
  geom_vline(xintercept = as_date("2002-04-01")),
  scale_x_date(breaks = x_axis, date_labels = "%b %Y"),
  theme_classic(),
  theme(legend.position = "bottom")
)

# Series whose name contains "median".
plot_median <- ggplot(filter(data_plot, str_detect(type, "median"))) +
  within_index_layers

# Series whose name contains "quartile".
plot_quartile <- ggplot(filter(data_plot, str_detect(type, "quartile"))) +
  within_index_layers
