rm(list = ls())
library(tidyverse)
library(here)
library(lubridate)
source(here::here("codes","essay1_paths.R"))

# Read datasets ----

# Cheap/expensive indicator table; it is attached to the price data below.
cheap_expensive_indicators <- readRDS(
  paste0(essay1_data_processed, "/fipe/cheap_expensive_indicators.rds")
)

# Price data with the indicators joined on the four identifying keys.
rel_prices <- readRDS(
  paste0(essay1_data_processed, "/fipe/varieties_prices.rds")
) %>%
  left_join(
    cheap_expensive_indicators,
    by = c("codigo", "VFMARCA", "VFESTAB", "VFLOCAL")
  )

# CPI series, kept only for 2001-05-01 through 2004-04-01 (both inclusive).
cpi <- readRDS(paste0(essay1_data_processed, "/fipe/ipc_fipe.rds")) %>%
  filter(between(date, as_date("2001-05-01"), as_date("2004-04-01")))

# Compute High and Low Average Price ----

# Median split: mean rel_price per codigo, date and flag value, reshaped so
# that flag 0 becomes p_low_median and flag 1 becomes p_high_median.
rel_prices_median <- rel_prices %>%
  group_by(codigo, date, high_p_median) %>%
  summarise(rel_price = mean(rel_price)) %>%
  spread(high_p_median, rel_price) %>%
  rename(p_low_median = `0`, p_high_median = `1`)

# Quartile split: same computation, but only rows whose flag is 0 or 1 enter
# the averages (any other flag value is dropped first).
rel_prices_quartile <- rel_prices %>%
  filter(high_p_quartile %in% c(0, 1)) %>%
  group_by(codigo, date, high_p_quartile) %>%
  summarise(rel_price = mean(rel_price)) %>%
  spread(high_p_quartile, rel_price) %>%
  rename(p_low_quartile = `0`, p_high_quartile = `1`)

# One row per codigo/date with the median and quartile averages side by side.
rel_prices_high_low <- left_join(
  rel_prices_median,
  rel_prices_quartile,
  by = c("date", "codigo")
) %>%
  ungroup()

# Join weights to varieties  -----

# Unique (codigo, ipc) pairs; rows without a codigo are discarded.
ipcfipe_pof_correspondence <- readRDS(
  paste0(essay1_data_processed, "/fipe/ipcfipe_pof_correspondence.rds")
) %>%
  filter(!is.na(codigo)) %>%
  distinct(codigo, ipc)

# Weights keyed by ipc, with codigo attached and one column per decile
# (decile_w_1, decile_w_2, ...).
weights_7d <- readRDS(
  paste0(essay1_data_processed, "/fipe/weights_fipe_7d_decile.rds")
) %>%
  left_join(ipcfipe_pof_correspondence, by = "ipc") %>%
  spread(key = decile, value = weight, sep = "_w_")

# Attach the decile weights to the high/low price table.
rel_prices_high_low <- left_join(
  rel_prices_high_low,
  weights_7d,
  by = "codigo"
)

# First term (common to conservative and liberal)  ---------

# Low-price averages are multiplied by the decile-1 weight and high-price
# averages by the decile-10 weight; the products are then summed per date.
# Missing products are ignored in the sums.
first_term_variety <- rel_prices_high_low %>%
  mutate_at(
    vars(starts_with("p_low")),
    list(weighted_bottom = ~ . * decile_w_1)
  ) %>%
  mutate_at(
    vars(starts_with("p_high")),
    list(weighted_top = ~ . * decile_w_10)
  ) %>%
  group_by(date) %>%
  summarise_at(vars(contains("weighted")), sum, na.rm = TRUE)


# Second term ------------------

# Weight rows whose codigo does not appear in the high/low price table
non_variety_rel_prices <- anti_join(
  weights_7d,
  rel_prices_high_low,
  by = "codigo"
)

## conservative ---------

# Attach the CPI series to every category without varieties.
second_term_conservative <- left_join(
  non_variety_rel_prices,
  cpi,
  by = c("codigo", "ipc")
)

# Diagnostic only (result is not stored): total decile-1 and decile-10 weight
# of this part, repeated on every row of the corresponding date.
second_term_conservative %>%
  group_by(date) %>%
  mutate(
    w_check_1 = sum(decile_w_1, na.rm = TRUE),
    w_check_10 = sum(decile_w_10, na.rm = TRUE)
  ) %>%
  select(w_check_1, w_check_10)

# The same CPI-times-weight product feeds the median and the quartile columns,
# so the column names line up with those of first_term_variety.
second_term_conservative <- second_term_conservative %>%
  mutate(
    p_low_median_weighted_bottom = cpi * decile_w_1,
    p_high_median_weighted_top = cpi * decile_w_10,
    p_low_quartile_weighted_bottom = cpi * decile_w_1,
    p_high_quartile_weighted_top = cpi * decile_w_10
  ) %>%
  group_by(date) %>%
  summarise_at(vars(contains("weighted")), sum, na.rm = TRUE)

# Stack both terms and add them up date by date.
combined_conservative <- first_term_variety %>%
  bind_rows(second_term_conservative) %>%
  group_by(date) %>%
  summarise_at(vars(contains("weighted")), sum, na.rm = TRUE)

saveRDS(
  combined_conservative,
  paste0(essay1_results, "/combined_conservative.rds")
)

# Plot conservative ----

# Quarterly x-axis breaks: 2001-04 to 2002-04, then 2002-07 to 2004-04.
x_axis <- c(
  seq(from = as_date("2001-04-01"), to = as_date("2002-04-01"), by = "3 month"), # pre-dev
  seq(from = as_date("2002-07-01"), to = as_date("2004-04-01"), by = "3 month")  # post-dev
)

# Long format: one row per date and series (`type` holds the column name).
data_plot_conservative <- combined_conservative %>%
  gather(key = type, value = combined_p_index, -date)

# Legend text, in the order low-price/bottom-decile series first.
household_labels <- c("Low-income households", "High-income households")

# Theme shared by the median and quartile figures.
combined_index_theme <- theme_classic() +
  theme(
    legend.position = c(0.8, .1),
    legend.text = element_text(size = 7),
    legend.title = element_text(size = 7, hjust = 1),
    legend.background = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid = element_blank(),
    axis.text.x = element_text(size = 6, hjust = 1)
  )

# In both figures the linetypes are named and the breaks are given explicitly.
# ggplot2 orders a discrete scale alphabetically, so with unnamed values and
# labels the p_high_*_weighted_top series (decile-10 weights) would come first
# and receive the dotted line and the "Low-income households" label.
plot_median_conservative <- data_plot_conservative %>%
  filter(str_detect(type, "median")) %>%
  ggplot() +
  geom_line(aes(x = date, y = combined_p_index, linetype = type)) +
  geom_vline(xintercept = as_date("2002-04-01"), linetype = "dashed") +
  scale_linetype_manual(
    values = c(
      p_low_median_weighted_bottom = "dotted",
      p_high_median_weighted_top = "solid"
    ),
    breaks = c("p_low_median_weighted_bottom", "p_high_median_weighted_top"),
    labels = household_labels
  ) +
  guides(linetype = guide_legend(title = "", ncol = 1)) +
  scale_x_date(breaks = x_axis, date_labels = "%b %Y") +
  ylim(NA, 1.35) +
  combined_index_theme

plot_quartile_conservative <- data_plot_conservative %>%
  filter(str_detect(type, "quartile")) %>%
  ggplot() +
  geom_line(aes(x = date, y = combined_p_index, linetype = type)) +
  geom_vline(xintercept = as_date("2002-04-01"), linetype = "dashed") +
  scale_linetype_manual(
    values = c(
      p_low_quartile_weighted_bottom = "dotted",
      p_high_quartile_weighted_top = "solid"
    ),
    breaks = c("p_low_quartile_weighted_bottom", "p_high_quartile_weighted_top"),
    labels = household_labels
  ) +
  guides(linetype = guide_legend(title = "", ncol = 1)) +
  scale_x_date(breaks = x_axis, date_labels = "%b %Y") +
  ylim(NA, 1.35) +
  combined_index_theme

saveRDS(plot_median_conservative, paste0(essay1_results,"/combined_conservative_median_plot.rds"))
saveRDS(plot_quartile_conservative, paste0(essay1_results,"/combined_conservative_quartile_plot.rds"))


## liberal ---------

# Denominator of the second-term ratio: for codigos that do appear in the
# high/low price table, the per-date sum of CPI times the decile-1 weight
# (bottom) and CPI times the decile-10 weight (top). Missing products are
# ignored in the sums.
rel_prices_denominator <- cpi %>%
  left_join(weights_7d, by = "codigo") %>%
  semi_join(rel_prices_high_low, by = "codigo") %>%
  select(date, starts_with("decile"), cpi) %>%
  group_by(date) %>%
  summarise(
    denominator_bottom = sum(cpi * decile_w_1, na.rm = TRUE),
    denominator_top = sum(cpi * decile_w_10, na.rm = TRUE)
  )

# Numerator of the ratio is the first term itself.
rel_prices_numerator <- first_term_variety

# Per-date ratios: every *_weighted_bottom column divided by the bottom
# denominator and every *_weighted_top column by the top denominator; the
# results are stored in new columns suffixed "_ratio".
ratio_second_term <- rel_prices_numerator %>%
  left_join(rel_prices_denominator, by = "date") %>%
  mutate_at(
    vars(contains("weighted_bottom")),
    list(ratio = ~ . / denominator_bottom)
  ) %>%
  mutate_at(
    vars(contains("weighted_top")),
    list(ratio = ~ . / denominator_top)
  ) %>%
  select(-denominator_bottom, -denominator_top)

# Second term, liberal version: CPI times weight for the categories without
# varieties, scaled by the matching ratio of the same date, summed per date.
second_term_liberal <- non_variety_rel_prices %>%
  left_join(cpi, by = c("codigo", "ipc")) %>%
  left_join(ratio_second_term, by = "date") %>%
  mutate(
    p_low_median_weighted_bottom =
      cpi * decile_w_1 * p_low_median_weighted_bottom_ratio,
    p_high_median_weighted_top =
      cpi * decile_w_10 * p_high_median_weighted_top_ratio,
    p_low_quartile_weighted_bottom =
      cpi * decile_w_1 * p_low_quartile_weighted_bottom_ratio,
    p_high_quartile_weighted_top =
      cpi * decile_w_10 * p_high_quartile_weighted_top_ratio
  ) %>%
  group_by(date) %>%
  summarise_at(
    vars(p_low_median_weighted_bottom:p_high_quartile_weighted_top),
    sum,
    na.rm = TRUE
  )

# Stack both terms and add them up date by date.
combined_liberal <- first_term_variety %>%
  bind_rows(second_term_liberal) %>%
  group_by(date) %>%
  summarise_at(vars(contains("weighted")), sum, na.rm = TRUE)

saveRDS(combined_liberal, paste0(essay1_results, "/combined_liberal.rds"))

# Plot liberal ----

# Quarterly x-axis breaks: 2001-04 to 2002-04, then 2002-07 to 2004-04.
x_axis <- c(
  seq(from = as_date("2001-04-01"), to = as_date("2002-04-01"), by = "3 month"), # pre-dev
  seq(from = as_date("2002-07-01"), to = as_date("2004-04-01"), by = "3 month")  # post-dev
)

# Long format: one row per date and series (`type` holds the column name).
data_plot_liberal <- combined_liberal %>%
  gather(key = type, value = combined_p_index, -date)

# Legend text, in the order low-price/bottom-decile series first.
household_labels <- c("Low-income households", "High-income households")

# Theme shared by the median and quartile figures.
combined_index_theme <- theme_classic() +
  theme(
    legend.position = c(0.8, .1),
    legend.text = element_text(size = 7),
    legend.title = element_text(size = 7, hjust = 1),
    legend.background = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid = element_blank(),
    axis.text.x = element_text(size = 6, hjust = 1)
  )

# In both figures the linetypes are named and the breaks are given explicitly.
# ggplot2 orders a discrete scale alphabetically, so with unnamed values and
# labels the p_high_*_weighted_top series (decile-10 weights) would come first
# and receive the dotted line and the "Low-income households" label.
plot_median_liberal <- data_plot_liberal %>%
  filter(str_detect(type, "median")) %>%
  ggplot() +
  geom_line(aes(x = date, y = combined_p_index, linetype = type)) +
  geom_vline(xintercept = as_date("2002-04-01"), linetype = "dashed") +
  scale_linetype_manual(
    values = c(
      p_low_median_weighted_bottom = "dotted",
      p_high_median_weighted_top = "solid"
    ),
    breaks = c("p_low_median_weighted_bottom", "p_high_median_weighted_top"),
    labels = household_labels
  ) +
  guides(linetype = guide_legend(title = "", ncol = 1)) +
  scale_x_date(breaks = x_axis, date_labels = "%b %Y") +
  combined_index_theme

plot_quartile_liberal <- data_plot_liberal %>%
  filter(str_detect(type, "quartile")) %>%
  ggplot() +
  geom_line(aes(x = date, y = combined_p_index, linetype = type)) +
  geom_vline(xintercept = as_date("2002-04-01"), linetype = "dashed") +
  scale_linetype_manual(
    values = c(
      p_low_quartile_weighted_bottom = "dotted",
      p_high_quartile_weighted_top = "solid"
    ),
    breaks = c("p_low_quartile_weighted_bottom", "p_high_quartile_weighted_top"),
    labels = household_labels
  ) +
  guides(linetype = guide_legend(title = "", ncol = 1)) +
  scale_x_date(breaks = x_axis, date_labels = "%b %Y") +
  combined_index_theme

saveRDS(plot_median_liberal, paste0(essay1_results,"/combined_liberal_median_plot.rds"))
saveRDS(plot_quartile_liberal, paste0(essay1_results,"/combined_liberal_quartile_plot.rds"))