/*
AUTHOR:   Todd Clark, Federal Reserve Bank of Cleveland, todd.clark@researchfed.org
Code associated with Andrea Carriero, Todd Clark and Massimiliano Marcellino, 
"Nowcasting Tail Risks to Economic Activity at a Weekly Frequency"

Program for estimating the nowcasting model indicated below.  The program writes to an Excel file
posterior mean forecasts for selected quantiles.  The program also writes all draws of forecasts
for 19 quantiles from 0.05 to 0.95.  Note that the paper's results for BQR and PQR are obtained
using the program processdraws_BQRPQR.prg to read in draws for 19 quantiles, fit an empirical density,
and compute the tail risk forecasts/measures of interest from the empirical density.
*/

********************* OOS estimation and forecasting: Bayesian QR
********************* base macro indicators + larger set of other spending indicators 

dis %dateandtime()
dis %ratsversion()
comp overallstart = %cputime()

/*
note:  To change variables included:
1. change the numerical settings for nmlyvar, nwklyvar at the top
2. change the series listed in the EQV command.  Last nwklyvar entries are those for weekly variables.  These are duplicated (same order)
  as monthly variables, appended to the list of monthly variables preceding the list of weekly variables
3. change the vectors of timing indicators, mlytiming and wklytiming
4. Add/change data reads as need be (references to sheets, e.g.) to read in relevant add-ons
*/

*********************: BASIC SETUP & PARAMETER ENTRY
*********************

** key dimensions used below
comp nmlyvar = 5                                ;* number of purely monthly predictor variables to be considered
comp nboth = 2                                  ;* number of predictor variables that are both monthly and weekly to be considered
comp nwklyvar = 5                               ;* weekly-only predictors
comp totxvar = nmlyvar+2*nboth+nwklyvar         ;* 2*nwkly b/c, for wkly variables, we use full month readings preceding the weekly readings on the month

env nowshowgraphs
gsave(format=pdf) "basemacro_largewkly_BQR_*.pdf"
comp modeldesc = "BQR model:  base macro + large weekly"
comp filelabel = "fcdraws/BQR_M_largewkly/"

comp ndraws =  5000	     ;*total number of Gibbs draws
comp burndraws = 1000	 ;*burn-in draws for Gibbs sampler (discarded)

/*
comp ndraws =  100	     ;*total number of Gibbs draws
comp burndraws = 25	 ;*burn-in draws for Gibbs sampler (discarded)
*/

************ note on dating convention: forecast dates refer to forecast origin, rather than forecast endpoint
comp useactual = 1          ;* adjustment for obs lost of last vintage relative to eval sample end date (used to set this at 2)
comp styr = 1947
cal(q) styr:1
comp stsmpl = styr:1	;*earliest period with data

comp stvint = 2007:1       ;* stvint is first vintage, which corresponds to the start of forecasting
comp endvint = 2021:1      ;* last quarter of data vintages considered
comp endsmpl = endvint - useactual
comp lastmo = 2             ;* last month's vintage 
comp hardstpt = 1996:3      ;* common start point for estimation based on qly-mly data for full sample
comp nv = (%year(endvint)-%year(stvint))*12+lastmo  ;* this will be the number of vintages available

***** SETTING RANGE OVER WHICH THIS PROGRAM PRODUCES FORECAST RESULTS 
comp startrange = stvint   ;* earliest is stvint 2008:1
comp endrange = 2014:4     ;* latest is endvint-1
dis %datelabel(startrange) %datelabel(endrange)

all totxvar+nv endvint
smpl stsmpl endvint

comp seedval = 128*startrange
seed seedval
dis seedval

*********************: DATA READ AND SETUP that can be done up front (before loop)
*********************
** declare variables to be used:  ordering of monthly only, mly both, wkly both, wkly only
eqv 1 to totxvar+nv
employ ism ipt rsafs hs claimsmly cclaimsmly claimswkly cclaimswkly steelwkly utilwkly loadswkly fuelwkly rbookwkly $
gdp07m1 gdp07m2 gdp07m3 gdp07m4 gdp07m5 gdp07m6 gdp07m7 gdp07m8 gdp07m9 gdp07m10 gdp07m11 gdp07m12 $
gdp08m1 gdp08m2 gdp08m3 gdp08m4 gdp08m5 gdp08m6 gdp08m7 gdp08m8 gdp08m9 gdp08m10 gdp08m11 gdp08m12 gdp09m1 gdp09m2 gdp09m3 gdp09m4 $
gdp09m5 gdp09m6 gdp09m7 gdp09m8 gdp09m9 gdp09m10 gdp09m11 gdp09m12 gdp10m1 gdp10m2 gdp10m3 gdp10m4 gdp10m5 gdp10m6 gdp10m7 gdp10m8 $
gdp10m9 gdp10m10 gdp10m11 gdp10m12 gdp11m1 gdp11m2 gdp11m3 gdp11m4 gdp11m5 gdp11m6 gdp11m7 gdp11m8 gdp11m9 gdp11m10 gdp11m11 gdp11m12 $
gdp12m1 gdp12m2 gdp12m3 gdp12m4 gdp12m5 gdp12m6 gdp12m7 gdp12m8 gdp12m9 gdp12m10 gdp12m11 gdp12m12 gdp13m1 gdp13m2 gdp13m3 gdp13m4 $
gdp13m5 gdp13m6 gdp13m7 gdp13m8 gdp13m9 gdp13m10 gdp13m11 gdp13m12 gdp14m1 gdp14m2 gdp14m3 gdp14m4 gdp14m5 gdp14m6 gdp14m7 gdp14m8 $
gdp14m9 gdp14m10 gdp14m11 gdp14m12 gdp15m1 gdp15m2 gdp15m3 gdp15m4 gdp15m5 gdp15m6 gdp15m7 gdp15m8 gdp15m9 gdp15m10 gdp15m11 $
gdp15m12 gdp16m1 gdp16m2 gdp16m3 gdp16m4 gdp16m5 gdp16m6 gdp16m7 gdp16m8 gdp16m9 gdp16m10 gdp16m11 gdp16m12 gdp17m1 gdp17m2 $
gdp17m3 gdp17m4 gdp17m5 gdp17m6 gdp17m7 gdp17m8 gdp17m9 gdp17m10 gdp17m11 gdp17m12 gdp18m1 gdp18m2 gdp18m3 gdp18m4 gdp18m5 gdp18m6 $
gdp18m7 gdp18m8 gdp18m9 gdp18m10 gdp18m11 gdp18m12 gdp19m1 gdp19m2 gdp19m3 gdp19m4 gdp19m5 gdp19m6 gdp19m7 gdp19m8 gdp19m9 gdp19m10 $
gdp19m11 gdp19m12 gdp20m1 gdp20m2 gdp20m3  gdp20m4 gdp20m5 gdp20m6 gdp20m7 gdp20m8 gdp20m9 gdp20m10 gdp20m11 gdp20m12 gdp21m1 gdp21m2

** define timing indicators used below
** mlytiming vector applies to monthly-only series; "both" category series at the (prior) monthly frequency are treated as available in week 1 
comp [vec[int]] mlytiming = ||1,1,3,2,3||  ;* indicates what week of the month each true monthly freq variable becomes available
** wklytiming vector applies to series of "both" category at weekly frequency and purely weekly series
comp [vec[int]] wklytiming = ||2,3,2,2,2,2,2|| ;* indicates what week of the month each true weekly freq variable becomes available for week 1
comp nomitavg = 5  ;* count of last in list of weekly series to omit from averaging

comp nmo = 3                     ;* number of months in quarter -- actual data
comp npredmo = 4                 ;* number of months at which we form forecasts: months 1-3 of quarter t plus month 1 of quarter t+1
comp nweek = 4                   ;* number of weeks used in month
comp totweek = npredmo*nweek-1     ;* total number of predictions.  -1 because we don't form a forecast for a 16th week, by which time gdp(t+1) becomes available

** stuff for right-hand side variables:  rec of series for storage and assigning labels, to make regression displays more transparent
**  holding monthly indicators sampled at month 1, month 2, etc. (qly frequency), and the same for weekly indicators
comp totmlyvar=nmlyvar+nboth, totwklyser=nwklyvar+nboth
dec rec[ser] mlyser(totmlyvar,nmo) wklyser(totwklyser,totweek)

** note:  this assignment of series to arrays of series used in setting up models
**  is automated; it works off of the dimensions indicated above and the variables listed in the EQV command
do i = 1,totmlyvar
 do mm = 1,nmo
  labels mlyser(i,mm)
  # %l(i)+'_m'+%string(mm)
 end do mm
end do i
do i = 1,totwklyser
 do mm = 1,totweek
  labels wklyser(i,mm)
  # %l(totmlyvar+i)+'_w'+%string(mm)
 end do mm
end do i

** just reading in GDP and forming growth rate series:  real time data for estimation and forecasting
open data ../data/realtimeGDP.allmonths.xlsx
data(format=xlsx,org=col,sheet="forweekly") / totxvar+1 to totxvar+nv
close
do i = 1,nv
  set(scratch) totxvar+i = 400.*log((totxvar+i){0}/(totxvar+i){1});* GDP growth
end do i

** read in GDP estimates used as actuals in evaluation of forecasts:  these are 2nd avail in the quarterly RTDSM
open data ../data/GDPactuals.secondrelease.xls
data(format=xls,org=col) / actualGDP
close

*********************: setting SOME (not all) stuff for prior and model setup
*********************
comp nylag = 1     ;* numbers of lags of GDP included
comp arlag = 4     ;* lags of each variable included in the autoregressions used to compute variances that enter the prior

comp priordfSigma = 5
** shrinkage hyperparameters for coefficient vector:  element 1: overall shrinkage, 2: cross-variable shrinkage, 3:  rate of lag decay/shrinkage
**      (where lower number means more shrinkage)
comp [vec] shrinkage = ||0.2, 0.2, 1.0||

dec vec priormean
dec symm priorvar
dec vec meancoef

********************************
******************************** stuff for estimation and forecast storage
********************************
sou(noecho) ../procedures/BayesQR.src

comp nquant = 19
dec vec pctiles(nquant)
ewise pctiles(i) = float(i)/(nquant+1)

dec vec[ser] fcseries(totweek) pct05series(totweek) pct10series(totweek) pct90series(totweek) pct95series(totweek) qs05series(totweek) qs10series(totweek)
do mm = 1,totweek                   ;* create series labels to facilitate later reading results from Excel file created below
 labels fcseries(mm)
 # 'fcseries_m'+%string(mm)
 labels pct05series(mm)
 # 'pct05_m'+%string(mm)
 labels pct10series(mm)
 # 'pct10_m'+%string(mm)
 labels pct90series(mm)
 # 'pct90_m'+%string(mm)
 labels pct95series(mm)
 # 'pct95_m'+%string(mm)
 labels qs05series(mm)
 # 'qs05_m'+%string(mm)
 labels qs10series(mm)
 # 'qs10_m'+%string(mm)
end do mm
clear fcseries pct05series pct10series pct90series pct95series qs05series qs10series

*** for storage of forecast draws across quantiles
comp skipint = 5
comp ndraws2=ndraws/skipint
dis ndraws2 (ndraws2*nquant)
dec vec[ser] fcdraws(ndraws2*nquant) alldraws(nquant)
clear(length=endvint) fcdraws
clear(length=ndraws) alldraws

********************************
******************************** read in and set up weekly data, which are not real time
********************************
smpl stsmpl endvint
comp wkeststpt = stsmpl
comp wkestendpt = endvint

** with setting up weekly data, we have to separately treat weeks 1-12 of quarter t, and weeks 13-15, which come from quarter t+1
open data ../data/weeklydata.notrealtime.xlsx
do mm = 1,totweek
 clear(length=endsmpl) totmlyvar+1 to totxvar
 if mm<=nmo*nweek
  comp mmm=mm, per=0
 else
   comp mmm=mm-nmo*nweek, per=-1
 *** reading data
 data(format=xlsx,org=col,sheet="intrates",sel=mmm) / tswkly cswkly sp500wkly
 data(format=xlsx,org=col,sheet="other",sel=mmm) / steelwkly utilwkly loadswkly fuelwkly rbookwkly
 data(format=xlsx,org=col,sheet="nfci",sel=mmm) / nfciwkly 
 data(format=xlsx,org=col,sheet="claims",sel=mmm) / claimswkly cclaimswkly 
 *** storing data 
 dofor i = totmlyvar+1 to totxvar
  set(scratch) wklyser(i-totmlyvar,mm) = i{per}
  inquire(series=i) tempstpt tempendpt
  comp wkeststpt = %imax(wkeststpt,tempstpt)      
  comp wkestendpt = %imin(wkestendpt,tempendpt)    
 end do i
end do mm
close data

smpl wkeststpt wkestendpt

** now, within the month, for weeks k = 2 through 4, make week k the average of months 1 through k for that month
** this makes SP500 an average of 4-week growth rates:  avg w1-w2 for w2, avg w1-w3 for w3, and avg w1-w4 for w4
do mm = 1,npredmo
 do nn = 2,nweek
  comp tt = (mm-1)*nweek+nn
  if tt>totweek
   break
  dofor i = totmlyvar+1 to totxvar-nomitavg
   set(scratch) wklyser(i-totmlyvar,tt) = wklyser(i-totmlyvar,tt){0} + wklyser(i-totmlyvar,tt-1){0}
  end do i
 end do nn
end do mm
do mm = 1,npredmo
 do nn = 2,nweek
  comp tt = (mm-1)*nweek+nn
  if tt>totweek
   break
  dofor i = totmlyvar+1 to totxvar-nomitavg
   set(scratch) wklyser(i-totmlyvar,tt) = (1./nn)*wklyser(i-totmlyvar,tt){0}  ;* now just divide to make the proper average
  end do i
 end do nn
end do mm

dis %datelabel(wkeststpt) %datelabel(wkestendpt)

********************************
******************************** now loop over time to read monthly predictors, determine data avail, set up model, and forecast
********************************

do time = startrange,endrange
 dis ''
 dis '*******************************************************'
 dis '******************************************************* forecast date = ' %datelabel(time)
 dis '*******************************************************'
 dis ''
 comp quarter = %month(time)
 comp originst = %cputime()

 do ww = 1,totweek
  smpl stsmpl time
  comp eststpt = stsmpl
  comp estendpt = time
  ****
  comp lastmoposs = ((ww-1)/4)
  comp nn = ww - lastmoposs*4
  comp yy = %year(time)
  comp month = lastmoposs+1+(quarter-1)*3
  comp mml = 1+(ww-1)/4     ;* month number in quarter (1-3 in quarter and 4 in next quarter)
  comp nn = ww - (mml-1)*4  ;* week number in month (1-4)
  if ww<=3
   comp horz = 2
  else
   comp horz = 1

  comp filename = "../data/rtwklydata.RATS/rtwklydata."+%string(yy)+"qtr"+quarter+"wk"+ww+".rat"

  dis ""
  dis "****************************** totweek, month, week = " ww mml nn
  dis 'filename = ' filename

  open data &filename
  do mm = 1,nmo
   clear 1 to totmlyvar
   *** reading data
   data(format=rats,sel=mm) / 1 to totmlyvar
   dofor i = 1 to totmlyvar
    set(scratch) mlyser(i,mm) = i{0}
    inquire(series=i) tempstpt tempendpt
    comp eststpt = %imax(eststpt,tempstpt)      
    comp estendpt = %imin(estendpt,tempendpt)    
   end do i
  end do mm
  close data
  *dis %datelabel(eststpt) %datelabel(estendpt)
  
  *print eststpt endpt mlyser

  ***** GDP data
  comp gdpmo = ((ww)/4)+1+(quarter-1)*3
  comp gdpyr = yy
  if quarter==4.and.gdpmo>12
   comp gdpmo=gdpmo-12, gdpyr=gdpyr+1
  
  dis 'GDP label = ' ('gdp'+%right(%string(gdpyr),2)+"m"+%string(gdpmo))
  set(scratch) gdp = %s('gdp'+%right(%string(gdpyr),2)+"m"+%string(gdpmo)){0}

  ******************************** now figure out start and end date for estimation based on avail of mly, wkly, and GDP
  comp eststpt = %imax(wkeststpt,eststpt)      
  comp estendpt = %imin(estendpt,estendpt)    
  inquire(series=gdp) tempstpt tempendpt
  comp eststpt = %imax(eststpt,tempstpt)      
  comp estendpt = %imin(estendpt,tempendpt)  
  comp eststpt = %imax(eststpt,hardstpt)      
  dis "estimation sample: " %datelabel(eststpt) %datelabel(estendpt)  
  
  ** check that it ends when it should
  if ww<=3.and.estendpt.ne.(time-2)
   dis "weeks 1-3 case: short at end of sample"
  else if ww>=4.and.estendpt.ne.(time-1)
   dis "weeks 4-15 case: short at end of sample"
   
  if eststpt<>hardstpt
   dis "different start date for estimation"

  ******************************** now build up model
    
  *** build up model:  lags of GDP growth
  comp [vec[int]] reglist = ||constant||
  if nylag>0
   {
    if mml==1.and.nn<=3
     enter(varying) reglist
     # reglist gdp{2 to 2+nylag-1}
    else
     enter(varying) reglist
     # reglist gdp{1 to 1+nylag-1}
   }
  
  *** build up model:  lags of monthly (only) variables
  do i = 1,nmlyvar
   if mml==1
    {
     if mlytiming(i)==1
      enter(varying) reglist
      # reglist mlyser(i,1){1} mlyser(i,2){1} mlyser(i,3){1}
     else 
      {
       if nn<mlytiming(i)
        enter(varying) reglist
        # reglist mlyser(i,1){1} mlyser(i,2){1}
       else
        enter(varying) reglist
        # reglist mlyser(i,1){1} mlyser(i,2){1} mlyser(i,3){1}
      }
    }  ;* end if mml==1
   else
    {
     do j = 1,mml-1
      if j==(mml-1).and.nn<mlytiming(i)
       next
      enter(varying) reglist
      # reglist mlyser(i,j){0}
     end do j
    } ;* end of case of mml>1
  end do i  
  
  *** build up model:  monthly averages of weekly variables as available
  do i = 1,nboth
   if mml==1.and.nn<3
    enter(varying) reglist
    # reglist mlyser(nmlyvar+i,1){1} mlyser(nmlyvar+i,2){1} mlyser(nmlyvar+i,3){1}
   else
    {
     do j = 1,mml-1
       enter(varying) reglist
       # reglist mlyser(nmlyvar+i,j){0}
     end do j
    } ;* end of dealing with month averages of weekly variables
  end do i  
  
  *** build up model:  weekly variables as available
  do i = 1,totwklyser
   if mml==1.and.nn<wklytiming(i)
     next
   if i>nboth.or.nn>=wklytiming(i)
     enter(varying) reglist
     # reglist wklyser(i,(mml-1)*nweek+nn-wklytiming(i)+1){0}
  end do i  

  smpl eststpt estendpt
  *** set up GDP equation for the purpose of configuring some dimensions
  equation baseline gdp
  # reglist
  comp ncoef = %eqnsize(baseline)
  
  *** now build the prior
  linreg(noprint) gdp
  # constant gdp{1 to arlag}
  comp gdpvar = %seesq
  comp muSigma = 0.8*%seesq^0.5

  comp priormean = %zeros(ncoef,1)
  comp priorvar = 0.01*%identity(ncoef)   ;* initialize prior variance to 0.01 times identity matrix
  comp priorvar(1,1) = 1000.^2.*gdpvar    ;* loose prior on intercept
  * lags of GDP growth
  if nylag>0
   {
    do l = 1,nylag
     comp priorvar(l+1,l+1) = (shrinkage(1)/float(l)^shrinkage(3))^2.  
    end do l
   }
  * other predictors  
  comp count = nylag+1
  do ii = count+1,ncoef
    set xseries = %eqnxvector(baseline,t)(ii)
    linreg(noprint) xseries
    # constant xseries{1 to arlag}           ;* but we don't make any adjustment for forecast horizon
    comp priorvar(ii,ii) = (gdpvar/%seesq)*(shrinkage(2)*shrinkage(1))^2.
  end do ii
  
  *** estimate and forecast and store results
  do q = 1,nquant
    dis '******************************* quantile = ' pctiles(q)
    comp starttime2 = %cputime()
    @univariateQR(prmean,doforecast) gdp eststpt estendpt ndraws burndraws priormean priorvar muSigma priordfSigma pctiles(q) horz coefRes SigmaRes zres ForecastRes
    # reglist
    dis 'run time in minutes for this single model = ' (%cputime()-starttime2)/60.
    *
    comp meancoef = %zeros(ncoef,1)
    do draws = 1,ndraws
     comp meancoef = meancoef + (1./ndraws)*coefRes(draws)
    end do draws
    comp quantfc = %dot(meancoef,%eqnxvector(baseline,time))
    if pctiles(q)==0.05
    { 
      comp pct05series(ww)(time) = quantfc
      comp qs05series(ww)(time) = (actualGDP(time)-quantfc)*(pctiles(q)-%if(actualGDP(time)<=quantfc,1.,0.))
     }
    else if pctiles(q)==0.1
    { 
      comp pct10series(ww)(time) = quantfc
      comp qs10series(ww)(time) = (actualGDP(time)-quantfc)*(pctiles(q)-%if(actualGDP(time)<=quantfc,1.,0.))
     }
    else if pctiles(q)==0.5
     comp fcseries(ww)(time) = quantfc
    else if pctiles(q)==0.90
     comp pct90series(ww)(time) = quantfc
    else if pctiles(q)==0.95
     comp pct95series(ww)(time) = quantfc
    *
    set alldraws(q) 1 ndraws = forecastres(t)(time)
    do draws = skipint,ndraws,skipint
     comp drawcount = %idiv(draws,skipint)
     set fcdraws((q-1)*ndraws2+drawcount) time time = forecastres(draws)(time)
    end do draws
    release coefRes SigmaRes zres ForecastRes
  end do q
  *** copy all draws (all quantiles) to data file
  comp outfilename = filelabel+"draws."+%string(yy)+"qtr"+quarter+"wk"+ww+".csv"
  open copy &outfilename
  copy(org=col,for=cdf,nodates) 1 ndraws alldraws
  close copy

 end do ww

 dis 'total time for this forecast origin = ' (%cputime()-originst)/60.
 
end do time

dis %dateandtime()
dis 'total estimation run time in minutes = ' (%cputime()-overallstart)/60.

********************************
******************************** write time series of forecast, forecast error, and pred. likelihoods/scores to Excel file
********************************
comp filename = %unitfnroot("input")+".xls"

open copy &filename
copy(for=xls,dates,org=col) startrange endrange fcseries pct05series pct10series qs05series qs10series pct90series pct95series
close copy
