## This file contains the R code to replicate the application in "Spline
## Regression in the Presence of Categorical Predictors" by Shujie Ma,
## Jeffrey S. Racine, and Lijian Yang. It requires installation of
## the R packages `np' and `crs', which are obtained in the usual
## manner from CRAN after having installed R.

## Please send any comments to racinej@mcmaster.ca

library(np)
library(crs)
## Load the OECD growth panel used throughout this script.
data(oecdpanel)

## The resampling experiment further down builds its data with
## factor(oecd).  Apply the same coercion here so that every model fitted
## in this script sees `oecd' as a categorical predictor, regardless of
## how the column is stored in the packaged data set (this is a no-op if
## it is already a factor).
oecdpanel$oecd <- factor(oecdpanel$oecd)

## Spline settings shared by the crs() calls below.
complexity <- "knots"
basis <- "tensor"
## NOTE(review): `knots' (here) and `nmulti' (below) are defined but are
## not passed to any call in this file -- confirm whether they should be.
knots <- "uniform"
## Starting spline degrees; presumably one entry per continuous
## predictor (initgdp, inv) -- confirm against the crs() documentation.
degree <- rep(3,2)
segments.max <- 5

## Sample sizes for the train/evaluation splits.
n <- nrow(oecdpanel)
n.train <- 600
n.eval <- (n - n.train)
## The out-of-sample comparison needs at least one evaluation row.
stopifnot(n.eval > 0)
nmulti <- 5

## Full-sample spline fit of growth on oecd, initgdp and inv.  With
## cv="nomad" crs() is asked to select the fit's tuning parameters by
## cross-validation, starting from `degree' and bounded by
## `segments.max'.  The selected segments, degree and lambda are re-used
## (held fixed) in the resampling experiment below.
##
## The basis type is taken from the `basis' variable defined above --
## the same variable the resampling loop passes -- rather than repeating
## the literal, so the two fits cannot drift apart.
model.full <- crs(growth ~ oecd + initgdp + inv,
                  cv = "nomad",
                  basis = basis,
                  degree = degree,
                  segments.max = segments.max,
                  complexity = complexity,
                  kernel = TRUE,
                  data = oecdpanel)

## Parametric benchmarks fitted on the full sample by least squares.

## Linear in all three predictors.
model.par <- lm(formula = growth ~ oecd + initgdp + inv,
                data = oecdpanel)

## As above, with an additional quadratic term in initgdp.
model.par.quad <- lm(formula = growth ~ oecd + initgdp + I(initgdp^2) + inv,
                     data = oecdpanel)

## Figure: fitted conditional mean from the full-sample spline model,
## with confidence intervals, each panel on its own scale.
par(cex.lab = 0.75, cex.axis = 0.75)
plot(model.full,
     mean = TRUE,
     ci = TRUE,
     common.scale = FALSE,
     xtrim = 0.05)

## Figure: first derivatives of the same fit, drawn with the same
## options.  The graphics parameters are set again before this plot.
par(cex.lab = 0.75, cex.axis = 0.75)
plot(model.full,
     deriv = 1,
     ci = TRUE,
     common.scale = FALSE,
     xtrim = 0.05)

## Out-of-sample comparison: draw M random train/evaluation splits and
## record, for each split, the mean squared prediction error on the
## evaluation rows for the spline model and the two parametric models.

## Fix the seed so the random splits are reproducible.
set.seed(123)
## Number of random splits.
M <- 1000

## Working copy of the variables used by the models, with oecd coerced
## to a factor.
data.all <- data.frame(growth = oecdpanel$growth,
                       oecd = factor(oecdpanel$oecd),
                       initgdp = oecdpanel$initgdp,
                       inv = oecdpanel$inv)

## Preallocate one slot per split instead of growing the vectors inside
## the loop.
pmse <- numeric(M)
pmse.lm.lin <- numeric(M)
pmse.lm.quad <- numeric(M)

## Mean squared difference between observed and predicted values.
pse <- function(actual, predicted) {
  mean((actual - predicted)^2)
}

for (m in seq_len(M)) {

  ## Random permutation of the row indices: the first n.train rows form
  ## the training sample, the remaining n.eval rows the evaluation
  ## sample.
  ii <- sample(n, replace = FALSE)

  train <- data.all[ii[seq_len(n.train)], ]
  ## Named `holdout' rather than `eval' to avoid masking base::eval.
  holdout <- data.all[ii[n.train + seq_len(n.eval)], ]

  ## Spline model: no cross-validation here (cv="none"); the segments,
  ## degree and lambda are those stored in the full-sample fit.
  model <- crs(growth ~ oecd + initgdp + inv,
               data = train,
               basis = basis,
               segments = model.full$segments,
               degree = model.full$degree,
               lambda = model.full$lambda,
               cv = "none",
               kernel = TRUE)
  pmse[m] <- pse(holdout$growth, predict(model, newdata = holdout))

  ## Linear parametric benchmark.
  pmse.lm.lin[m] <- pse(holdout$growth,
                        predict(lm(growth ~ oecd + initgdp + inv,
                                   data = train),
                                newdata = holdout))

  ## Parametric benchmark with a quadratic term in initgdp.
  pmse.lm.quad[m] <- pse(holdout$growth,
                         predict(lm(growth ~ oecd + initgdp +
                                      I(initgdp^2) + inv,
                                    data = train),
                                 newdata = holdout))

}

## Figure: notched box plots of the per-split prediction errors for the
## three models, with outlying points not drawn.
par(cex.lab = 0.75, cex.axis = 0.75)
pse.by.model <- data.frame(pmse.lm.lin, pmse.lm.quad, pmse)
boxplot(pse.by.model,
        names = c("Linear", "Quadratic", "B-spline"),
        ylab = "PSE",
        notch = TRUE,
        outline = FALSE)


