library("forecast")
Loading required package: zoo

Attaching package: 'zoo'
The following objects are masked from 'package:base':

    as.Date, as.Date.numeric
Loading required package: timeDate
Loading required package: methods
This is forecast 7.3 

Annual U.S. Housing Starts

data <- read.csv("http://ptrckprry.com/course/forecasting/data/housingstarts.csv")
housing.starts <- data$housing.starts
date <- as.Date(data$date)
n <- length(date)
time <- 1:n

Time Series Plot, ACF, and PACF

plot(date, housing.starts, type="l", col=2)

Acf(housing.starts)

Pacf(housing.starts)

AICC for Candidate Models

# Without constant:
fit.00 <- Arima(housing.starts, c(0, 0, 0), include.constant=FALSE)
fit.01 <- Arima(housing.starts, c(0, 0, 1), include.constant=FALSE)
fit.02 <- Arima(housing.starts, c(0, 0, 2), include.constant=FALSE)
fit.10 <- Arima(housing.starts, c(1, 0, 0), include.constant=FALSE)
fit.11 <- Arima(housing.starts, c(1, 0, 1), include.constant=FALSE)
fit.12 <- Arima(housing.starts, c(1, 0, 2), include.constant=FALSE)
fit.20 <- Arima(housing.starts, c(2, 0, 0), include.constant=FALSE)
fit.21 <- Arima(housing.starts, c(2, 0, 1), include.constant=FALSE)
fit.22 <- Arima(housing.starts, c(2, 0, 2), include.constant=FALSE)

# With constant:
fit.00c <- Arima(housing.starts, c(0, 0, 0), include.constant=TRUE)
fit.01c <- Arima(housing.starts, c(0, 0, 1), include.constant=TRUE)
fit.02c <- Arima(housing.starts, c(0, 0, 2), include.constant=TRUE)
fit.10c <- Arima(housing.starts, c(1, 0, 0), include.constant=TRUE)
fit.11c <- Arima(housing.starts, c(1, 0, 1), include.constant=TRUE)
fit.12c <- Arima(housing.starts, c(1, 0, 2), include.constant=TRUE)
fit.20c <- Arima(housing.starts, c(2, 0, 0), include.constant=TRUE)
fit.21c <- Arima(housing.starts, c(2, 0, 1), include.constant=TRUE)
fit.22c <- Arima(housing.starts, c(2, 0, 2), include.constant=TRUE)

# Summarize Results
models <- data.frame(p = rep(c(0, 0, 0, 1, 1, 1, 2, 2, 2), 2),
                     d = rep(0, 18),
                     q = rep(c(0, 1, 2), 6),
                     include.constant = c(rep(FALSE, 9), rep(TRUE, 9)),
                     loglik = c(fit.00$loglik, fit.01$loglik, fit.02$loglik,
                                fit.10$loglik, fit.11$loglik, fit.12$loglik,
                                fit.20$loglik, fit.21$loglik, fit.22$loglik,
                                fit.00c$loglik, fit.01c$loglik, fit.02c$loglik,
                                fit.10c$loglik, fit.11c$loglik, fit.12c$loglik,
                                fit.20c$loglik, fit.21c$loglik, fit.22c$loglik),
                     aicc = c(fit.00$aicc, fit.01$aicc, fit.02$aicc,
                                fit.10$aicc, fit.11$aicc, fit.12$aicc,
                                fit.20$aicc, fit.21$aicc, fit.22$aicc,
                                fit.00c$aicc, fit.01c$aicc, fit.02c$aicc,
                                fit.10c$aicc, fit.11c$aicc, fit.12c$aicc,
                                fit.20c$aicc, fit.21c$aicc, fit.22c$aicc)
                     )
print(models, digits=6)
   p d q include.constant   loglik     aicc
1  0 0 0            FALSE -505.992 1014.056
2  0 0 1            FALSE -470.361  944.940
3  0 0 2            FALSE -441.947  890.339
4  1 0 0            FALSE -402.910  810.039
5  1 0 1            FALSE -395.414  797.273
6  1 0 2            FALSE -394.402  797.559
7  2 0 0            FALSE -397.899  802.243
8  2 0 1            FALSE -395.166  799.086
9  2 0 2            FALSE -393.506  798.167
10 0 0 0             TRUE -426.633  857.484
11 0 0 1             TRUE -401.894  810.233
12 0 0 2             TRUE -388.072  784.898
13 1 0 0             TRUE -398.337  803.118
14 1 0 1             TRUE -389.153  787.061
15 1 0 2             TRUE -385.498  782.149
16 2 0 0             TRUE -387.741  784.236
17 2 0 1             TRUE -386.484  784.122
18 2 0 2             TRUE -385.427  784.500

Selected Model

fit <- Arima(housing.starts, c(1, 0, 2), include.constant=TRUE)
print(fit)
Series: housing.starts 
ARIMA(1,0,2) with non-zero mean 

Coefficients:
         ar1     ma1     ma2  intercept
      0.4252  0.9429  0.4635  1442.4770
s.e.  0.1635  0.1608  0.1407    98.1573

sigma^2 estimated as 36024:  log likelihood=-385.5
AIC=781   AICc=782.15   BIC=791.3

Fitted model is

[x(t) - 1442.5] = 0.425 * [x(t-1) - 1442.5]
                + eps(t) + 0.943 * eps(t-1) + 0.464 * eps(t-2)

Residuals

resid <- residuals(fit)
plot(date, resid, type="l", col=2)

Acf(resid)

Pacf(resid)

Ljung-Box Goodness-of-Fit Test

# fitdf = 4 (p = 1, q = 2, model includes constant)
Box.test(resid, lag=12, type = "Ljung-Box", fitdf=4)

    Box-Ljung test

data:  resid
X-squared = 2.3467, df = 8, p-value = 0.9685
Box.test(resid, lag=24, type = "Ljung-Box", fitdf=4)

    Box-Ljung test

data:  resid
X-squared = 8.8144, df = 20, p-value = 0.985
Box.test(resid, lag=36, type = "Ljung-Box", fitdf=4)

    Box-Ljung test

data:  resid
X-squared = 17.801, df = 32, p-value = 0.9798
Box.test(resid, lag=48, type = "Ljung-Box", fitdf=4)

    Box-Ljung test

data:  resid
X-squared = 22.071, df = 44, p-value = 0.9977

Forecasts From Selected Model

plot(forecast(fit, h=30, level=95), col=2)

Model Without Constant

fit.nc <- Arima(housing.starts, c(1, 0, 2), include.constant=FALSE)
print(fit.nc)
Series: housing.starts 
ARIMA(1,0,2) with zero mean     

Coefficients:
         ar1     ma1     ma2
      0.9626  0.7033  0.2601
s.e.  0.0316  0.1555  0.1576

sigma^2 estimated as 46118:  log likelihood=-394.4
AIC=796.8   AICc=797.56   BIC=805.05

Residuals (Model Without Constant)

resid.nc <- residuals(fit.nc)
plot(date, resid.nc, type="l", col=2)

Acf(resid.nc)

Pacf(resid.nc)

Ljung-Box Goodness-of-Fit Test (Model Without Constant)

# fitdf = 3 (p = 1, q = 2, no constant)
Box.test(resid.nc, lag=12, type = "Ljung-Box", fitdf=3)

    Box-Ljung test

data:  resid.nc
X-squared = 7.2444, df = 9, p-value = 0.6117
Box.test(resid.nc, lag=24, type = "Ljung-Box", fitdf=3)

    Box-Ljung test

data:  resid.nc
X-squared = 13.832, df = 21, p-value = 0.8767
Box.test(resid.nc, lag=36, type = "Ljung-Box", fitdf=3)

    Box-Ljung test

data:  resid.nc
X-squared = 25.007, df = 33, p-value = 0.8396
Box.test(resid.nc, lag=48, type = "Ljung-Box", fitdf=3)

    Box-Ljung test

data:  resid.nc
X-squared = 26.455, df = 45, p-value = 0.9875

Forecasts From Model Without Constant

plot(forecast(fit.nc, h=100, level=95), col=2)