library("forecast")

The dataset is the log unemployment rate, monthly, seasonally adjusted.

data <- read.csv("http://ptrckprry.com/course/forecasting/data/unemployment.csv")
date <- as.Date(data$date)
time <- 1:length(date)
unemp <- data$unemployment
log.unemp <- log(unemp)

Time series plot:

plot(date, log.unemp, type="l", col=2)

ACF and PACF:

par(mfrow=c(1,2))
Acf(log.unemp)
Pacf(log.unemp)


In HW5, we fit an ARIMA(2,1,2) model without a constant. The ACF of the residuals shows a clear seasonal correlation pattern (even though the original series was supposed to be seasonally adjusted).

fit1 <- Arima(log.unemp, c(2,1,2))
resid1 <- residuals(fit1)
Acf(resid1)

This is confirmed by the Ljung-Box tests.

Box.test(resid1, lag=12, type="Ljung-Box", fitdf=4)

    Box-Ljung test

data:  resid1
X-squared = 38.038, df = 8, p-value = 7.406e-06
Box.test(resid1, lag=24, type="Ljung-Box", fitdf=4)

    Box-Ljung test

data:  resid1
X-squared = 75.206, df = 20, p-value = 2.517e-08
Box.test(resid1, lag=36, type="Ljung-Box", fitdf=4)

    Box-Ljung test

data:  resid1
X-squared = 93.743, df = 32, p-value = 5.66e-08
Box.test(resid1, lag=48, type="Ljung-Box", fitdf=4)

    Box-Ljung test

data:  resid1
X-squared = 102.56, df = 44, p-value = 1.411e-06

The original data source claims that it is seasonally adjusted, but it does not appear that the seasonal trend has been completely removed. Let’s go back to the original (non-seasonal adjusted) data:

data.nsa <- read.csv("http://ptrckprry.com/course/forecasting/data/unemployment-nsa.csv")

unemp.nsa <- data.nsa$unemployment
log.unemp.nsa <- log(unemp.nsa)

For comparison, here is the original series, along with the seasonally-adjusted version

plot(date, log.unemp.nsa, ylab="Log Unemp.", type="l", col=2)
lines(date, log.unemp, col=1)


For the rest of this analysis, we will work with the original (not “seasonally-adjusted” data):

data <- read.csv("http://ptrckprry.com/course/forecasting/data/unemployment-nsa.csv")
date <- as.Date(data$date)
time <- 1:length(date)
unemp <- data$unemployment
log.unemp <- log(unemp)

One simple method of seasonal adjustment is to remove the monthly averages before analyzing the series. Here is code to compute these averages and adjust the series

# Extract the month of each observation
month <- factor(months(date), levels=format(ISOdate(2000,1:12,1),"%B"))

# Compute the monthly averages
month.avg <- c(by(log.unemp, month, mean))
print(month.avg)
  January  February     March     April       May      June      July    August 
 1.826433  1.828073  1.781699  1.692217  1.661409  1.775386  1.746873  1.689539 
September   October  November  December 
 1.657157  1.621396  1.660028  1.666980 
log.unemp.adj <- log.unemp - month.avg[month]

For the log unemployment series, the adjustment doesn’t work very well:

plot(date, log.unemp - mean(log.unemp), type="l", col=2)
lines(date, log.unemp.adj, col=1)

Acf(diff(log.unemp.adj))


Let’s try selecting a seasonal ARIMA model.

ACF and PACF to select D and d

Original Series

par(mfrow=c(1,2))
Acf(log.unemp)
Pacf(log.unemp)

First Difference

par(mfrow=c(1,2))
Acf(diff(log.unemp))
Pacf(diff(log.unemp))

Seasonal Difference

par(mfrow=c(1,2))
Acf(diff(log.unemp, 12))
Pacf(diff(log.unemp, 12))

Seasonal Difference of First Difference

par(mfrow=c(1,2))
Acf(diff(diff(log.unemp), 12))
Pacf(diff(diff(log.unemp), 12))

Seasonal Difference of Second Difference

par(mfrow=c(1,2))
Acf(diff(diff(diff(log.unemp)), 12))
Pacf(diff(diff(diff(log.unemp)), 12))

Second Seasonal Difference of First Difference

par(mfrow=c(1,2))
Acf(diff(diff(diff(log.unemp), 12), 12))
Pacf(diff(diff(diff(log.unemp), 12), 12))

Chose (p,q) and (P,Q) with AICC

d <- 1
D <- 1
for (p in 0:2) {
    for (q in 0:2) {
        for (P in 0:2) {
            for (Q in 0:2) {
                fit <- list(aicc=NA)
                try({
                    fit <- Arima(log.unemp, c(p,d,q),
                                 seasonal=list(order=c(P,D,Q), period=12),
                                 method="ML")
                })
                cat("ARIMA",
                    "(", p, ",", d, ",", q, ")",
                    "(", P, ",", D, ",", Q, ")[12]",
                    " : ", fit$aicc, "\n", sep="")
            }
        }
    }
}
ARIMA(0,1,0)(0,1,0)[12] : -2300.718
ARIMA(0,1,0)(0,1,1)[12] : -2648.87
ARIMA(0,1,0)(0,1,2)[12] : -2646.918
ARIMA(0,1,0)(1,1,0)[12] : -2505.55
ARIMA(0,1,0)(1,1,1)[12] : -2646.913
ARIMA(0,1,0)(1,1,2)[12] : -2644.842
ARIMA(0,1,0)(2,1,0)[12] : -2582.301
ARIMA(0,1,0)(2,1,1)[12] : -2645.487
ARIMA(0,1,0)(2,1,2)[12] : -2643.523
ARIMA(0,1,1)(0,1,0)[12] : -2303.743
ARIMA(0,1,1)(0,1,1)[12] : -2650.21
ARIMA(0,1,1)(0,1,2)[12] : -2648.337
ARIMA(0,1,1)(1,1,0)[12] : -2509.127
ARIMA(0,1,1)(1,1,1)[12] : -2648.324
ARIMA(0,1,1)(1,1,2)[12] : -2646.187
ARIMA(0,1,1)(2,1,0)[12] : -2590.059
ARIMA(0,1,1)(2,1,1)[12] : -2647.138
ARIMA(0,1,1)(2,1,2)[12] : -2645.127
ARIMA(0,1,2)(0,1,0)[12] : -2348.262
ARIMA(0,1,2)(0,1,1)[12] : -2669.447
ARIMA(0,1,2)(0,1,2)[12] : -2667.451
ARIMA(0,1,2)(1,1,0)[12] : -2539.523
ARIMA(0,1,2)(1,1,1)[12] : -2667.45
ARIMA(0,1,2)(1,1,2)[12] : -2665.452
ARIMA(0,1,2)(2,1,0)[12] : -2609.592
ARIMA(0,1,2)(2,1,1)[12] : -2665.6
ARIMA(0,1,2)(2,1,2)[12] : -2663.475
ARIMA(1,1,0)(0,1,0)[12] : -2306.205
ARIMA(1,1,0)(0,1,1)[12] : -2651.401
ARIMA(1,1,0)(0,1,2)[12] : -2649.555
ARIMA(1,1,0)(1,1,0)[12] : -2511.463
ARIMA(1,1,0)(1,1,1)[12] : -2649.54
ARIMA(1,1,0)(1,1,2)[12] : -2647.383
ARIMA(1,1,0)(2,1,0)[12] : -2593.542
ARIMA(1,1,0)(2,1,1)[12] : -2648.432
ARIMA(1,1,0)(2,1,2)[12] : -2646.132
ARIMA(1,1,1)(0,1,0)[12] : -2344.214
ARIMA(1,1,1)(0,1,1)[12] : -2680.821
ARIMA(1,1,1)(0,1,2)[12] : -2679.372
ARIMA(1,1,1)(1,1,0)[12] : -2549.931
ARIMA(1,1,1)(1,1,1)[12] : -2679.317
ARIMA(1,1,1)(1,1,2)[12] : -2677.633
ARIMA(1,1,1)(2,1,0)[12] : -2632.102
ARIMA(1,1,1)(2,1,1)[12] : NA
ARIMA(1,1,1)(2,1,2)[12] : -2643.32
ARIMA(1,1,2)(0,1,0)[12] : -2370.55
ARIMA(1,1,2)(0,1,1)[12] : -2692.305
ARIMA(1,1,2)(0,1,2)[12] : -2690.325
ARIMA(1,1,2)(1,1,0)[12] : -2563.595
ARIMA(1,1,2)(1,1,1)[12] : -2690.321
ARIMA(1,1,2)(1,1,2)[12] : -2688.24
ARIMA(1,1,2)(2,1,0)[12] : -2639.967
ARIMA(1,1,2)(2,1,1)[12] : -2688.928
ARIMA(1,1,2)(2,1,2)[12] : -2686.887
ARIMA(2,1,0)(0,1,0)[12] : -2356.726
ARIMA(2,1,0)(0,1,1)[12] : -2675.925
ARIMA(2,1,0)(0,1,2)[12] : -2673.919
ARIMA(2,1,0)(1,1,0)[12] : -2547.867
ARIMA(2,1,0)(1,1,1)[12] : -2673.918
ARIMA(2,1,0)(1,1,2)[12] : -2671.931
ARIMA(2,1,0)(2,1,0)[12] : -2619.545
ARIMA(2,1,0)(2,1,1)[12] : -2672.136
ARIMA(2,1,0)(2,1,2)[12] : -2670.03
ARIMA(2,1,1)(0,1,0)[12] : -2369.915
ARIMA(2,1,1)(0,1,1)[12] : -2691.518
ARIMA(2,1,1)(0,1,2)[12] : -2689.545
ARIMA(2,1,1)(1,1,0)[12] : -2563.661
ARIMA(2,1,1)(1,1,1)[12] : -2689.541
ARIMA(2,1,1)(1,1,2)[12] : -2687.455
ARIMA(2,1,1)(2,1,0)[12] : -2639.039
ARIMA(2,1,1)(2,1,1)[12] : -2688.033
ARIMA(2,1,1)(2,1,2)[12] : -2685.993
ARIMA(2,1,2)(0,1,0)[12] : -2368.54
ARIMA(2,1,2)(0,1,1)[12] : -2693.834
ARIMA(2,1,2)(0,1,2)[12] : -2691.931
ARIMA(2,1,2)(1,1,0)[12] : -2561.717
ARIMA(2,1,2)(1,1,1)[12] : -2691.916
ARIMA(2,1,2)(1,1,2)[12] : -2690.479
ARIMA(2,1,2)(2,1,0)[12] : -2648.585
ARIMA(2,1,2)(2,1,1)[12] : -2690.786
ARIMA(2,1,2)(2,1,2)[12] : NA

fit <- Arima(log.unemp, c(2,1,2), seasonal=list(order=c(0,1,1), period=12))
resid <- residuals(fit)
par(mfrow=c(1,2))
Acf(resid)
Pacf(resid)

Box.test(resid, lag=12, type="Ljung-Box", fitdf=5)

    Box-Ljung test

data:  resid
X-squared = 16.315, df = 7, p-value = 0.02239
Box.test(resid, lag=24, type="Ljung-Box", fitdf=5)

    Box-Ljung test

data:  resid
X-squared = 36.742, df = 19, p-value = 0.008545
Box.test(resid, lag=36, type="Ljung-Box", fitdf=5)

    Box-Ljung test

data:  resid
X-squared = 55.428, df = 31, p-value = 0.004489
Box.test(resid, lag=48, type="Ljung-Box", fitdf=5)

    Box-Ljung test

data:  resid
X-squared = 69.635, df = 43, p-value = 0.006225
fit0 <- Arima(log.unemp, c(2,1,2))

Short-Range (10 Month) Forecasts

n <- max(time)
par(mfrow=c(1,2))
plot(forecast(fit, h=10))
plot(forecast(fit, h=10), xlim=c(n - 18, n + 10))

par(mfrow=c(1,2))
plot(forecast(fit0, h=10))
plot(forecast(fit0, h=10), xlim=c(n - 18, n + 10))

Mid-Range (2 Year) Forecasts

par(mfrow=c(1,2))
plot(forecast(fit, h=24))
plot(forecast(fit, h=24), xlim=c(n - 100, n + 2*12))

par(mfrow=c(1,2))
plot(forecast(fit0, h=24))
plot(forecast(fit0, h=24), xlim=c(n - 100, n + 2*12))

Long-Range (10 Year) Forecasts

par(mfrow=c(1,2))
plot(forecast(fit, h=10*12))
plot(forecast(fit, h=10*12), xlim=c(n - 100, n + 10*12))

par(mfrow=c(1,2))
plot(forecast(fit0, h=10*12))
plot(forecast(fit0, h=10*12), xlim=c(n - 100, n + 10*12))

Long-Range (50 Year) Forecasts

plot(forecast(fit, h=50*12))

plot(forecast(fit0, h=50*12))