library("forecast")
The dataset is the log unemployment rate, monthly, seasonally adjusted.
data <- read.csv("http://ptrckprry.com/course/forecasting/data/unemployment.csv")
date <- as.Date(data$date)
time <- 1:length(date)
unemp <- data$unemployment
log.unemp <- log(unemp)
Time series plot:
plot(date, log.unemp, type="l", col=2)
ACF and PACF:
par(mfrow=c(1,2))
Acf(log.unemp)
Pacf(log.unemp)
In HW5, we fit an ARIMA(2,1,2) model without a constant. The ACF of the residuals shows a clear seasonal correlation pattern (even though the original series was supposed to be seasonally adjusted).
fit1 <- Arima(log.unemp, c(2,1,2))
resid1 <- residuals(fit1)
Acf(resid1)
This is confirmed by the Ljung-Box tests.
Box.test(resid1, lag=12, type="Ljung-Box", fitdf=4)
Box-Ljung test
data: resid1
X-squared = 38.038, df = 8, p-value = 7.406e-06
Box.test(resid1, lag=24, type="Ljung-Box", fitdf=4)
Box-Ljung test
data: resid1
X-squared = 75.206, df = 20, p-value = 2.517e-08
Box.test(resid1, lag=36, type="Ljung-Box", fitdf=4)
Box-Ljung test
data: resid1
X-squared = 93.743, df = 32, p-value = 5.66e-08
Box.test(resid1, lag=48, type="Ljung-Box", fitdf=4)
Box-Ljung test
data: resid1
X-squared = 102.56, df = 44, p-value = 1.411e-06
The original data source claims that it is seasonally adjusted, but it does not appear that the seasonal trend has been completely removed. Let’s go back to the original (non-seasonal adjusted) data:
data.nsa <- read.csv("http://ptrckprry.com/course/forecasting/data/unemployment-nsa.csv")
unemp.nsa <- data.nsa$unemployment
log.unemp.nsa <- log(unemp.nsa)
For comparison, here is the original series, along with the seasonally-adjusted version
plot(date, log.unemp.nsa, ylab="Log Unemp.", type="l", col=2)
lines(date, log.unemp, col=1)
For the rest of this analysis, we will work with the original (not “seasonally-adjusted” data):
data <- read.csv("http://ptrckprry.com/course/forecasting/data/unemployment-nsa.csv")
date <- as.Date(data$date)
time <- 1:length(date)
unemp <- data$unemployment
log.unemp <- log(unemp)
One simple method of seasonal adjustment is to remove the monthly averages before analyzing the series. Here is code to compute these averages and adjust the series
# Extract the month of each observation
month <- factor(months(date), levels=format(ISOdate(2000,1:12,1),"%B"))
# Compute the monthly averages
month.avg <- c(by(log.unemp, month, mean))
print(month.avg)
January February March April May June July August
1.826433 1.828073 1.781699 1.692217 1.661409 1.775386 1.746873 1.689539
September October November December
1.657157 1.621396 1.660028 1.666980
log.unemp.adj <- log.unemp - month.avg[month]
For the log unemployment series, the adjustment doesn’t work very well:
plot(date, log.unemp - mean(log.unemp), type="l", col=2)
lines(date, log.unemp.adj, col=1)
Acf(diff(log.unemp.adj))
Let’s try selecting a seasonal ARIMA model.
Original Series
par(mfrow=c(1,2))
Acf(log.unemp)
Pacf(log.unemp)
First Difference
par(mfrow=c(1,2))
Acf(diff(log.unemp))
Pacf(diff(log.unemp))
Seasonal Difference
par(mfrow=c(1,2))
Acf(diff(log.unemp, 12))
Pacf(diff(log.unemp, 12))
Seasonal Difference of First Difference
par(mfrow=c(1,2))
Acf(diff(diff(log.unemp), 12))
Pacf(diff(diff(log.unemp), 12))
Seasonal Difference of Second Difference
par(mfrow=c(1,2))
Acf(diff(diff(diff(log.unemp)), 12))
Pacf(diff(diff(diff(log.unemp)), 12))
Second Seasonal Difference of First Difference
par(mfrow=c(1,2))
Acf(diff(diff(diff(log.unemp), 12), 12))
Pacf(diff(diff(diff(log.unemp), 12), 12))
d <- 1
D <- 1
for (p in 0:2) {
for (q in 0:2) {
for (P in 0:2) {
for (Q in 0:2) {
fit <- list(aicc=NA)
try({
fit <- Arima(log.unemp, c(p,d,q),
seasonal=list(order=c(P,D,Q), period=12),
method="ML")
})
cat("ARIMA",
"(", p, ",", d, ",", q, ")",
"(", P, ",", D, ",", Q, ")[12]",
" : ", fit$aicc, "\n", sep="")
}
}
}
}
ARIMA(0,1,0)(0,1,0)[12] : -2300.718
ARIMA(0,1,0)(0,1,1)[12] : -2648.87
ARIMA(0,1,0)(0,1,2)[12] : -2646.918
ARIMA(0,1,0)(1,1,0)[12] : -2505.55
ARIMA(0,1,0)(1,1,1)[12] : -2646.913
ARIMA(0,1,0)(1,1,2)[12] : -2644.842
ARIMA(0,1,0)(2,1,0)[12] : -2582.301
ARIMA(0,1,0)(2,1,1)[12] : -2645.487
ARIMA(0,1,0)(2,1,2)[12] : -2643.523
ARIMA(0,1,1)(0,1,0)[12] : -2303.743
ARIMA(0,1,1)(0,1,1)[12] : -2650.21
ARIMA(0,1,1)(0,1,2)[12] : -2648.337
ARIMA(0,1,1)(1,1,0)[12] : -2509.127
ARIMA(0,1,1)(1,1,1)[12] : -2648.324
ARIMA(0,1,1)(1,1,2)[12] : -2646.187
ARIMA(0,1,1)(2,1,0)[12] : -2590.059
ARIMA(0,1,1)(2,1,1)[12] : -2647.138
ARIMA(0,1,1)(2,1,2)[12] : -2645.127
ARIMA(0,1,2)(0,1,0)[12] : -2348.262
ARIMA(0,1,2)(0,1,1)[12] : -2669.447
ARIMA(0,1,2)(0,1,2)[12] : -2667.451
ARIMA(0,1,2)(1,1,0)[12] : -2539.523
ARIMA(0,1,2)(1,1,1)[12] : -2667.45
ARIMA(0,1,2)(1,1,2)[12] : -2665.452
ARIMA(0,1,2)(2,1,0)[12] : -2609.592
ARIMA(0,1,2)(2,1,1)[12] : -2665.6
ARIMA(0,1,2)(2,1,2)[12] : -2663.475
ARIMA(1,1,0)(0,1,0)[12] : -2306.205
ARIMA(1,1,0)(0,1,1)[12] : -2651.401
ARIMA(1,1,0)(0,1,2)[12] : -2649.555
ARIMA(1,1,0)(1,1,0)[12] : -2511.463
ARIMA(1,1,0)(1,1,1)[12] : -2649.54
ARIMA(1,1,0)(1,1,2)[12] : -2647.383
ARIMA(1,1,0)(2,1,0)[12] : -2593.542
ARIMA(1,1,0)(2,1,1)[12] : -2648.432
ARIMA(1,1,0)(2,1,2)[12] : -2646.132
ARIMA(1,1,1)(0,1,0)[12] : -2344.214
ARIMA(1,1,1)(0,1,1)[12] : -2680.821
ARIMA(1,1,1)(0,1,2)[12] : -2679.372
ARIMA(1,1,1)(1,1,0)[12] : -2549.931
ARIMA(1,1,1)(1,1,1)[12] : -2679.317
ARIMA(1,1,1)(1,1,2)[12] : -2677.633
ARIMA(1,1,1)(2,1,0)[12] : -2632.102
ARIMA(1,1,1)(2,1,1)[12] : NA
ARIMA(1,1,1)(2,1,2)[12] : -2643.32
ARIMA(1,1,2)(0,1,0)[12] : -2370.55
ARIMA(1,1,2)(0,1,1)[12] : -2692.305
ARIMA(1,1,2)(0,1,2)[12] : -2690.325
ARIMA(1,1,2)(1,1,0)[12] : -2563.595
ARIMA(1,1,2)(1,1,1)[12] : -2690.321
ARIMA(1,1,2)(1,1,2)[12] : -2688.24
ARIMA(1,1,2)(2,1,0)[12] : -2639.967
ARIMA(1,1,2)(2,1,1)[12] : -2688.928
ARIMA(1,1,2)(2,1,2)[12] : -2686.887
ARIMA(2,1,0)(0,1,0)[12] : -2356.726
ARIMA(2,1,0)(0,1,1)[12] : -2675.925
ARIMA(2,1,0)(0,1,2)[12] : -2673.919
ARIMA(2,1,0)(1,1,0)[12] : -2547.867
ARIMA(2,1,0)(1,1,1)[12] : -2673.918
ARIMA(2,1,0)(1,1,2)[12] : -2671.931
ARIMA(2,1,0)(2,1,0)[12] : -2619.545
ARIMA(2,1,0)(2,1,1)[12] : -2672.136
ARIMA(2,1,0)(2,1,2)[12] : -2670.03
ARIMA(2,1,1)(0,1,0)[12] : -2369.915
ARIMA(2,1,1)(0,1,1)[12] : -2691.518
ARIMA(2,1,1)(0,1,2)[12] : -2689.545
ARIMA(2,1,1)(1,1,0)[12] : -2563.661
ARIMA(2,1,1)(1,1,1)[12] : -2689.541
ARIMA(2,1,1)(1,1,2)[12] : -2687.455
ARIMA(2,1,1)(2,1,0)[12] : -2639.039
ARIMA(2,1,1)(2,1,1)[12] : -2688.033
ARIMA(2,1,1)(2,1,2)[12] : -2685.993
ARIMA(2,1,2)(0,1,0)[12] : -2368.54
ARIMA(2,1,2)(0,1,1)[12] : -2693.834
ARIMA(2,1,2)(0,1,2)[12] : -2691.931
ARIMA(2,1,2)(1,1,0)[12] : -2561.717
ARIMA(2,1,2)(1,1,1)[12] : -2691.916
ARIMA(2,1,2)(1,1,2)[12] : -2690.479
ARIMA(2,1,2)(2,1,0)[12] : -2648.585
ARIMA(2,1,2)(2,1,1)[12] : -2690.786
ARIMA(2,1,2)(2,1,2)[12] : NA
fit <- Arima(log.unemp, c(2,1,2), seasonal=list(order=c(0,1,1), period=12))
resid <- residuals(fit)
par(mfrow=c(1,2))
Acf(resid)
Pacf(resid)
Box.test(resid, lag=12, type="Ljung-Box", fitdf=5)
Box-Ljung test
data: resid
X-squared = 16.315, df = 7, p-value = 0.02239
Box.test(resid, lag=24, type="Ljung-Box", fitdf=5)
Box-Ljung test
data: resid
X-squared = 36.742, df = 19, p-value = 0.008545
Box.test(resid, lag=36, type="Ljung-Box", fitdf=5)
Box-Ljung test
data: resid
X-squared = 55.428, df = 31, p-value = 0.004489
Box.test(resid, lag=48, type="Ljung-Box", fitdf=5)
Box-Ljung test
data: resid
X-squared = 69.635, df = 43, p-value = 0.006225
fit0 <- Arima(log.unemp, c(2,1,2))
n <- max(time)
par(mfrow=c(1,2))
plot(forecast(fit, h=10))
plot(forecast(fit, h=10), xlim=c(n - 18, n + 10))
par(mfrow=c(1,2))
plot(forecast(fit0, h=10))
plot(forecast(fit0, h=10), xlim=c(n - 18, n + 10))
par(mfrow=c(1,2))
plot(forecast(fit, h=24))
plot(forecast(fit, h=24), xlim=c(n - 100, n + 2*12))
par(mfrow=c(1,2))
plot(forecast(fit0, h=24))
plot(forecast(fit0, h=24), xlim=c(n - 100, n + 2*12))
par(mfrow=c(1,2))
plot(forecast(fit, h=10*12))
plot(forecast(fit, h=10*12), xlim=c(n - 100, n + 10*12))
par(mfrow=c(1,2))
plot(forecast(fit0, h=10*12))
plot(forecast(fit0, h=10*12), xlim=c(n - 100, n + 10*12))
plot(forecast(fit, h=50*12))
plot(forecast(fit0, h=50*12))