library("forecast")
Loading required package: zoo
Attaching package: 'zoo'
The following objects are masked from 'package:base':
as.Date, as.Date.numeric
Loading required package: timeDate
Loading required package: methods
This is forecast 7.3
Daily closing price: May 2, 2005 to December 29, 2006.
# Load Google's daily closing prices (May 2, 2005 to December 29, 2006).
data <- read.csv("http://ptrckprry.com/course/forecasting/data/google.csv")
# `[[` matches the column name exactly, unlike `$`, which allows partial matching.
GOOG <- data[["google"]]
# Fail early, with a clear message, if the column is missing or not numeric.
stopifnot(is.numeric(GOOG), length(GOOG) > 0)
n <- length(GOOG)
# seq_len() is safe for any n, whereas 1:n counts backwards when n == 0.
time <- seq_len(n)
# Raw price series.
plot(time, GOOG, type = "l", col = 2)

# Log prices, with their sample ACF and PACF.
log.GOOG <- log(GOOG)
plot(time, log.GOOG, type = "l", col = 2)
Acf(log.GOOG)
Pacf(log.GOOG)

# First difference of the log prices; the leading NA keeps the series the
# same length as `time`.
diff.log.GOOG <- c(NA, diff(log.GOOG))
plot(time, diff.log.GOOG, type = "l", col = 2)
Acf(diff.log.GOOG)
Pacf(diff.log.GOOG)

# Second difference, built by differencing the first difference again.
diff2.log.GOOG <- c(NA, diff(diff.log.GOOG))
plot(time, diff2.log.GOOG, type = "l", col = 2)
Acf(diff2.log.GOOG)
Pacf(diff2.log.GOOG)
# Baseline model: ARIMA(0,1,0) on the log prices, without a constant.
fit.00 <- Arima(log.GOOG, order = c(0, 1, 0), include.constant = FALSE)
print(fit.00)
Series: log.GOOG
ARIMA(0,1,0)
sigma^2 estimated as 0.0004203: log likelihood=1036.72
AIC=-2071.45 AICc=-2071.44 BIC=-2067.41
# Corrected AIC (AICc) stored on the fitted model object.
fit.00$aicc
[1] -2071.437
# Fit every ARIMA(p,1,q) model for log.GOOG with p and q in {0, 1, 2},
# once without and once with a constant term.
# Without constant:
fit.00 <- Arima(log.GOOG, order = c(0, 1, 0), include.constant = FALSE)
fit.01 <- Arima(log.GOOG, order = c(0, 1, 1), include.constant = FALSE)
fit.02 <- Arima(log.GOOG, order = c(0, 1, 2), include.constant = FALSE)
fit.10 <- Arima(log.GOOG, order = c(1, 1, 0), include.constant = FALSE)
fit.11 <- Arima(log.GOOG, order = c(1, 1, 1), include.constant = FALSE)
fit.12 <- Arima(log.GOOG, order = c(1, 1, 2), include.constant = FALSE)
fit.20 <- Arima(log.GOOG, order = c(2, 1, 0), include.constant = FALSE)
fit.21 <- Arima(log.GOOG, order = c(2, 1, 1), include.constant = FALSE)
fit.22 <- Arima(log.GOOG, order = c(2, 1, 2), include.constant = FALSE)
# With constant:
fit.00c <- Arima(log.GOOG, order = c(0, 1, 0), include.constant = TRUE)
fit.01c <- Arima(log.GOOG, order = c(0, 1, 1), include.constant = TRUE)
fit.02c <- Arima(log.GOOG, order = c(0, 1, 2), include.constant = TRUE)
fit.10c <- Arima(log.GOOG, order = c(1, 1, 0), include.constant = TRUE)
fit.11c <- Arima(log.GOOG, order = c(1, 1, 1), include.constant = TRUE)
fit.12c <- Arima(log.GOOG, order = c(1, 1, 2), include.constant = TRUE)
fit.20c <- Arima(log.GOOG, order = c(2, 1, 0), include.constant = TRUE)
fit.21c <- Arima(log.GOOG, order = c(2, 1, 1), include.constant = TRUE)
fit.22c <- Arima(log.GOOG, order = c(2, 1, 2), include.constant = TRUE)
# Summarize Results
# Fits in table row order: no-constant models first, then p, then q.
fits <- list(
  fit.00, fit.01, fit.02, fit.10, fit.11, fit.12, fit.20, fit.21, fit.22,
  fit.00c, fit.01c, fit.02c, fit.10c, fit.11c, fit.12c, fit.20c, fit.21c,
  fit.22c
)
models <- data.frame(
  p = rep(rep(c(0, 1, 2), each = 3), times = 2),
  d = rep(1, 18),
  q = rep(c(0, 1, 2), times = 6),
  include.constant = rep(c(FALSE, TRUE), each = 9),
  loglik = vapply(fits, function(fit) fit$loglik, numeric(1)),
  aicc = vapply(fits, function(fit) fit$aicc, numeric(1))
)
print(models, digits = 6)
p d q include.constant loglik aicc
1 0 1 0 FALSE 1036.72 -2071.44
2 0 1 1 FALSE 1036.94 -2069.84
3 0 1 2 FALSE 1037.05 -2068.05
4 1 1 0 FALSE 1036.95 -2069.86
5 1 1 1 FALSE 1037.63 -2069.19
6 1 1 2 FALSE 1037.63 -2067.16
7 2 1 0 FALSE 1037.06 -2068.07
8 2 1 1 FALSE 1039.76 -2071.42
9 2 1 2 FALSE 1039.64 -2069.14
10 0 1 0 TRUE 1038.23 -2072.43
11 0 1 1 TRUE 1038.37 -2070.67
12 0 1 2 TRUE 1038.43 -2068.76
13 1 1 0 TRUE 1038.37 -2070.68
14 1 1 1 TRUE 1038.67 -2069.25
15 1 1 2 TRUE 1041.25 -2072.35
16 2 1 0 TRUE 1038.43 -2068.77
17 2 1 1 TRUE 1041.10 -2072.06
18 2 1 2 TRUE 1043.53 -2074.87
These results are unreliable. Use the method described in the next section instead.
# Same model grid, but fitted as ARMA(p,q) (d = 0) to the already-differenced
# series diff.log.GOOG.
# Without constant:
fit.00 <- Arima(diff.log.GOOG, order = c(0, 0, 0), include.constant = FALSE)
fit.01 <- Arima(diff.log.GOOG, order = c(0, 0, 1), include.constant = FALSE)
fit.02 <- Arima(diff.log.GOOG, order = c(0, 0, 2), include.constant = FALSE)
fit.10 <- Arima(diff.log.GOOG, order = c(1, 0, 0), include.constant = FALSE)
fit.11 <- Arima(diff.log.GOOG, order = c(1, 0, 1), include.constant = FALSE)
fit.12 <- Arima(diff.log.GOOG, order = c(1, 0, 2), include.constant = FALSE)
fit.20 <- Arima(diff.log.GOOG, order = c(2, 0, 0), include.constant = FALSE)
fit.21 <- Arima(diff.log.GOOG, order = c(2, 0, 1), include.constant = FALSE)
fit.22 <- Arima(diff.log.GOOG, order = c(2, 0, 2), include.constant = FALSE)
# With constant:
fit.00c <- Arima(diff.log.GOOG, order = c(0, 0, 0), include.constant = TRUE)
fit.01c <- Arima(diff.log.GOOG, order = c(0, 0, 1), include.constant = TRUE)
fit.02c <- Arima(diff.log.GOOG, order = c(0, 0, 2), include.constant = TRUE)
fit.10c <- Arima(diff.log.GOOG, order = c(1, 0, 0), include.constant = TRUE)
fit.11c <- Arima(diff.log.GOOG, order = c(1, 0, 1), include.constant = TRUE)
fit.12c <- Arima(diff.log.GOOG, order = c(1, 0, 2), include.constant = TRUE)
fit.20c <- Arima(diff.log.GOOG, order = c(2, 0, 0), include.constant = TRUE)
fit.21c <- Arima(diff.log.GOOG, order = c(2, 0, 1), include.constant = TRUE)
fit.22c <- Arima(diff.log.GOOG, order = c(2, 0, 2), include.constant = TRUE)
# Summarize Results
# Fits in table row order: no-constant models first, then p, then q.
fits <- list(
  fit.00, fit.01, fit.02, fit.10, fit.11, fit.12, fit.20, fit.21, fit.22,
  fit.00c, fit.01c, fit.02c, fit.10c, fit.11c, fit.12c, fit.20c, fit.21c,
  fit.22c
)
models <- data.frame(
  p = rep(rep(c(0, 1, 2), each = 3), times = 2),
  # d is reported as 1 because the input series is the first difference
  # of log.GOOG, even though Arima() itself is called with d = 0.
  d = rep(1, 18),
  q = rep(c(0, 1, 2), times = 6),
  include.constant = rep(c(FALSE, TRUE), each = 9),
  loglik = vapply(fits, function(fit) fit$loglik, numeric(1)),
  aicc = vapply(fits, function(fit) fit$aicc, numeric(1))
)
print(models, digits = 6)
p d q include.constant loglik aicc
1 0 1 0 FALSE 1036.72 -2071.44
2 0 1 1 FALSE 1036.94 -2069.84
3 0 1 2 FALSE 1037.05 -2068.05
4 1 1 0 FALSE 1036.95 -2069.86
5 1 1 1 FALSE 1037.63 -2069.19
6 1 1 2 FALSE 1039.91 -2071.72
7 2 1 0 FALSE 1037.06 -2068.07
8 2 1 1 FALSE 1039.76 -2071.42
9 2 1 2 FALSE 1039.64 -2069.14
10 0 1 0 TRUE 1038.23 -2072.43
11 0 1 1 TRUE 1038.37 -2070.67
12 0 1 2 TRUE 1038.43 -2068.76
13 1 1 0 TRUE 1038.37 -2070.68
14 1 1 1 TRUE 1038.67 -2069.24
15 1 1 2 TRUE 1041.25 -2072.35
16 2 1 0 TRUE 1038.43 -2068.77
17 2 1 1 TRUE 1041.10 -2072.06
18 2 1 2 TRUE 1040.73 -2069.26
# Refit the chosen specification on the log prices: ARIMA(0,1,0) with a
# constant.
fit.best <- Arima(log.GOOG, order = c(0, 1, 0), include.constant = TRUE)
print(fit.best)
Series: log.GOOG
ARIMA(0,1,0) with drift
Coefficients:
drift
0.0017
s.e. 0.0010
sigma^2 estimated as 0.0004183: log likelihood=1038.23
AIC=-2072.46 AICc=-2072.43 BIC=-2064.38
Note: the constant (drift) is not significant. However, the hypothesis test relies on the model being correct, so the z-statistic and corresponding p-value are probably not reliable. It’s better to use AICC to determine whether to include a constant.
# Residual diagnostics for fit.best: time plot, ACF and PACF.
resid <- residuals(fit.best)
plot(time, resid, type = "l", col = 2)
Acf(resid)
Pacf(resid)

# Ljung-Box test on the residuals at lag 12.
# fitdf = 1 (p = 0, q = 0, model includes constant)
Box.test(resid, lag = 12, type = "Ljung-Box", fitdf = 1)
Box-Ljung test
data: resid
X-squared = 21.904, df = 11, p-value = 0.02513
# Ljung-Box test on the same residuals at lag 24, again with fitdf = 1.
Box.test(resid, lag=24, type = "Ljung-Box", fitdf=1)
Box-Ljung test
data: resid
X-squared = 27.885, df = 23, p-value = 0.2202
# Ljung-Box test on the same residuals at lag 36, again with fitdf = 1.
Box.test(resid, lag=36, type = "Ljung-Box", fitdf=1)
Box-Ljung test
data: resid
X-squared = 40.881, df = 35, p-value = 0.2279
# Ljung-Box test on the same residuals at lag 48, again with fitdf = 1.
Box.test(resid, lag=48, type = "Ljung-Box", fitdf=1)
Box-Ljung test
data: resid
X-squared = 54.659, df = 47, p-value = 0.2065
# 10-step-ahead forecasts from fit.best with 95% intervals. The model was
# fitted to log.GOOG, so the values are on the log-price scale.
forecast(fit.best, h=10, level=95)
Point Forecast Lo 95 Hi 95
422 6.134003 6.093917 6.174090
423 6.135737 6.079047 6.192428
424 6.137471 6.068040 6.206903
425 6.139205 6.059033 6.219378
426 6.140940 6.051303 6.230576
427 6.142674 6.044482 6.240865
428 6.144408 6.038349 6.250466
429 6.146142 6.032760 6.259523
430 6.147876 6.027616 6.268135
431 6.149610 6.022845 6.276374
# 100-step-ahead forecast from the model with a constant.
fc.drift <- forecast(fit.best, h = 100, level = 95)
plot(fc.drift, col = 2)

# The same horizon from ARIMA(0,1,0) without a constant, for comparison.
fit.00 <- Arima(log.GOOG, order = c(0, 1, 0), include.constant = FALSE)
fc.nodrift <- forecast(fit.00, h = 100, level = 95)
plot(fc.nodrift, col = 2)

# ARIMA(1,1,1) without a constant, refitted on the log prices.
fit.11 <- Arima(log.GOOG, order = c(1, 1, 1), include.constant = FALSE)
print(fit.11)
Series: log.GOOG
ARIMA(1,1,1)
Coefficients:
ar1 ma1
0.8953 -0.8659
s.e. 0.1402 0.1571
sigma^2 estimated as 0.0004205: log likelihood=1037.63
AIC=-2069.25 AICc=-2069.19 BIC=-2057.13
# z-statistic for ar1: estimate divided by its standard error. Both numbers are
# typed in by hand, rounded from the printed fit.11 output (0.8953, s.e. 0.1402).
# The outer parentheses make the assigned value print.
(zstat.ar1 <- 0.895 / 0.140)
[1] 6.392857
# Two-sided p-value for ar1, using the standard normal distribution.
(pval.ar1 <- 2*pnorm(-abs(zstat.ar1)))
[1] 1.628144e-10
# z-statistic for ma1: estimate divided by its standard error. Both numbers are
# typed in by hand, rounded from the printed fit.11 output (-0.8659, s.e. 0.1571).
(zstat.ma1 <- -0.866 / 0.157)
[1] -5.515924
# Two-sided p-value for ma1, using the standard normal distribution.
(pval.ma1 <- 2*pnorm(-abs(zstat.ma1)))
[1] 3.46953e-08