Preamble

library("forecast")
library("tseries")

NADAQ Series

Daily Adjusted Close

data <- read.csv("http://ptrckprry.com/course/forecasting/data/nasdaq.csv")
date <- as.Date(data$date)
time <- seq_along(date)
NASDAQ <- data$nasdaq

Log (Adj. Close)

log.NASDAQ <- log(NASDAQ)
plot(date, log.NASDAQ, type="l", col=2)

ACF, PACF of Returns

diff.log.NASDAQ <- c(NA, diff(log.NASDAQ))

par(mfrow=c(1,2))
Acf(diff.log.NASDAQ)
Pacf(diff.log.NASDAQ)

ARIMA Identification

d <- 1

# choose p, q with AICc
for (include.constant in c(FALSE, TRUE)) {
    for (p in 0:4) {
        for (q in 0:4) {
            # work-around bug in R by manually differencing
            fit <- Arima(diff(log.NASDAQ), c(p,0,q),
                         include.constant=include.constant, method="ML")
             cat("ARIMA",
                 "(", p, ",", d, ",", q, ")",
                 "(constant=", include.constant, ")",
                 " : ", fit$aicc, "\n", sep="")
        }
    }
}
ARIMA(0,1,0)(constant=FALSE) : -69264.13
ARIMA(0,1,1)(constant=FALSE) : -69296.54
ARIMA(0,1,2)(constant=FALSE) : -69300.3
ARIMA(0,1,3)(constant=FALSE) : -69300.84
ARIMA(0,1,4)(constant=FALSE) : -69302.25
ARIMA(1,1,0)(constant=FALSE) : -69294.94
ARIMA(1,1,1)(constant=FALSE) : -69300.83
ARIMA(1,1,2)(constant=FALSE) : -69298.38
ARIMA(1,1,3)(constant=FALSE) : -69299
ARIMA(1,1,4)(constant=FALSE) : -69300.19
ARIMA(2,1,0)(constant=FALSE) : -69299.5
ARIMA(2,1,1)(constant=FALSE) : -69299.23
ARIMA(2,1,2)(constant=FALSE) : -69296.9
ARIMA(2,1,3)(constant=FALSE) : -69299.31
ARIMA(2,1,4)(constant=FALSE) : -69298.2
ARIMA(3,1,0)(constant=FALSE) : -69301.84
ARIMA(3,1,1)(constant=FALSE) : -69299.67
ARIMA(3,1,2)(constant=FALSE) : -69299.5
ARIMA(3,1,3)(constant=FALSE) : -69322.59
ARIMA(3,1,4)(constant=FALSE) : -69335.59
ARIMA(4,1,0)(constant=FALSE) : -69302.14
ARIMA(4,1,1)(constant=FALSE) : -69300.2
ARIMA(4,1,2)(constant=FALSE) : -69298.18
ARIMA(4,1,3)(constant=FALSE) : -69335.13
ARIMA(4,1,4)(constant=FALSE) : -69335.2
ARIMA(0,1,0)(constant=TRUE) : -69271.5
ARIMA(0,1,1)(constant=TRUE) : -69302.98
ARIMA(0,1,2)(constant=TRUE) : -69307.12
ARIMA(0,1,3)(constant=TRUE) : -69307.42
ARIMA(0,1,4)(constant=TRUE) : -69308.55
ARIMA(1,1,0)(constant=TRUE) : -69301.37
ARIMA(1,1,1)(constant=TRUE) : -69307.61
ARIMA(1,1,2)(constant=TRUE) : -69305.88
ARIMA(1,1,3)(constant=TRUE) : -69305.56
ARIMA(1,1,4)(constant=TRUE) : -69306.48
ARIMA(2,1,0)(constant=TRUE) : -69306.35
ARIMA(2,1,1)(constant=TRUE) : -69305.66
ARIMA(2,1,2)(constant=TRUE) : -69303.65
ARIMA(2,1,3)(constant=TRUE) : -69305.83
ARIMA(2,1,4)(constant=TRUE) : -69304.5
ARIMA(3,1,0)(constant=TRUE) : -69308.36
ARIMA(3,1,1)(constant=TRUE) : -69306.2
ARIMA(3,1,2)(constant=TRUE) : -69306.25
ARIMA(3,1,3)(constant=TRUE) : -69327.82
ARIMA(3,1,4)(constant=TRUE) : -69339.43
ARIMA(4,1,0)(constant=TRUE) : -69308.43
ARIMA(4,1,1)(constant=TRUE) : -69306.49
ARIMA(4,1,2)(constant=TRUE) : -69304.46
ARIMA(4,1,3)(constant=TRUE) : -69337.68
ARIMA(4,1,4)(constant=TRUE) : -69339.37

Selected ARIMA Model

(fit.mean <- Arima(log.NASDAQ, c(3, 1, 4), include.constant=TRUE, method="ML"))
Series: log.NASDAQ 
ARIMA(3,1,4) with drift         

Coefficients:
Warning in sqrt(diag(x$var.coef)): NaNs produced
         ar1      ar2     ar3      ma1     ma2      ma3      ma4  drift
      0.1747  -0.2358  0.9165  -0.1197  0.2185  -0.8887  -0.0377  3e-04
s.e.     NaN      NaN  0.0244      NaN     NaN   0.0271   0.0100  1e-04

sigma^2 estimated as 0.0001514:  log likelihood=34678.72
AIC=-69339.45   AICc=-69339.43   BIC=-69273.19

Residuals

Time Series Plots

resid <- as.numeric(residuals(fit.mean))
plot(date, abs(resid), type="l", col=3)

plot(date, resid^2, type="l", col=3)

ACF, PACF

Abs. Residuals

par(mfrow=c(1,2))
Acf(abs(resid))
Pacf(abs(resid))

Sq. Residuals

par(mfrow=c(1,2))
Acf(resid^2)
Pacf(resid^2)

ARCH Identification

AICC Values for ARCH Models

q <- 0:10
loglik <- rep(NA, length(q))
N <- length(resid)

for (i in 1:length(q)) {
    if (q[i] == 0) {
        loglik[i] <- -0.5 * N * (1 + log(2 * pi * mean(resid^2)))
    } else {
        fit <- garch(resid, c(0,q[i]), trace=FALSE)
        loglik[i] <- logLik(fit)
    }
}

k <- q + 1
aicc <- -2 * loglik  + 2 * k * N / (N - k - 1)

print(data.frame(q, loglik, aicc))
    q   loglik      aicc
1   0 34682.18 -69362.36
2   1 35810.87 -71617.74
3   2 36763.35 -73520.70
4   3 37137.86 -74267.73
5   4 37440.09 -74870.18
6   5 37644.86 -75277.72
7   6 37725.08 -75436.15
8   7 37778.01 -75540.00
9   8 37813.34 -75608.66
10  9 37842.93 -75665.84
11 10 37859.61 -75697.20

AICC Value for GARCH(1,1) Model

fit <- garch(resid, c(1,1), trace=FALSE)
loglik <- logLik(fit)
k <- 2
aicc <- -2 * loglik  + 2 * k * N / (N - k - 1)

print(data.frame(loglik, aicc))
    loglik      aicc
1 37984.07 -75964.13

Selected Model

fit.var <- garch(resid, c(1,1), trace=FALSE)
summary(fit.var)

Call:
garch(x = resid, order = c(1, 1), trace = FALSE)

Model:
GARCH(1,1)

Residuals:
     Min       1Q   Median       3Q      Max 
-7.53368 -0.56012  0.07279  0.63839  5.03026 

Coefficient(s):
    Estimate  Std. Error  t value Pr(>|t|)    
a0 1.321e-06   9.564e-08    13.81   <2e-16 ***
a1 1.077e-01   3.200e-03    33.65   <2e-16 ***
b1 8.845e-01   3.507e-03   252.17   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Diagnostic Tests:
    Jarque Bera Test

data:  Residuals
X-squared = 2675.2, df = 2, p-value < 2.2e-16


    Box-Ljung test

data:  Squared.Residuals
X-squared = 16.038, df = 1, p-value = 6.207e-05

Conditional Variances

ht <- fit.var$fit[,1]^2
plot(date, ht, type="l", col=4)

ARCH Residuals

Time Series Plot

resid.arch <- resid / sqrt(ht)
plot(date, resid.arch, col=4, type="l")

ACF, PACF of Squares

par(mfrow=c(1,2))
Acf(resid.arch^2)
Pacf(resid.arch^2)