Preamble

library("forecast")
library("e1071")                                                  # kurtosis
source("http://ptrckprry.com/course/forecasting/code/ad.test.R")  # ad.test

NADAQ Series

Daily Adjusted Close

data <- read.csv("http://ptrckprry.com/course/forecasting/data/nasdaq.csv")
date <- as.Date(data$date)
time <- seq_along(date)
NASDAQ <- data$nasdaq

Adjusted Close

plot(date, NASDAQ, type="l", col=2)

Differences

diff.NASDAQ <- c(NA, diff(NASDAQ))
plot(date, diff.NASDAQ, type="l", col=2)

Log (Adj. Close)

log.NASDAQ <- log(NASDAQ)
plot(date, log.NASDAQ, type="l", col=2)

Returns

diff.log.NASDAQ <- c(NA, diff(log.NASDAQ))
plot(date, diff.log.NASDAQ, type="l", col=2)

Test for Normality in Returns

Histogram

hist(diff.log.NASDAQ, 50, col=2)

Boxplot

boxplot(diff.log.NASDAQ, col=2)

Normal Quantile-Quantile Plot

qqnorm(diff.log.NASDAQ, col=2)
qqline(diff.log.NASDAQ, col=1, lty=2)

Kurtosis

kurtosis(diff.log.NASDAQ, na.rm=TRUE) # excess kurtosis, 0 for Gaussian
[1] 9.751416

Anderson-Darling Test

ad.test(diff.log.NASDAQ)

    Anderson-Darling normality test

data:  diff.log.NASDAQ
A = 265.72, p-value < 2.2e-16

Autocorrelations, Partial Autocorrelations

par(mfrow=c(1,2))
Acf(diff.log.NASDAQ)
Pacf(diff.log.NASDAQ)

ARIMA Model

Pick an Arima model.

best <- Arima(diff.log.NASDAQ, c(0, 0, 0), include.constant=FALSE)
for (p in 0:3) {
    for (q in 0:3) {
        for (ic in c(FALSE, TRUE)) {
            try({
                fit <- Arima(diff.log.NASDAQ, c(p, 0, q), include.constant=ic)
                if (fit$aicc < best$aicc) {
                    best <- fit
                }
            })
        }
    }
}
print(best)
Series: diff.log.NASDAQ 
ARIMA(3,0,3) with non-zero mean 

Coefficients:
         ar1      ar2     ar3      ma1     ma2      ma3   mean
      0.1703  -0.2167  0.8790  -0.1434  0.2024  -0.8557  4e-04
s.e.  0.0345   0.0316  0.0356   0.0371  0.0354   0.0388  1e-04

sigma^2 estimated as 0.0001515:  log likelihood=34671.92
AIC=-69327.83   AICc=-69327.82   BIC=-69268.93

Fit the model.

# p, q, constant chosen by AICc
fit <- Arima(log.NASDAQ, c(3,1,3), include.constant=TRUE)
print(fit)
Series: log.NASDAQ 
ARIMA(3,1,3) with drift         

Coefficients:
         ar1      ar2    ar3      ma1     ma2      ma3  drift
      0.1731  -0.2155  0.890  -0.1474  0.2018  -0.8676  4e-04
s.e.  0.0306   0.0284  0.032   0.0331  0.0320   0.0351  1e-04

sigma^2 estimated as 0.0001515:  log likelihood=34671.95
AIC=-69327.91   AICc=-69327.89   BIC=-69269.01

Residuals

Time Series Plot

resid <- residuals(fit)
plot(date, resid, type="l", col=3)

Ljung-Box Tests

fitdf <- 4
Box.test(resid, lag=12, type="Ljung-Box", fitdf=fitdf)

    Box-Ljung test

data:  resid
X-squared = 54.14, df = 8, p-value = 6.484e-09
Box.test(resid, lag=24, type="Ljung-Box", fitdf=fitdf)

    Box-Ljung test

data:  resid
X-squared = 95.744, df = 20, p-value = 7.218e-12
Box.test(resid, lag=36, type="Ljung-Box", fitdf=fitdf)

    Box-Ljung test

data:  resid
X-squared = 123.28, df = 32, p-value = 1.202e-12
Box.test(resid, lag=48, type="Ljung-Box", fitdf=fitdf)

    Box-Ljung test

data:  resid
X-squared = 153.44, df = 44, p-value = 4.907e-14

ACF, PACF

par(mfrow=c(1,2))
Acf(resid)
Pacf(resid)

Volatility Proxies

Squared Residual

plot(date, resid^2, type="l", col=3)

Absolute Residual

plot(date, abs(resid), type="l", col=3)

Autocorrelation in Squared Residuals

par(mfrow=c(1,2))
Acf(resid^2)
Pacf(resid^2)

First Difference

par(mfrow=c(1,2))
Acf(diff(resid^2))
Pacf(diff(resid^2))