library("forecast")
library("e1071")                                                  # kurtosis
source("http://ptrckprry.com/course/forecasting/code/ad.test.R")  # ad.test
Daily Adjusted Close
data <- read.csv("http://ptrckprry.com/course/forecasting/data/nasdaq.csv")
date <- as.Date(data$date)
time <- seq_along(date)
NASDAQ <- data$nasdaq
plot(date, NASDAQ, type="l", col=2)
diff.NASDAQ <- c(NA, diff(NASDAQ))
plot(date, diff.NASDAQ, type="l", col=2)
log.NASDAQ <- log(NASDAQ)
plot(date, log.NASDAQ, type="l", col=2)
diff.log.NASDAQ <- c(NA, diff(log.NASDAQ))
plot(date, diff.log.NASDAQ, type="l", col=2)
Histogram
hist(diff.log.NASDAQ, 50, col=2)
Boxplot
boxplot(diff.log.NASDAQ, col=2)
Normal Quantile-Quantile Plot
qqnorm(diff.log.NASDAQ, col=2)
qqline(diff.log.NASDAQ, col=1, lty=2)
Kurtosis
kurtosis(diff.log.NASDAQ, na.rm=TRUE) # excess kurtosis, 0 for Gaussian
[1] 9.751416
Anderson-Darling Test
ad.test(diff.log.NASDAQ)
    Anderson-Darling normality test
data:  diff.log.NASDAQ
A = 265.72, p-value < 2.2e-16
par(mfrow=c(1,2))
Acf(diff.log.NASDAQ)
Pacf(diff.log.NASDAQ)
Pick an Arima model.
best <- Arima(diff.log.NASDAQ, c(0, 0, 0), include.constant=FALSE)
for (p in 0:3) {
    for (q in 0:3) {
        for (ic in c(FALSE, TRUE)) {
            try({
                fit <- Arima(diff.log.NASDAQ, c(p, 0, q), include.constant=ic)
                if (fit$aicc < best$aicc) {
                    best <- fit
                }
            })
        }
    }
}
print(best)
Series: diff.log.NASDAQ 
ARIMA(3,0,3) with non-zero mean 
Coefficients:
         ar1      ar2     ar3      ma1     ma2      ma3   mean
      0.1703  -0.2167  0.8790  -0.1434  0.2024  -0.8557  4e-04
s.e.  0.0345   0.0316  0.0356   0.0371  0.0354   0.0388  1e-04
sigma^2 estimated as 0.0001515:  log likelihood=34671.92
AIC=-69327.83   AICc=-69327.82   BIC=-69268.93
Fit the model.
# p, q, constant chosen by AICc
fit <- Arima(log.NASDAQ, c(3,1,3), include.constant=TRUE)
print(fit)
Series: log.NASDAQ 
ARIMA(3,1,3) with drift         
Coefficients:
         ar1      ar2    ar3      ma1     ma2      ma3  drift
      0.1731  -0.2155  0.890  -0.1474  0.2018  -0.8676  4e-04
s.e.  0.0306   0.0284  0.032   0.0331  0.0320   0.0351  1e-04
sigma^2 estimated as 0.0001515:  log likelihood=34671.95
AIC=-69327.91   AICc=-69327.89   BIC=-69269.01
Time Series Plot
resid <- residuals(fit)
plot(date, resid, type="l", col=3)
Ljung-Box Tests
fitdf <- 4
Box.test(resid, lag=12, type="Ljung-Box", fitdf=fitdf)
    Box-Ljung test
data:  resid
X-squared = 54.14, df = 8, p-value = 6.484e-09
Box.test(resid, lag=24, type="Ljung-Box", fitdf=fitdf)
    Box-Ljung test
data:  resid
X-squared = 95.744, df = 20, p-value = 7.218e-12
Box.test(resid, lag=36, type="Ljung-Box", fitdf=fitdf)
    Box-Ljung test
data:  resid
X-squared = 123.28, df = 32, p-value = 1.202e-12
Box.test(resid, lag=48, type="Ljung-Box", fitdf=fitdf)
    Box-Ljung test
data:  resid
X-squared = 153.44, df = 44, p-value = 4.907e-14
ACF, PACF
par(mfrow=c(1,2))
Acf(resid)
Pacf(resid)
Squared Residual
plot(date, resid^2, type="l", col=3)
Absolute Residual
plot(date, abs(resid), type="l", col=3)
par(mfrow=c(1,2))
Acf(resid^2)
Pacf(resid^2)
par(mfrow=c(1,2))
Acf(diff(resid^2))
Pacf(diff(resid^2))