```{r setup, cache=FALSE, echo=FALSE}
# Packages used throughout the document
library(RColorBrewer)  # provides brewer.pal()
library(e1071)         # provides kurtosis()
library(knitr)         # provides opts_chunk

# Helper script from the course website (presumably defines ad.test(), which
# is called later in this document)
# NOTE(review): this runs remote code fetched over plain HTTP at knit time
source(file = "http://ptrckprry.com/course/forecasting/code/ad.test.R")

# Line width used when printing output
options(width = 80)

# Make the first six "Set1" ColorBrewer colors the active palette, so that
# numeric colors such as col=1 and col=2 in later chunks index into it
palette(value = brewer.pal(n = 6, name = "Set1"))

# Default options applied to every later chunk
#opts_chunk$set(fig.width=12, fig.height=8, fig.align="center", echo=FALSE, tidy=FALSE)
opts_chunk$set(
    cache = TRUE,
    fig.align = "center",
    comment = NA,
    echo = FALSE,
    tidy = FALSE
)
```

Dow Jones Industrial Average
----------------------------

```{r}
# Fetch the CSV from the course website, then pull out the two columns used
# below: `dow`, and `date` parsed from YYYY-MM-DD text into Date objects
data <- read.csv(file = "http://ptrckprry.com/course/forecasting/data/dow.csv")
dow <- data$dow
date <- as.Date(data$date, format = "%Y-%m-%d")
```

```{r}
# Set up the plot region and the default bottom/left axes, but don't actually
# plot the data yet (type = "n")
plot(date, dow, type="n", xlab="Date", ylab="Dow")

# Add unlabelled tick marks on the top and right edges of the plot
#   side: 1 = bottom, 2 = left, 3 = top, 4 = right
#   lwd, lwd.ticks: line width of the axis line and ticks
#   labels: put labels at tick marks?
# Each Axis() call is given the variable plotted along that edge, so that the
# tick positions match the labelled axis on the opposite side: dates along the
# top, Dow values down the right-hand side.
Axis(date, side=3, lwd=0, lwd.ticks=1, labels=FALSE)
Axis(dow, side=4, lwd=0, lwd.ticks=1, labels=FALSE)

# Add three evenly spaced horizontal guide lines (the interior points of a
# 5-point grid spanning the vertical plot limits)
usr <- par("usr") # get the plot limits in "user" coordinates (xmin, xmax, ymin, ymax)
abline(h=seq(usr[3], usr[4], length.out=5)[2:4], col="gray")

# Now, add the data.  We do this *after* the guide lines so that the data ends up on top
lines(date, dow, col=2)
```


Log(Dow)
--------

```{r}
# Encapsulate the plotting commands into a function so that we don't have to
# repeat ourselves.
#
# Draws the data inside a box with tick marks on all four sides (labelled on
# the bottom and left only) and optional evenly spaced gray guide lines that
# sit underneath the data.
#
# Arguments:
#   x, y           coordinates of the data
#   hguide         number of horizontal guide lines (0 = none)
#   vguide         number of vertical guide lines (0 = none)
#   type           base-graphics plot type: "p" points (default), "l" lines,
#                  or any of "b", "c", "o", "h", "s", "S", "n"
#   col, pch, cex  color, plotting symbol and symbol size for the data
#   ...            forwarded to plot() when setting up the region
#                  (e.g. xlab, ylab, asp)
#
# Returns NULL invisibly; called for its side effect of drawing on the
# current graphics device.
fancy_plot <- function(x, y, hguide=0, vguide=0, type = "p", col = "black", pch = 1, cex = 1, ...) {
    # reject unsupported plot types up front, before anything is drawn, rather
    # than silently producing a plot with no data in it
    valid_types <- c("p", "l", "b", "c", "o", "h", "s", "S", "n")
    if (!is.character(type) || length(type) != 1L || !(type %in% valid_types)) {
        stop("'type' must be one of: ", paste(valid_types, collapse = ", "),
             call. = FALSE)
    }

    # set up plot region (no data, no axes yet)
    plot(x, y, type="n", axes=FALSE, ...)

    # add axes: tick marks only (lwd=0 hides the axis line), labels on the
    # bottom and left sides
    Axis(x, side=1, lwd=0, lwd.ticks=1, labels=TRUE)
    Axis(y, side=2, lwd=0, lwd.ticks=1, labels=TRUE)
    Axis(x, side=3, lwd=0, lwd.ticks=1, labels=FALSE)
    Axis(y, side=4, lwd=0, lwd.ticks=1, labels=FALSE)

    # add guide lines at the interior points of an evenly spaced grid spanning
    # the plot limits (usr = xmin, xmax, ymin, ymax)
    usr <- par("usr")
    if (hguide > 0) {
        abline(h=seq(usr[3], usr[4], length.out=hguide+2)[-c(1,hguide+2)], col="gray")
    }
    if (vguide > 0) {
        abline(v=seq(usr[1], usr[2], length.out=vguide+2)[-c(1,vguide+2)], col="gray")
    }

    # add the data on top of the guide lines; points() honors `type`, so
    # type="l" draws lines and type="p" draws symbols exactly as before
    points(x, y, type=type, col=col, pch=pch, cex=cex)

    # add frame
    box()

    invisible(NULL)
}
```

```{r}
# Natural log of the Dow, drawn as a line with three horizontal guide lines
log_dow <- log(dow)
fancy_plot(date, log_dow, hguide = 3, type = "l", col = 2,
           xlab = "Date", ylab = "Log(Dow)")
```

Take logs to adjust for exponential growth and level-dependent volatility.


Stylized Fact #1 {.flexbox .vcenter}
----------------

Many datasets exhibit exponential growth and level-dependent volatility.


Regress Log(Dow) on Time
------------------------

```{r}
# Time index 1, 2, ..., one per observation; seq_along() stays empty for an
# empty series instead of yielding c(1, 0)
time <- seq_along(date)

# Linear trend in Log(Dow) against the time index
model_time <- lm(log_dow ~ time)
summary(model_time)
```


Log(Dow) vs. Time, with Fit
---------------------------

```{r}
# Log(Dow) against the time index, with the fitted trend line from
# model_time overlaid as a dashed line
fancy_plot(time, log_dow, hguide = 3, type = "l", col = 2,
           xlab = "Time", ylab = "Log(Dow)")
abline(reg = model_time, lty = 2, col = 1)
```


Today's vs. Yesterday's Log(Dow)
--------------------------------

```{r}
# Shift the series by one step so that element i holds observation i - 1;
# the first element has no predecessor and is NA
lag1_log_dow <- c(NA, head(log_dow, -1))
fancy_plot(lag1_log_dow, log_dow, col = 2, cex = 0.5, asp = 1,
           xlab = "Yesterday's Log(Dow)", ylab = "Today's Log(Dow)")
```

Regress Today on Yesterday
--------------------------

```{r}
# Regress each observation on the previous one; print with 6 significant
# digits
model_yest <- lm(formula = log_dow ~ lag1_log_dow)
print(x = summary(model_yest), digits = 6)
```


Slope significantly different from 1?
-------------------------------------

```{r}
# Coefficient table (estimate, std. error, t value, p-value) of model_yest;
# spell out `coefficients` rather than relying on partial matching of `$`
s <- summary(model_yest)
print(s$coefficients)
```

T statistic:
```{r}
# t statistic for H0: slope = 1 (the table's own t value tests slope = 0);
# `coefficients` is spelled out rather than relying on partial matching of `$`
est <- s$coefficients["lag1_log_dow", "Estimate"]
se <- s$coefficients["lag1_log_dow", "Std. Error"]
tstat <- (est - 1) / se
print(tstat)
```

95% Confidence Interval:
```{r}
# Interval for the slope on yesterday's Log(Dow), at confint()'s default
# 95% level
print(confint(model_yest, parm = "lag1_log_dow"), digits = 6)
```
  
We probably shouldn't trust these results.  (Why?)


Stylized Fact #2 {.flexbox .vcenter}
----------------

Nearby observations are highly correlated with each other.


Log(Dow) vs. Time (again)
-------------------------

```{r}
# Same Log(Dow) line plot as before: red line, three horizontal guides
fancy_plot(date, log_dow, hguide = 3, type = "l", col = 2,
           xlab = "Date", ylab = "Log(Dow)")
```


Histogram of Log(Dow)
---------------------

```{r}
# Histogram of the Log(Dow) levels; main = "" suppresses the default title
hist(log_dow, breaks = 50, col = 2, main = "", xlab = "Log(Dow)")
```

Problem: the series is not ergodic; this histogram tells us nothing about the
generative process.


Return: Today's - Yesterday's Log(Dow)
--------------------------------------

```{r}
# Return = change in Log(Dow) from the previous observation (first value is
# NA because there is no previous observation)
ret_dow <- log_dow - lag1_log_dow

# `type` is spelled out in full rather than abbreviated to `t`
fancy_plot(date, ret_dow, type="l", xlab="Date", ylab="Return", col=2)
```


Predicting Returns
------------------

```{r}
# Previous observation's return, aligned with the current one
lag1_ret_dow <- c(NA, head(ret_dow, -1))
fancy_plot(lag1_ret_dow, ret_dow, cex = 0.5, col = 2, asp = 1,
           xlab = "Yesterday's Return", ylab = "Today's Return")
```


Regress Today's on Yesterday's Return
-------------------------------------

```{r}
# Regress each return on the previous return and show the fit summary
model_ret <- lm(formula = ret_dow ~ lag1_ret_dow)
summary(object = model_ret)
```


Stylized Fact #2 (cont.) {.flexbox .vcenter}
------------------------

Nearby observations are highly correlated with each other.  Differences are
mean-reverting.


Histogram of Returns
--------------------
```{r}
# Histogram of the returns; main = "" suppresses the default title
hist(ret_dow, breaks = 100, col = 2, main = "", xlab = "Return")
```


Normal Probability Plot of Returns
----------------------------------

```{r}
# Normal Q-Q plot of the returns with a dashed reference line
qqnorm(ret_dow, main = "", cex = 0.5, col = 2,
       xlab = "Unit Normal Quantile", ylab = "Return")
qqline(ret_dow, lty = 2, col = 1)

# Unlabelled ticks on the top (side 3) and right (side 4) edges
axis(side = 3, labels = FALSE)
axis(side = 4, labels = FALSE)
```


Kurtosis, AD Test of Returns
----------------------------
```{r, echo=TRUE}
kurtosis(ret_dow, na.rm=TRUE) # excess kurtosis, 0 for Gaussian
# ad.test() is not defined in this file; presumably it comes from the
# ad.test.R script sourced in the setup chunk.
# NOTE(review): ret_dow[1] is NA -- confirm ad.test() handles missing values
ad.test(ret_dow)
```


Stylized Fact #3 {.flexbox .vcenter}
----------------

Returns exhibit leptokurtosis (heavy tails).


Return vs. Time (again)
-----------------------
```{r}
# Returns over time as a line plot; `type` is spelled out in full rather than
# abbreviated to `t`
fancy_plot(date, ret_dow, type="l", xlab="Date", ylab="Return", col=2)
```

Persistent volatility?

Volatility (Squared Return)
---------------------------
```{r}
# Squared returns over time as a line plot; `type` is spelled out in full
# rather than abbreviated to `t`
fancy_plot(date, ret_dow^2, type="l", xlab="Date", ylab="Sq. Return", col=2)
```

Volatility (Absolute Return)
----------------------------
```{r}
# Absolute returns over time as a line plot; `type` is spelled out in full
# rather than abbreviated to `t`
fancy_plot(date, abs(ret_dow), type="l", xlab="Date", ylab="Abs. Return", col=2)
```


Today's vs. Yesterday's Abs. Return
-----------------------------------
```{r}
# Absolute return and its one-step lag (first lagged value is NA)
abs_ret_dow <- abs(ret_dow)
lag1_abs_ret_dow <- c(NA, head(abs_ret_dow, -1))

# Scatter of current against previous absolute return, equal axis scales
fancy_plot(lag1_abs_ret_dow, abs_ret_dow, col = 2, cex = 0.5, asp = 1,
           xlab = "Yesterday's Abs. Return", ylab = "Today's Abs. Return")

# Overlay the least-squares fit as a dashed line
model_abs_ret <- lm(formula = abs_ret_dow ~ lag1_abs_ret_dow)
abline(reg = model_abs_ret, lty = 2, col = 1)
```


---
```{r}
# Fit summary for the regression of today's on yesterday's absolute return
# (model_abs_ret is fitted in the previous chunk)
summary(model_abs_ret)
```


Stylized Fact #4 {.flexbox .vcenter}
----------------

Volatility is persistent.

---


```{r, fig.height=6}
# First differences of the raw and of the logged series (first value is NA)
lag1_dow <- c(NA, head(dow, -1))
diff_dow <- dow - lag1_dow
diff_log_dow <- log_dow - lag1_log_dow

# 2 x 2 grid filled row by row: levels on top, differences underneath
par(mfrow = c(2, 2))
fancy_plot(date, dow, type = "l", col = 2,
           xlab = "Date", ylab = "Dow")
fancy_plot(date, log_dow, type = "l", col = 2,
           xlab = "Date", ylab = "Log Dow")
fancy_plot(date, diff_dow, type = "l", col = 2,
           xlab = "Date", ylab = "Diff. Dow")
fancy_plot(date, diff_log_dow, type = "l", col = 2,
           xlab = "Date", ylab = "Diff. Log Dow")
```


Stylized Facts
--------------

1. Many datasets exhibit exponential growth and level-dependent volatility.

2. Nearby observations are highly correlated with each other.

3. Returns exhibit leptokurtosis.

4. Volatility is persistent.