```{r setup, cache=FALSE, echo=FALSE}
library("RColorBrewer")    # brewer.pal
library("e1071")           # kurtosis
library("knitr")           # opts_chunk

# utility functions
source("http://ptrckprry.com/course/forecasting/code/ad.test.R")

# terminal output
options(width = 80)

# color palette
palette(brewer.pal(6, "Set1"))

# code chunk options
#opts_chunk$set(fig.width=12, fig.height=8, fig.align="center", echo=FALSE, tidy=FALSE)
opts_chunk$set(cache=TRUE, fig.align="center", comment=NA, echo=FALSE, tidy=FALSE)
```


Dow Jones Industrial Average
----------------------------

```{r}
# download the data and extract the relevant columns
data <- read.csv("http://ptrckprry.com/course/forecasting/data/dow.csv")
date <- as.Date(data$date, format="%Y-%m-%d")
dow <- data$dow
```

```{r}
# Set up the plot axes, but don't actually plot the data (type = "n")
plot(date, dow, type="n", xlab="Date", ylab="Dow")

# Add axis ticks on the top and right of the plot
# side: 1 = bottom, 2 = left, 3 = top, 4 = right
# lwd, lwd.ticks: line width of the axis line and ticks
# labels: put labels at tick marks?
Axis(date, side=3, lwd=0, lwd.ticks=1, labels=FALSE)
# The right-hand axis runs along the y direction, so its ticks must be
# computed from the y variable (dow), not from the dates.
Axis(dow, side=4, lwd=0, lwd.ticks=1, labels=FALSE)

# Add horizontal guide lines
usr <- par("usr") # get the plot limits in "user" coordinates (xmin, xmax, ymin, ymax)
abline(h=seq(usr[3], usr[4], length.out=5)[2:4], col="gray")

# Now, add the data. We do this *after* the guide lines so that the data ends up on top
lines(date, dow, col=2)
```


Log(Dow)
--------

```{r}
# encapsulate the plotting commands into a function so that we don't have to repeat ourselves
#
# Arguments:
#   x, y:     coordinates to plot; also handed to Axis() to draw the tick marks
#   hguide:   number of evenly spaced interior horizontal guide lines (0 = none)
#   vguide:   number of evenly spaced interior vertical guide lines (0 = none)
#   type:     "l" draws lines, "p" draws points; any other value draws no data
#   col:      color used for the data
#   pch, cex: point symbol and size (only used when type = "p")
#   ...:      passed on to plot() (e.g. xlab, ylab, asp)
fancy_plot <- function(x, y, hguide=0, vguide=0, type = "p", col = "black",
                       pch = 1, cex = 1, ...)
{
    # set up plot region
    plot(x, y, type="n", axes=FALSE, ...)

    # add axes: labelled ticks on the bottom and left, bare ticks on the top and right
    Axis(x, side=1, lwd=0, lwd.ticks=1, labels=TRUE)
    Axis(y, side=2, lwd=0, lwd.ticks=1, labels=TRUE)
    Axis(x, side=3, lwd=0, lwd.ticks=1, labels=FALSE)
    Axis(y, side=4, lwd=0, lwd.ticks=1, labels=FALSE)

    # add guide lines; the sequence includes both plot edges, which are dropped
    # so that only the interior lines are drawn
    usr <- par("usr")
    if (hguide > 0) {
        abline(h=seq(usr[3], usr[4], length.out=hguide+2)[-c(1,hguide+2)], col="gray")
    }
    if (vguide > 0) {
        abline(v=seq(usr[1], usr[2], length.out=vguide+2)[-c(1,vguide+2)], col="gray")
    }

    # add the data (after the guide lines, so that the data ends up on top)
    if (type == "l") {
        lines(x, y, col=col)
    } else if (type == "p") {
        points(x, y, cex=cex, pch=pch, col=col)
    }

    # add frame
    box()
}
```

```{r}
log_dow <- log(dow)
fancy_plot(date, log_dow, type="l", col=2, xlab="Date", ylab="Log(Dow)", hguide=3)
```

Take logs to adjust for exponential growth and level-dependent volatility.


Stylized Fact #1 {.flexbox .vcenter}
----------------

Many datasets exhibit exponential growth and level-dependent volatility.


Regress Log(Dow) on Time
------------------------

```{r}
# seq_along() instead of 1:length(): safe even if the series is empty
time <- seq_along(date)
model_time <- lm(log_dow ~ time)
summary(model_time)
```


Log(Dow) vs. Time, with Fit
---------------------------

```{r}
fancy_plot(time, log_dow, type="l", col=2, xlab="Time", ylab="Log(Dow)", hguide=3)
abline(model_time, col=1, lty=2)
```


Today's vs. Yesterday's Log(Dow)
--------------------------------

```{r}
# shift the series by one observation; the first day has no "yesterday"
lag1_log_dow <- c(NA, log_dow[-length(log_dow)])
fancy_plot(lag1_log_dow, log_dow, cex=0.5, col=2,
           xlab="Yesterday's Log(Dow)", ylab="Today's Log(Dow)", asp=1)
```


Regress Today on Yesterday
--------------------------

```{r}
model_yest <- lm(log_dow ~ lag1_log_dow)
print(summary(model_yest), digits=6)
```


Slope significantly different from 1?
-------------------------------------

```{r}
s <- summary(model_yest)
# coef() extracts the coefficient table without relying on partial matching of `$coef`
print(coef(s))
```

T statistic:

```{r}
est <- coef(s)["lag1_log_dow", "Estimate"]
se <- coef(s)["lag1_log_dow", "Std. Error"]
# test the null hypothesis slope = 1 (not the default slope = 0)
tstat <- (est - 1) / se
print(tstat)
```

95% Confidence Interval:

```{r}
print(confint(model_yest, "lag1_log_dow"), digits=6)
```

We probably shouldn't trust these results. (Why?)


Stylized Fact #2 {.flexbox .vcenter}
----------------

Nearby observations are highly correlated with each other.


Log(Dow) vs. Time (again)
-------------------------

```{r}
fancy_plot(date, log_dow, type="l", col=2, xlab="Date", ylab="Log(Dow)", hguide=3)
```


Histogram of Log(Dow)
---------------------

```{r}
hist(log_dow, breaks=50, col=2, xlab="Log(Dow)", main="")
```

Problem: the series is not ergodic; this histogram tells us nothing about the generative process.


Return: Today's - Yesterday's Log(Dow)
--------------------------------------

```{r}
ret_dow <- log_dow - lag1_log_dow
fancy_plot(date, ret_dow, type="l", xlab="Date", ylab="Return", col=2)
```


Predicting Returns
------------------

```{r}
lag1_ret_dow <- c(NA, ret_dow[-length(ret_dow)])
fancy_plot(lag1_ret_dow, ret_dow, col=2, cex=0.5,
           xlab="Yesterday's Return", ylab="Today's Return", asp=1)
```


Regress Today's on Yesterday's Return
-------------------------------------

```{r}
model_ret <- lm(ret_dow ~ lag1_ret_dow)
summary(model_ret)
```


Stylized Fact #2 (cont.) {.flexbox .vcenter}
------------------------

Nearby observations are highly correlated with each other.

Differences are mean-reverting.


Histogram of Returns
--------------------

```{r}
hist(ret_dow, breaks=100, col=2, xlab="Return", main="")
```


Normal Probability Plot of Returns
----------------------------------

```{r}
qqnorm(ret_dow, col=2, cex=0.5, main="", xlab="Unit Normal Quantile", ylab="Return")
qqline(ret_dow, col=1, lty=2)
axis(3, labels=FALSE)
axis(4, labels=FALSE)
```


Kurtosis, AD Test of Returns
----------------------------

```{r, echo=TRUE}
kurtosis(ret_dow, na.rm=TRUE) # excess kurtosis, 0 for Gaussian
ad.test(ret_dow)
```


Stylized Fact #3 {.flexbox .vcenter}
----------------

Returns exhibit leptokurtosis (heavy tails).


Return vs. Time (again)
-----------------------

```{r}
fancy_plot(date, ret_dow, type="l", xlab="Date", ylab="Return", col=2)
```

Persistent volatility?


Volatility (Squared Return)
---------------------------

```{r}
fancy_plot(date, (ret_dow)^2, type="l", xlab="Date", ylab="Sq. Return", col=2)
```


Volatility (Absolute Return)
----------------------------

```{r}
fancy_plot(date, abs(ret_dow), type="l", xlab="Date", ylab="Abs. Return", col=2)
```


Today's vs. Yesterday's Abs. Return
-----------------------------------

```{r}
abs_ret_dow <- abs(ret_dow)
lag1_abs_ret_dow <- c(NA, abs_ret_dow[-length(abs_ret_dow)])
fancy_plot(lag1_abs_ret_dow, abs_ret_dow, cex=0.5, col=2, asp=1,
           xlab="Yesterday's Abs. Return", ylab="Today's Abs. Return")
model_abs_ret <- lm(abs_ret_dow ~ lag1_abs_ret_dow)
abline(model_abs_ret, col=1, lty=2)
```

---

```{r}
summary(model_abs_ret)
```


Stylized Fact #4 {.flexbox .vcenter}
----------------

Volatility is persistent.

---

```{r, fig.height=6}
lag1_dow <- c(NA, dow[-length(dow)])
diff_dow <- dow - lag1_dow
diff_log_dow <- log_dow - lag1_log_dow

# 2 x 2 panel: levels on the top row, first differences on the bottom row
par(mfrow=c(2,2))
fancy_plot(date, dow, type="l", col=2, xlab="Date", ylab="Dow")
fancy_plot(date, log_dow, type="l", col=2, xlab="Date", ylab="Log Dow")
fancy_plot(date, diff_dow, type="l", col=2, xlab="Date", ylab="Diff. Dow")
fancy_plot(date, diff_log_dow, type="l", col=2, xlab="Date", ylab="Diff. Log Dow")
```


Stylized Facts
--------------

1. Many datasets exhibit exponential growth and level-dependent volatility.

2. Nearby observations are correlated with each other.

3. Returns exhibit leptokurtosis.

4. Volatility is persistent.