Forecasting Time Series

Preamble

library("forecast")
library("fracdiff") # for fracdiff, fdGPH

Data

Raw Data

# US CPI, Jan 1947 - Jan 2017, Monthly
data <- read.csv("http://ptrckprry.com/course/forecasting/data/cpi.csv")
head(data)

        date   cpi
1 1947-01-01 21.48
2 1947-02-01 21.62
3 1947-03-01 22.00
4 1947-04-01 22.00
5 1947-05-01 21.95
6 1947-06-01 22.08

tail(data)

          date     cpi
836 2016-08-01 240.389
837 2016-09-01 241.006
838 2016-10-01 241.694
839 2016-11-01 242.199
840 2016-12-01 242.821
841 2017-01-01 244.158

Extract Variables

date <- as.Date(data$date)
cpi <- data$cpi
log.cpi <- log(cpi)
inflation <- c(NA, diff(log.cpi))

Average Inflation

Estimate

xbar <- mean(inflation, na.rm=TRUE)
print(xbar)

[1] 0.002893682

Standard Error (?)

n <- sum(!is.na(inflation))
s <- sd(inflation, na.rm=TRUE)
se <- s / sqrt(n)
print(se)

[1] 0.0001194141

95% Confidence Interval (?)

xbar + c(-1,1) * 1.96 * se

[1] 0.002659631 0.003127734

Time Series Plot

plot(date, inflation, type="l", col=2)

ACF, PACF

par(mfrow=c(1,2))
Acf(inflation, lag.max=200)
Pacf(inflation, lag.max=200)

ACF, PACF of Diff.

par(mfrow=c(1,2))
Acf(diff(inflation), lag.max=200)
Pacf(diff(inflation), lag.max=200)

Estimated d

Method 1

## fit1 <- fracdiff(inflation) # gives an error

fit1 <- fracdiff(inflation[-1]) # remove missing value
summary(fit1)


Call:
  fracdiff(x = inflation[-1]) 

Coefficients:
   Estimate Std. Error z value Pr(>|z|)    
d 0.3868483  0.0000397    9743   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sigma[eps] = 0.002723407 
[d.tol = 0.0001221, M = 100, h = 3.972e-05]
Log likelihood:  3768 ==> AIC = -7532.733 [2 deg.freedom]

Method 2

fit2 <- fdGPH(inflation[-1])
cat("d: ", fit2$d, "\n")

d:  0.388747

cat("SE: ", fit2$sd.reg, "\n")

SE:  0.118978

Method 3

fit3 <- arfima(inflation)
summary(fit3)


Call:
  arfima(y = inflation) 

Coefficients:
        Estimate Std. Error z value Pr(>|z|)    
d      4.583e-05  2.999e-02   0.002    0.999    
ar.ar1 9.633e-01  2.868e-03 335.815   <2e-16 ***
ma.ma1 5.450e-01  2.649e-03 205.745   <2e-16 ***
ma.ma2 2.209e-01  2.815e-03  78.476   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sigma[eps] = 0.002739641 
[d.tol = 0.0001221, M = 100, h = 3.967e-05]
Log likelihood:  3764 ==> AIC = -7518.128 [5 deg.freedom]

Simulating ARFIMA

x <- fracdiff.sim(n = 5000, d = 0.39)$series

TS Plot

plot(x, type="l", col=3)

ACF, PACF

par(mfrow=c(1,2))
Acf(x, lag.max=200)
Pacf(x, lag.max=200)

Mean from Simulated Series

(mean.x <- mean(x))

[1] -0.02888058

Naive SE from Simulated Series

sd.x <- sd(x)
n.x <- length(x)
(se.x <- sd.x / sqrt(n.x))

[1] 0.01833684

Naive CI from Simulated Series

mean.x + c(-1,1) * 1.96 * se.x

[1] -0.064820778  0.007059624

Invalid due to Long Memory in Series!

1/sqrt(n)         # With indep. observations, SE is of this order

[1] 0.03450328

n^(2 * 0.39 - 1)  # With long-memory (d = 0.39), this order instead

[1] 0.2273309