Chapter 9 Interupted Time Series (ITS) (2)

  • Adapted from R codes in “Interrupted time series analysis using Autoregressive Integrated Moving Average (ARIMA) models: A guide for evaluating large-scale health interventions”
  • Author: Dr. Andrea Schaffer

Install and load the following packages:

library(astsa)
library(forecast)
library(dplyr)
library(zoo)
library(tseries)

9.1 Data

Load Data from csv:

quet <- read.csv(file = 'quet.csv')

Convert data to time series object using ts():

quet.ts <- ts(quet[,2], frequency=12, start=c(2011,1))

View data:

quet.ts
##        Jan   Feb   Mar   Apr   May   Jun   Jul   Aug   Sep   Oct   Nov   Dec
## 2011 16831 17234 20546 19226 21136 20939 21103 22897 22162 22184 23108 25967
## 2012 20123 21715 24497 21720 25053 23915 24972 26183 24163 26172 26642 29086
## 2013 24002 24190 26052 26707 29077 26927 30300 29854 28824 31519 32084 33160
## 2014 24827 23285 23884 21921 22715 19919 20560 18961 18780 17998 16624 18450

9.2 Step 1: Plot data to visualize time series

options(scipen=5)
plot(quet.ts, xlim=c(2011,2015), ylim=c(0,40000), type='l', col="blue", 
     xlab="Month", ylab="Dispensings")
# Add vertical line indicating date of intervention (January 1, 2014)
abline(v=2014, col="gray", lty="dashed", lwd=2)

9.3 Step 2: Plot the differenced data

seasonal differencing:

plot(diff(quet.ts,12), xlim=c(2011,2015), ylim=c(-5000,5000), type='l',
     col="blue", xlab="Month", ylab="Dispensings")
# Add vertical line indicating date of intervention (January 1, 2014)
abline(v=2014, col="gray", lty="dashed", lwd=2)

first order differencing:

plot(diff(diff(quet.ts,12)), xlim=c(2011,2015), ylim=c(-5000,5000), type='l',
     col="blue", xlab="Month", ylab="Dispensings")
# Add vertical line indicating date of intervention (January 1, 2014)
abline(v=2014, col="gray", lty="dashed", lwd=2)

second order differencing

plot(diff(diff(diff(quet.ts,12))), xlim=c(2011,2015), ylim=c(-5000,5000), type='l', 
     col="blue", xlab="Month", ylab="Dispensings")
# Add vertical line indicating date of intervention (January 1, 2014)
abline(v=2014, col="gray", lty="dashed", lwd=2)

Test for stationary:

# first order differencing
adf.test(diff(diff(quet.ts,12))) # not significant - not passed - nonstationary
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(diff(quet.ts, 12))
## Dickey-Fuller = -2.3836, Lag order = 3, p-value = 0.4241
## alternative hypothesis: stationary
# second order differencing
adf.test(diff(diff(diff(quet.ts,12)))) # significant - passed - stationary
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(diff(diff(quet.ts, 12)))
## Dickey-Fuller = -4.1471, Lag order = 3, p-value = 0.01591
## alternative hypothesis: stationary

9.4 Step 3: View ACF/PACF plots of differenced/seasonally differenced data

acf2(diff(diff(diff(quet.ts,12))), max.lag=24)

##       [,1]  [,2] [,3]  [,4]  [,5]  [,6]  [,7] [,8]  [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16]
## ACF  -0.48 -0.25 0.52 -0.36  0.02  0.24 -0.20 0.01  0.08 -0.19  0.32 -0.20 -0.21  0.45 -0.36  0.02
## PACF -0.48 -0.63 0.05 -0.21 -0.01 -0.02  0.12 0.01 -0.02 -0.29  0.31 -0.07 -0.18 -0.08 -0.18 -0.08
##      [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## ACF    0.3 -0.27 -0.03  0.16 -0.09  0.03  0.09 -0.14
## PACF  -0.1  0.12 -0.02 -0.10 -0.05  0.09  0.20  0.17

9.5 Step 4: Build ARIMA model

  1. Create variable representing step change and view
step <- as.numeric(as.yearmon(time(quet.ts))>='Jan 2014')
step
##  [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1
  1. Create variable representing ramp (change in slope) and view
ramp <- append(rep(0,36), seq(1,12,1))
ramp  
##  [1]  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
## [33]  0  0  0  0  1  2  3  4  5  6  7  8  9 10 11 12
  • Use automated algorithm to identify p/q parameters
  • Specify first difference = 2 and seasonal difference = 1
model1 <- auto.arima(quet.ts,                                 # data 
                     seasonal=TRUE,          # seasonal differencing 
                     xreg=cbind(step,ramp),    # intervention effect
                     d=2,                # non-seasonal differencing 
                     D=1,                    # seasonal differencing 
                     stepwise=FALSE,             # exhaustive search
                     trace=TRUE)                       # show output 
## 
##  Regression with ARIMA(0,2,0)(0,1,0)[12] errors : 632.2689
##  Regression with ARIMA(0,2,0)(0,1,1)[12] errors : 632.4278
##  Regression with ARIMA(0,2,0)(1,1,0)[12] errors : 633.4522
##  Regression with ARIMA(0,2,0)(1,1,1)[12] errors : Inf
##  Regression with ARIMA(0,2,1)(0,1,0)[12] errors : Inf
##  Regression with ARIMA(0,2,1)(0,1,1)[12] errors : Inf
##  Regression with ARIMA(0,2,1)(1,1,0)[12] errors : Inf
##  Regression with ARIMA(0,2,1)(1,1,1)[12] errors : Inf
##  Regression with ARIMA(0,2,2)(0,1,0)[12] errors : Inf
##  Regression with ARIMA(0,2,2)(0,1,1)[12] errors : Inf
##  ARIMA(0,2,2)(1,1,0)[12]                    : Inf
##  ARIMA(0,2,2)(1,1,1)[12]                    : Inf
##  Regression with ARIMA(0,2,3)(0,1,0)[12] errors : Inf
##  Regression with ARIMA(0,2,3)(0,1,1)[12] errors : Inf
##  ARIMA(0,2,3)(1,1,0)[12]                    : Inf
##  ARIMA(0,2,3)(1,1,1)[12]                    : Inf
##  Regression with ARIMA(0,2,4)(0,1,0)[12] errors : Inf
##  Regression with ARIMA(0,2,4)(0,1,1)[12] errors : Inf
##  Regression with ARIMA(0,2,4)(1,1,0)[12] errors : 590.0708
##  Regression with ARIMA(0,2,5)(0,1,0)[12] errors : 590.8818
##  Regression with ARIMA(1,2,0)(0,1,0)[12] errors : 624.3133
##  Regression with ARIMA(1,2,0)(0,1,1)[12] errors : 622.3359
##  Regression with ARIMA(1,2,0)(1,1,0)[12] errors : 624.5849
##  Regression with ARIMA(1,2,0)(1,1,1)[12] errors : 624.5401
##  Regression with ARIMA(1,2,1)(0,1,0)[12] errors : Inf
##  Regression with ARIMA(1,2,1)(0,1,1)[12] errors : Inf
##  Regression with ARIMA(1,2,1)(1,1,0)[12] errors : Inf
##  Regression with ARIMA(1,2,1)(1,1,1)[12] errors : Inf
##  Regression with ARIMA(1,2,2)(0,1,0)[12] errors : Inf
##  Regression with ARIMA(1,2,2)(0,1,1)[12] errors : Inf
##  Regression with ARIMA(1,2,2)(1,1,0)[12] errors : Inf
##  Regression with ARIMA(1,2,2)(1,1,1)[12] errors : Inf
##  Regression with ARIMA(1,2,3)(0,1,0)[12] errors : Inf
##  Regression with ARIMA(1,2,3)(0,1,1)[12] errors : 590.0542
##  Regression with ARIMA(1,2,3)(1,1,0)[12] errors : Inf
##  Regression with ARIMA(1,2,4)(0,1,0)[12] errors : 590.9442
##  Regression with ARIMA(2,2,0)(0,1,0)[12] errors : 594.9202
##  Regression with ARIMA(2,2,0)(0,1,1)[12] errors : 593.8099
##  Regression with ARIMA(2,2,0)(1,1,0)[12] errors : 594.9902
##  Regression with ARIMA(2,2,0)(1,1,1)[12] errors : 598.047
##  Regression with ARIMA(2,2,1)(0,1,0)[12] errors : 583.7067
##  Regression with ARIMA(2,2,1)(0,1,1)[12] errors : 582.0326
##  Regression with ARIMA(2,2,1)(1,1,0)[12] errors : 583.0498
##  Regression with ARIMA(2,2,1)(1,1,1)[12] errors : Inf
##  Regression with ARIMA(2,2,2)(0,1,0)[12] errors : 586.8934
##  Regression with ARIMA(2,2,2)(0,1,1)[12] errors : 585.4786
##  Regression with ARIMA(2,2,2)(1,1,0)[12] errors : 586.4878
##  Regression with ARIMA(2,2,3)(0,1,0)[12] errors : Inf
##  Regression with ARIMA(3,2,0)(0,1,0)[12] errors : 594.6178
##  Regression with ARIMA(3,2,0)(0,1,1)[12] errors : 592.4205
##  Regression with ARIMA(3,2,0)(1,1,0)[12] errors : 594.5297
##  Regression with ARIMA(3,2,0)(1,1,1)[12] errors : 597.7892
##  Regression with ARIMA(3,2,1)(0,1,0)[12] errors : 586.8978
##  Regression with ARIMA(3,2,1)(0,1,1)[12] errors : 585.4799
##  Regression with ARIMA(3,2,1)(1,1,0)[12] errors : 586.4936
##  Regression with ARIMA(3,2,2)(0,1,0)[12] errors : 590.2986
##  Regression with ARIMA(4,2,0)(0,1,0)[12] errors : 594.3236
##  Regression with ARIMA(4,2,0)(0,1,1)[12] errors : 594.4433
##  Regression with ARIMA(4,2,0)(1,1,0)[12] errors : 595.68
##  Regression with ARIMA(4,2,1)(0,1,0)[12] errors : 589.9171
##  Regression with ARIMA(5,2,0)(0,1,0)[12] errors : 591.9875
## 
## 
## 
##  Best model: Regression with ARIMA(2,2,1)(0,1,1)[12] errors

9.6 Step 5: Check residuals

checkresiduals(model1)

## 
##  Ljung-Box test
## 
## data:  Residuals from Regression with ARIMA(2,2,1)(0,1,1)[12] errors
## Q* = 12.358, df = 6, p-value = 0.05443
## 
## Model df: 4.   Total lags used: 10

Estimate parameters and confidence intervals

summary(model1)
## Series: quet.ts 
## Regression with ARIMA(2,2,1)(0,1,1)[12] errors 
## 
## Coefficients:
##           ar1      ar2      ma1     sma1        step        ramp
##       -0.8877  -0.6860  -0.9847  -0.6883  -3374.9701  -1512.6228
## s.e.   0.1267   0.1278   0.1742   0.5968    594.9716    141.0875
## 
## sigma^2 = 643481:  log likelihood = -281.86
## AIC=577.72   AICc=582.03   BIC=588.41
## 
## Training set error measures:
##                    ME     RMSE      MAE     MPE     MAPE       MASE        ACF1
## Training set 90.03804 612.6693 406.6706 0.32673 1.631969 0.07880153 -0.03762714
confint(model1)
##              2.5 %        97.5 %
## ar1     -1.1360795    -0.6394194
## ar2     -0.9365305    -0.4354141
## ma1     -1.3260894    -0.6434103
## sma1    -1.8579336     0.4813390
## step -4541.0931265 -2208.8471546
## ramp -1789.1491264 -1236.0964099

Plot data to visualize time series

options(scipen=5)
plot(quet.ts, xlim=c(2011,2015), ylim=c(0,40000), type='l', col="blue", xlab="Month", ylab="Dispensings")

# Add vertical line indicating date of intervention (January 1, 2014)
abline(v=2014, col="gray", lty="dashed", lwd=2)
lines(fitted(model1), col="red", type="b")

9.7 Step 6: Calculate forecasts

To forecast the counterfactual, model data excluding post-intervention time period

model2 <- Arima(window(quet.ts, end=c(2013,12)), 
                order=c(2,2,1), 
                seasonal=list(order=c(0,1,1), period=12))

Forecast 12 months post-intervention and convert to time series object

fc <- forecast(model2, h=12)
fc.ts <- ts(as.numeric(fc$mean), start=c(2014,1), frequency=12)

Combine with observed data

quet.ts.2 <- ts.union(quet.ts, fc.ts)

Plot forecast

plot(quet.ts.2, type="l", plot.type="s", col=c('blue','red'), xlab="Month", ylab="Dispensings", xlim=c(2011,2015), ylim=c(0,40000))
abline(v=2014, lty="dashed", col="gray")