Chapter 9 Interrupted Time Series (ITS) (2)
- Adapted from the R code in “Interrupted time series analysis using Autoregressive Integrated Moving Average (ARIMA) models: A guide for evaluating large-scale health interventions”
- Author: Dr. Andrea Schaffer
Install and load the following packages:
library(astsa)
library(forecast)
library(dplyr)
library(zoo)
library(tseries)
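If any of these packages are missing, they can be installed from CRAN first:
# One-off installation of the required packages
install.packages(c("astsa", "forecast", "dplyr", "zoo", "tseries"))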
9.1 Data
Load the data from CSV:
quet <- read.csv(file = 'quet.csv')
Convert data to time series object using ts():
quet.ts <- ts(quet[,2], frequency=12, start=c(2011,1))
View data:
quet.ts
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 2011 16831 17234 20546 19226 21136 20939 21103 22897 22162 22184 23108 25967
## 2012 20123 21715 24497 21720 25053 23915 24972 26183 24163 26172 26642 29086
## 2013 24002 24190 26052 26707 29077 26927 30300 29854 28824 31519 32084 33160
## 2014 24827 23285 23884 21921 22715 19919 20560 18961 18780 17998 16624 18450
9.2 Step 1: Plot data to visualize time series
options(scipen=5)
plot(quet.ts, xlim=c(2011,2015), ylim=c(0,40000), type='l', col="blue",
xlab="Month", ylab="Dispensings")
# Add vertical line indicating date of intervention (January 1, 2014)
abline(v=2014, col="gray", lty="dashed", lwd=2)
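As an optional supplementary check (not part of the original guide), a month plot from base R shows the seasonal pattern directly:
# Dispensings grouped by calendar month, highlighting seasonality
monthplot(quet.ts, xlab="Month", ylab="Dispensings")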
9.3 Step 2: Plot the differenced data
Seasonal differencing:
plot(diff(quet.ts,12), xlim=c(2011,2015), ylim=c(-5000,5000), type='l',
col="blue", xlab="Month", ylab="Dispensings")
# Add vertical line indicating date of intervention (January 1, 2014)
abline(v=2014, col="gray", lty="dashed", lwd=2)
First-order differencing (of the seasonally differenced series):
plot(diff(diff(quet.ts,12)), xlim=c(2011,2015), ylim=c(-5000,5000), type='l',
col="blue", xlab="Month", ylab="Dispensings")
# Add vertical line indicating date of intervention (January 1, 2014)
abline(v=2014, col="gray", lty="dashed", lwd=2)
Second-order differencing (of the seasonally differenced series):
plot(diff(diff(diff(quet.ts,12))), xlim=c(2011,2015), ylim=c(-5000,5000), type='l',
col="blue", xlab="Month", ylab="Dispensings")
# Add vertical line indicating date of intervention (January 1, 2014)
abline(v=2014, col="gray", lty="dashed", lwd=2)
Test for stationarity:
# first order differencing
adf.test(diff(diff(quet.ts,12))) # p > 0.05: cannot reject the null, series still non-stationary
##
## Augmented Dickey-Fuller Test
##
## data: diff(diff(quet.ts, 12))
## Dickey-Fuller = -2.3836, Lag order = 3, p-value = 0.4241
## alternative hypothesis: stationary
# second order differencing
adf.test(diff(diff(diff(quet.ts,12)))) # p < 0.05: reject the null of non-stationarity, series is stationary
##
## Augmented Dickey-Fuller Test
##
## data: diff(diff(diff(quet.ts, 12)))
## Dickey-Fuller = -4.1471, Lag order = 3, p-value = 0.01591
## alternative hypothesis: stationary
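As a supplementary check (not in the original guide), the KPSS test from the tseries package reverses the hypotheses: its null is stationarity, so a non-significant result supports the ADF conclusion:
# KPSS test: null hypothesis is stationarity
kpss.test(diff(diff(diff(quet.ts,12))))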
9.4 Step 3: View ACF/PACF plots of differenced/seasonally differenced data
acf2(diff(diff(diff(quet.ts,12))), max.lag=24)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16]
## ACF -0.48 -0.25 0.52 -0.36 0.02 0.24 -0.20 0.01 0.08 -0.19 0.32 -0.20 -0.21 0.45 -0.36 0.02
## PACF -0.48 -0.63 0.05 -0.21 -0.01 -0.02 0.12 0.01 -0.02 -0.29 0.31 -0.07 -0.18 -0.08 -0.18 -0.08
## [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## ACF 0.3 -0.27 -0.03 0.16 -0.09 0.03 0.09 -0.14
## PACF -0.1 0.12 -0.02 -0.10 -0.05 0.09 0.20 0.17
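As an alternative to acf2() (an added sketch), the forecast package can display the differenced series together with its ACF and PACF in a single figure:
# Series, ACF and PACF in one plot
ggtsdisplay(diff(diff(diff(quet.ts,12))), lag.max=24)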
9.5 Step 4: Build ARIMA model
- Create variable representing step change and view
step <- as.numeric(as.yearmon(time(quet.ts))>='Jan 2014')
step
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1
- Create variable representing ramp (change in slope) and view
ramp <- append(rep(0,36), seq(1,12,1))
ramp
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [33] 0 0 0 0 1 2 3 4 5 6 7 8 9 10 11 12
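Equivalently (an added sketch), the ramp can be derived from the step variable rather than hard-coding the pre- and post-intervention lengths:
# Alternative construction: the cumulative sum of the step gives 0 before the
# intervention and 1, 2, ..., 12 after it
step2 <- as.numeric(as.yearmon(time(quet.ts)) >= 'Jan 2014')
ramp2 <- cumsum(step2)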
- Use an automated algorithm to identify the p and q parameters
- Specify non-seasonal differencing d = 2 and seasonal differencing D = 1
model1 <- auto.arima(quet.ts,               # data
                     seasonal=TRUE,         # allow seasonal terms
                     xreg=cbind(step,ramp), # intervention effects
                     d=2,                   # non-seasonal differencing
                     D=1,                   # seasonal differencing
                     stepwise=FALSE,        # exhaustive search
                     trace=TRUE)            # show models as they are evaluated
##
## Regression with ARIMA(0,2,0)(0,1,0)[12] errors : 632.2689
## Regression with ARIMA(0,2,0)(0,1,1)[12] errors : 632.4278
## Regression with ARIMA(0,2,0)(1,1,0)[12] errors : 633.4522
## Regression with ARIMA(0,2,0)(1,1,1)[12] errors : Inf
## Regression with ARIMA(0,2,1)(0,1,0)[12] errors : Inf
## Regression with ARIMA(0,2,1)(0,1,1)[12] errors : Inf
## Regression with ARIMA(0,2,1)(1,1,0)[12] errors : Inf
## Regression with ARIMA(0,2,1)(1,1,1)[12] errors : Inf
## Regression with ARIMA(0,2,2)(0,1,0)[12] errors : Inf
## Regression with ARIMA(0,2,2)(0,1,1)[12] errors : Inf
## ARIMA(0,2,2)(1,1,0)[12] : Inf
## ARIMA(0,2,2)(1,1,1)[12] : Inf
## Regression with ARIMA(0,2,3)(0,1,0)[12] errors : Inf
## Regression with ARIMA(0,2,3)(0,1,1)[12] errors : Inf
## ARIMA(0,2,3)(1,1,0)[12] : Inf
## ARIMA(0,2,3)(1,1,1)[12] : Inf
## Regression with ARIMA(0,2,4)(0,1,0)[12] errors : Inf
## Regression with ARIMA(0,2,4)(0,1,1)[12] errors : Inf
## Regression with ARIMA(0,2,4)(1,1,0)[12] errors : 590.0708
## Regression with ARIMA(0,2,5)(0,1,0)[12] errors : 590.8818
## Regression with ARIMA(1,2,0)(0,1,0)[12] errors : 624.3133
## Regression with ARIMA(1,2,0)(0,1,1)[12] errors : 622.3359
## Regression with ARIMA(1,2,0)(1,1,0)[12] errors : 624.5849
## Regression with ARIMA(1,2,0)(1,1,1)[12] errors : 624.5401
## Regression with ARIMA(1,2,1)(0,1,0)[12] errors : Inf
## Regression with ARIMA(1,2,1)(0,1,1)[12] errors : Inf
## Regression with ARIMA(1,2,1)(1,1,0)[12] errors : Inf
## Regression with ARIMA(1,2,1)(1,1,1)[12] errors : Inf
## Regression with ARIMA(1,2,2)(0,1,0)[12] errors : Inf
## Regression with ARIMA(1,2,2)(0,1,1)[12] errors : Inf
## Regression with ARIMA(1,2,2)(1,1,0)[12] errors : Inf
## Regression with ARIMA(1,2,2)(1,1,1)[12] errors : Inf
## Regression with ARIMA(1,2,3)(0,1,0)[12] errors : Inf
## Regression with ARIMA(1,2,3)(0,1,1)[12] errors : 590.0542
## Regression with ARIMA(1,2,3)(1,1,0)[12] errors : Inf
## Regression with ARIMA(1,2,4)(0,1,0)[12] errors : 590.9442
## Regression with ARIMA(2,2,0)(0,1,0)[12] errors : 594.9202
## Regression with ARIMA(2,2,0)(0,1,1)[12] errors : 593.8099
## Regression with ARIMA(2,2,0)(1,1,0)[12] errors : 594.9902
## Regression with ARIMA(2,2,0)(1,1,1)[12] errors : 598.047
## Regression with ARIMA(2,2,1)(0,1,0)[12] errors : 583.7067
## Regression with ARIMA(2,2,1)(0,1,1)[12] errors : 582.0326
## Regression with ARIMA(2,2,1)(1,1,0)[12] errors : 583.0498
## Regression with ARIMA(2,2,1)(1,1,1)[12] errors : Inf
## Regression with ARIMA(2,2,2)(0,1,0)[12] errors : 586.8934
## Regression with ARIMA(2,2,2)(0,1,1)[12] errors : 585.4786
## Regression with ARIMA(2,2,2)(1,1,0)[12] errors : 586.4878
## Regression with ARIMA(2,2,3)(0,1,0)[12] errors : Inf
## Regression with ARIMA(3,2,0)(0,1,0)[12] errors : 594.6178
## Regression with ARIMA(3,2,0)(0,1,1)[12] errors : 592.4205
## Regression with ARIMA(3,2,0)(1,1,0)[12] errors : 594.5297
## Regression with ARIMA(3,2,0)(1,1,1)[12] errors : 597.7892
## Regression with ARIMA(3,2,1)(0,1,0)[12] errors : 586.8978
## Regression with ARIMA(3,2,1)(0,1,1)[12] errors : 585.4799
## Regression with ARIMA(3,2,1)(1,1,0)[12] errors : 586.4936
## Regression with ARIMA(3,2,2)(0,1,0)[12] errors : 590.2986
## Regression with ARIMA(4,2,0)(0,1,0)[12] errors : 594.3236
## Regression with ARIMA(4,2,0)(0,1,1)[12] errors : 594.4433
## Regression with ARIMA(4,2,0)(1,1,0)[12] errors : 595.68
## Regression with ARIMA(4,2,1)(0,1,0)[12] errors : 589.9171
## Regression with ARIMA(5,2,0)(0,1,0)[12] errors : 591.9875
##
##
##
## Best model: Regression with ARIMA(2,2,1)(0,1,1)[12] errors
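As a check (an added sketch, not in the original guide), the selected model can be re-fitted by hand with Arima() to confirm it matches auto.arima's choice:
# Hand-specified fit of the selected model; should reproduce model1
model1b <- Arima(quet.ts, order=c(2,2,1),
                 seasonal=list(order=c(0,1,1), period=12),
                 xreg=cbind(step,ramp))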
9.6 Step 5: Check residuals
checkresiduals(model1)
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(2,2,1)(0,1,1)[12] errors
## Q* = 12.358, df = 6, p-value = 0.05443
##
## Model df: 4. Total lags used: 10
The Ljung-Box p-value (0.054) shows no strong evidence of autocorrelation in the residuals at the 5% level, so we proceed. Estimate parameters and confidence intervals:
summary(model1)
## Series: quet.ts
## Regression with ARIMA(2,2,1)(0,1,1)[12] errors
##
## Coefficients:
## ar1 ar2 ma1 sma1 step ramp
## -0.8877 -0.6860 -0.9847 -0.6883 -3374.9701 -1512.6228
## s.e. 0.1267 0.1278 0.1742 0.5968 594.9716 141.0875
##
## sigma^2 = 643481: log likelihood = -281.86
## AIC=577.72 AICc=582.03 BIC=588.41
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 90.03804 612.6693 406.6706 0.32673 1.631969 0.07880153 -0.03762714
confint(model1)
## 2.5 % 97.5 %
## ar1 -1.1360795 -0.6394194
## ar2 -0.9365305 -0.4354141
## ma1 -1.3260894 -0.6434103
## sma1 -1.8579336 0.4813390
## step -4541.0931265 -2208.8471546
## ramp -1789.1491264 -1236.0964099
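As an added illustration (not part of the original guide), the step and ramp coefficients can be combined to estimate the total change in dispensings by the twelfth month after the intervention:
# Estimated change at 12 months post-intervention: step change plus 12 months of slope change
coef(model1)["step"] + 12 * coef(model1)["ramp"]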
Plot the observed data with the fitted values overlaid:
options(scipen=5)
plot(quet.ts, xlim=c(2011,2015), ylim=c(0,40000), type='l', col="blue", xlab="Month", ylab="Dispensings")
# Add vertical line indicating date of intervention (January 1, 2014)
abline(v=2014, col="gray", lty="dashed", lwd=2)
lines(fitted(model1), col="red", type="b")
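Optionally, a legend makes the overlay easier to read (an added sketch using base R graphics):
# Label the observed (blue) and fitted (red) series
legend("topleft", legend=c("Observed", "Fitted"), col=c("blue", "red"), lty=1)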
9.7 Step 6: Calculate forecasts
To forecast the counterfactual, model the data excluding the post-intervention time period:
model2 <- Arima(window(quet.ts, end=c(2013,12)),
                order=c(2,2,1),
                seasonal=list(order=c(0,1,1), period=12))
Forecast 12 months post-intervention and convert to time series object
fc <- forecast(model2, h=12)
fc.ts <- ts(as.numeric(fc$mean), start=c(2014,1), frequency=12)
Combine with observed data
quet.ts.2 <- ts.union(quet.ts, fc.ts)
Plot forecast
plot(quet.ts.2, type="l", plot.type="s", col=c('blue','red'), xlab="Month", ylab="Dispensings", xlim=c(2011,2015), ylim=c(0,40000))
abline(v=2014, lty="dashed", col="gray")
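The gap between the observed series and the counterfactual forecast then gives a simple month-by-month estimate of the intervention effect (an added illustration, not from the original guide):
# Observed post-intervention values minus the counterfactual forecast
window(quet.ts, start=c(2014,1)) - fc.ts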