library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(MLTools)
## __ __ _ _________ | \ / | | | |___ ___| _____ _____ _ _____ | \/ | | | ____ | |
## | _ | | _ | | | | ___| | |\ /| | | | |____| | | | | | | | | | | | | |___ | | |
## \/ | | | |____ | | | |_| | | |_| | | |__ ___| | |_| |_| |______| |_| |_____|
## |_____| |____| |_____|
##
## Learning is fun, Machine Learning is funnier
## ----------------------------------------- With great power comes great
## responsibility ----------------------------------------- Created by: Jos<e9>
## Portela Gonz<e1>lez <Jose.Portela@iit.comillas.edu> Guillermo Mestre Marcos
## <Guillermo.Mestre@comillas.edu> Jaime Pizarroso Gonzalo
## <jpizarroso@comillas.edu> Antonio Mu<f1>oz San Roque
## <antonio.munoz@iit.comillas.edu>
##
## Escuela T<e9>cnica Superior de Ingenier<ed>a ICAI
library(fpp2)
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.5 ──
## ✔ fma 2.5 ✔ expsmooth 2.3
library(TSA)
## Registered S3 methods overwritten by 'TSA':
## method from
## fitted.Arima forecast
## plot.Arima forecast
##
## Attaching package: 'TSA'
##
## The following object is masked from 'package:readr':
##
## spec
##
## The following objects are masked from 'package:stats':
##
## acf, arima
##
## The following object is masked from 'package:utils':
##
## tar
library(lmtest) #contains coeftest function
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(tseries) #contains adf.test function
library(Hmisc) # for computing lagged variables
##
## Attaching package: 'Hmisc'
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
## The following objects are masked from 'package:base':
##
## format.pval, units
library(lubridate)
library(readxl)
######################################################################################################
######################################################################################################
# Level Shift (LS) Example
######################################################################################################
######################################################################################################
set.seed(123)
n <- 200
# Simulate a clean ARIMA(1,0,1) process
phi <- 0.6
theta <- 0.3
e <- rnorm(n)
y_clean <- numeric(n)
y_clean[1] <- e[1]
for (t in 2:n) {
y_clean[t] <- phi*y_clean[t-1] + e[t] + theta*e[t-1]
}
# Introduce a level shift at time T = 100
T <- 100
omega <- 10 # shift magnitude
yLS <- y_clean
yLS[T:n] <- yLS[T:n] + omega # permanent shift from T onward
yLS <- ts(yLS)
y_clean <- ts(y_clean)
autoplot(yLS, series = "Series with Level Shift") +
autolayer(y_clean, series = "Clean Series", alpha = 0.5) +
ggtitle("Time Series with Level Shift at t=100")

# Fit a baseline ARIMA model (ignoring the level shift)
ggtsdisplay(yLS, main="Time Series with LS")

ggtsdisplay(diff(yLS), main="Time Series with LS")

yLS_arima <- Arima(yLS, order = c(4, 1, 0), include.mean = FALSE)
coeftest(yLS_arima)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 -0.101808 0.069889 -1.4567 0.14520
## ar2 -0.178378 0.070686 -2.5235 0.01162 *
## ar3 0.009632 0.070791 0.1361 0.89177
## ar4 -0.174789 0.070249 -2.4881 0.01284 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
CheckResiduals.ICAI(yLS_arima)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(4,1,0)
## Q* = 8.8434, df = 6, p-value = 0.1826
##
## Model df: 4. Total lags used: 10
autoplot(residuals(yLS_arima)) + ggtitle("Residuals of LS ARIMA Model")

boxplot(residuals(yLS_arima), main = "Residuals of Baseline ARIMA Model")

which(abs(yLS_arima$residuals) > (3 * sd(yLS_arima$residuals)))
## [1] 100
# tsouliers function in forecast package
forecast::tsoutliers(yLS)
## $index
## integer(0)
##
## $replacements
## numeric(0)
yLS_adj <- tsclean(yLS)
autoplot(yLS_adj, series = "LS adjusted time series") +
autolayer(yLS, series = "Non adjusted time series", alpha = 0.5) +
ggtitle("Adjusted series with forecast tsoutliers function (non effective for LS)")

# Use tsoutliers library (different from the above)
# to detect outliers including level shifts
library(tsoutliers)
out_ls <- tsoutliers::tso(yLS, tsmethod="auto.arima")
print(out_ls)
## Series: yLS
## Regression with ARIMA(2,0,1) errors
##
## Coefficients:
## ar1 ar2 ma1 LS100
## -0.1004 0.4565 0.9426 9.5893
## s.e. 0.0963 0.0924 0.0579 0.2632
##
## sigma^2 = 0.8457: log likelihood = -265.67
## AIC=541.33 AICc=541.64 BIC=557.82
##
## Outliers:
## type ind time coefhat tstat
## 1 LS 100 100 9.589 36.44
plot(out_ls) # gives diagnostics and shows detected LS

y_tso <- out_ls$yadj
autoplot(y_tso, series = "tsoutliers adjusted series") +
autolayer(yLS, series = "Non adjusted time series", alpha = 0.5) +
ggtitle("Adjusted series after LS Removal with tsoutliers package")

# Transfer function model with step function
# Create a regressors vector for the level shift
T <- 100
xLS <- rep(0, n)
xLS[T:n] <- 1
xLS <- ts(xLS)
autoplot(xLS) + ggtitle("Level Shift Regressor")

TF_LS <- arima(yLS,
order=c(1, 0, 0),
xtransf = xLS,
transfer = list(c(0,9)), #List with (r,s) orders
include.mean = TRUE,
method="ML")
# Check regression error to see the need of differentiation
TF.RegressionError.plot(yLS, xLS, TF_LS, lag.max = 20)
## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_point()`).

TF_LS <- arima(yLS,
order=c(1, 1, 0),
xtransf = xLS,
transfer = list(c(0,9)), #List with (r,s) orders
include.mean = TRUE,
method="ML")
# Check regression error to see the need of differentiation
TF.RegressionError.plot(yLS, xLS, TF_LS, lag.max = 20)
## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_point()`).

#NOTE: If this regression error is not stationary in variance,boxcox should be applied to input and output series.
# Check numerator coefficients of explanatory variable
TF.Identification.plot(xLS, TF_LS)

## Estimate Std. Error z value Pr(>|z|)
## T1-MA0 7.878290783 1.022408 7.705624655 1.302048e-14
## T1-MA1 -1.154162615 1.019668 -1.131900235 2.576764e-01
## T1-MA2 0.368062963 1.019631 0.360976482 7.181170e-01
## T1-MA3 0.008569109 1.019631 0.008404126 9.932946e-01
## T1-MA4 -0.248000451 1.019631 -0.243225656 8.078306e-01
## T1-MA5 -0.783163382 1.019631 -0.768085003 4.424367e-01
## T1-MA6 0.255283338 1.019631 0.250368324 8.023025e-01
## T1-MA7 -0.314408273 1.019632 -0.308354755 7.578124e-01
## T1-MA8 -1.298520216 1.019717 -1.273411839 2.028719e-01
## T1-MA9 0.338144170 1.027313 0.329153871 7.420394e-01
#### Fit arima noise with selected
xlag = Lag(xLS, 0) # b
xlag[is.na(xlag)]=0
TF_LS <- arima(yLS,
order=c(4,1,0),
xtransf = xLS,
transfer = list(c(0, 0)), #List with (r,s) orders
include.mean = FALSE,
method="ML")
summary(TF_LS) # summary of training errors and estimated coefficients
##
## Call:
## arima(x = yLS, order = c(4, 1, 0), include.mean = FALSE, method = "ML", xtransf = xLS,
## transfer = list(c(0, 0)))
##
## Coefficients:
## ar1 ar2 ar3 ar4 T1-MA0
## -0.0474 -0.3138 -0.0231 -0.2442 8.1589
## s.e. 0.0699 0.0698 0.0698 0.0695 0.9188
##
## sigma^2 estimated as 0.941: log likelihood = -276.51, aic = 563.01
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.008214382 0.9676193 0.770536 -12.00873 140.9615 0.933116
## ACF1
## Training set -0.009481588
coeftest(TF_LS) # statistical significance of estimated coefficients
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 -0.047444 0.069943 -0.6783 0.4975713
## ar2 -0.313785 0.069837 -4.4931 7.02e-06 ***
## ar3 -0.023098 0.069752 -0.3311 0.7405339
## ar4 -0.244249 0.069514 -3.5137 0.0004419 ***
## T1-MA0 8.158872 0.918797 8.8799 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
autoplot(TF_LS)

# Check residuals
CheckResiduals.ICAI(TF_LS, lag=50)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(4,1,0)
## Q* = 49.371, df = 45, p-value = 0.3028
##
## Model df: 5. Total lags used: 50
### Cross correlation residuals - expl. variable
res <- residuals(TF_LS)
res[is.na(res)] <- 0
ccf(y = res, x = xLS)

autoplot(yLS, series = "Observed") +
autolayer(fitted(TF_LS), series = "LS intervention") +
autolayer(yLS_arima$fitted, series = "Arima") +
theme(legend.position = "top")

autoplot(yLS, series = "Observed") +
autolayer(y_tso, series = "tsoutliers package adjustment") +
theme(legend.position = "top")

autoplot(yLS, series = "Observed") +
autolayer(fitted(TF_LS)) +
theme(legend.position = "top")

coefficients(TF_LS) # Transfer function
## ar1 ar2 ar3 ar4 T1-MA0
## -0.04744367 -0.31378534 -0.02309810 -0.24424916 8.15887168
coefficients(yLS_arima) # Arima for unadjusted time series
## ar1 ar2 ar3 ar4
## -0.10180808 -0.17837822 0.00963203 -0.17478850