library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(MLTools)
## __ __ _ _________ | \ / | | | |___ ___| _____ _____ _ _____ | \/ | | | ____ | |
## | _ | | _ | | | | ___| | |\ /| | | | |____| | | | | | | | | | | | | |___ | | |
## \/ | | | |____ | | | |_| | | |_| | | |__ ___| | |_| |_| |______| |_| |_____|
## |_____| |____| |_____|
## 
## Learning is fun, Machine Learning is funnier
## ----------------------------------------- With great power comes great
## responsibility ----------------------------------------- Created by: Jos<e9>
## Portela Gonz<e1>lez <Jose.Portela@iit.comillas.edu> Guillermo Mestre Marcos
## <Guillermo.Mestre@comillas.edu> Jaime Pizarroso Gonzalo
## <jpizarroso@comillas.edu> Antonio Mu<f1>oz San Roque
## <antonio.munoz@iit.comillas.edu>
## 
## Escuela T<e9>cnica Superior de Ingenier<ed>a ICAI
library(fpp2)
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.5 ──
## ✔ fma       2.5     ✔ expsmooth 2.3
library(TSA)
## Registered S3 methods overwritten by 'TSA':
##   method       from    
##   fitted.Arima forecast
##   plot.Arima   forecast
## 
## Attaching package: 'TSA'
## 
## The following object is masked from 'package:readr':
## 
##     spec
## 
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## 
## The following object is masked from 'package:utils':
## 
##     tar
library(lmtest)  #contains coeftest function
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(tseries) #contains adf.test function
library(Hmisc) # for computing lagged variables
## 
## Attaching package: 'Hmisc'
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(lubridate)
library(readxl)



######################################################################################################
######################################################################################################
#  Level Shift (LS) Example
######################################################################################################
######################################################################################################


set.seed(123)
n <- 200

# Simulate a clean ARIMA(1,0,1) process
phi <- 0.6
theta <- 0.3
e <- rnorm(n)
y_clean <- numeric(n)
y_clean[1] <- e[1]
for (t in 2:n) {
  y_clean[t] <- phi*y_clean[t-1] + e[t] + theta*e[t-1]
}

# Introduce a level shift at time T = 100
T <- 100
omega <- 10  # shift magnitude
yLS <- y_clean
yLS[T:n] <- yLS[T:n] + omega  # permanent shift from T onward

yLS <- ts(yLS)
y_clean <- ts(y_clean)

autoplot(yLS, series = "Series with Level Shift") +
  autolayer(y_clean, series = "Clean Series", alpha = 0.5) +
  ggtitle("Time Series with Level Shift at t=100")

# Fit a baseline ARIMA model (ignoring the level shift)

ggtsdisplay(yLS, main="Time Series with LS")

ggtsdisplay(diff(yLS), main="Time Series with LS")

yLS_arima <- Arima(yLS, order = c(4, 1, 0), include.mean = FALSE)

coeftest(yLS_arima)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value Pr(>|z|)  
## ar1 -0.101808   0.069889 -1.4567  0.14520  
## ar2 -0.178378   0.070686 -2.5235  0.01162 *
## ar3  0.009632   0.070791  0.1361  0.89177  
## ar4 -0.174789   0.070249 -2.4881  0.01284 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
CheckResiduals.ICAI(yLS_arima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(4,1,0)
## Q* = 8.8434, df = 6, p-value = 0.1826
## 
## Model df: 4.   Total lags used: 10
autoplot(residuals(yLS_arima)) + ggtitle("Residuals of LS ARIMA Model")

boxplot(residuals(yLS_arima), main = "Residuals of Baseline ARIMA Model")

which(abs(yLS_arima$residuals) > (3 * sd(yLS_arima$residuals)))
## [1] 100
# tsouliers function in forecast package
forecast::tsoutliers(yLS)
## $index
## integer(0)
## 
## $replacements
## numeric(0)
yLS_adj <- tsclean(yLS)
autoplot(yLS_adj, series = "LS adjusted time series") +
  autolayer(yLS, series = "Non adjusted time series", alpha = 0.5) +
  ggtitle("Adjusted series with forecast tsoutliers function (non effective for LS)")

# Use tsoutliers library (different from the above) 
# to detect outliers including level shifts

library(tsoutliers)
out_ls <- tsoutliers::tso(yLS, tsmethod="auto.arima")
print(out_ls)
## Series: yLS 
## Regression with ARIMA(2,0,1) errors 
## 
## Coefficients:
##           ar1     ar2     ma1   LS100
##       -0.1004  0.4565  0.9426  9.5893
## s.e.   0.0963  0.0924  0.0579  0.2632
## 
## sigma^2 = 0.8457:  log likelihood = -265.67
## AIC=541.33   AICc=541.64   BIC=557.82
## 
## Outliers:
##   type ind time coefhat tstat
## 1   LS 100  100   9.589 36.44
plot(out_ls)  # gives diagnostics and shows detected LS

y_tso <- out_ls$yadj
autoplot(y_tso, series = "tsoutliers adjusted series") +
  autolayer(yLS, series = "Non adjusted time series", alpha = 0.5) +
  ggtitle("Adjusted series after LS Removal with tsoutliers package")

# Transfer function model with step function

# Create a regressors vector for the level shift
T <- 100
xLS <- rep(0, n)
xLS[T:n] <- 1
xLS <- ts(xLS)
autoplot(xLS) + ggtitle("Level Shift Regressor")

TF_LS <- arima(yLS,
                order=c(1, 0, 0),
                xtransf = xLS,
                transfer = list(c(0,9)), #List with (r,s) orders
                include.mean = TRUE,
                method="ML")


# Check regression error to see the need of differentiation
TF.RegressionError.plot(yLS, xLS, TF_LS, lag.max = 20)
## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_point()`).

TF_LS <- arima(yLS,
                order=c(1, 1, 0),
                xtransf = xLS,
                transfer = list(c(0,9)), #List with (r,s) orders
                include.mean = TRUE,
                method="ML")


# Check regression error to see the need of differentiation
TF.RegressionError.plot(yLS, xLS, TF_LS, lag.max = 20)
## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_point()`).

#NOTE: If this regression error is not stationary in variance,boxcox should be applied to input and output series.


# Check numerator coefficients of explanatory variable
TF.Identification.plot(xLS, TF_LS)

##            Estimate Std. Error      z value     Pr(>|z|)
## T1-MA0  7.878290783   1.022408  7.705624655 1.302048e-14
## T1-MA1 -1.154162615   1.019668 -1.131900235 2.576764e-01
## T1-MA2  0.368062963   1.019631  0.360976482 7.181170e-01
## T1-MA3  0.008569109   1.019631  0.008404126 9.932946e-01
## T1-MA4 -0.248000451   1.019631 -0.243225656 8.078306e-01
## T1-MA5 -0.783163382   1.019631 -0.768085003 4.424367e-01
## T1-MA6  0.255283338   1.019631  0.250368324 8.023025e-01
## T1-MA7 -0.314408273   1.019632 -0.308354755 7.578124e-01
## T1-MA8 -1.298520216   1.019717 -1.273411839 2.028719e-01
## T1-MA9  0.338144170   1.027313  0.329153871 7.420394e-01
#### Fit arima noise with selected
xlag = Lag(xLS, 0)   # b
xlag[is.na(xlag)]=0
TF_LS <- arima(yLS,
                order=c(4,1,0),
                xtransf = xLS,
                transfer = list(c(0, 0)), #List with (r,s) orders
                include.mean = FALSE,
                method="ML")
summary(TF_LS) # summary of training errors and estimated coefficients
## 
## Call:
## arima(x = yLS, order = c(4, 1, 0), include.mean = FALSE, method = "ML", xtransf = xLS, 
##     transfer = list(c(0, 0)))
## 
## Coefficients:
##           ar1      ar2      ar3      ar4  T1-MA0
##       -0.0474  -0.3138  -0.0231  -0.2442  8.1589
## s.e.   0.0699   0.0698   0.0698   0.0695  0.9188
## 
## sigma^2 estimated as 0.941:  log likelihood = -276.51,  aic = 563.01
## 
## Training set error measures:
##                       ME      RMSE      MAE       MPE     MAPE     MASE
## Training set 0.008214382 0.9676193 0.770536 -12.00873 140.9615 0.933116
##                      ACF1
## Training set -0.009481588
coeftest(TF_LS) # statistical significance of estimated coefficients
## 
## z test of coefficients:
## 
##         Estimate Std. Error z value  Pr(>|z|)    
## ar1    -0.047444   0.069943 -0.6783 0.4975713    
## ar2    -0.313785   0.069837 -4.4931  7.02e-06 ***
## ar3    -0.023098   0.069752 -0.3311 0.7405339    
## ar4    -0.244249   0.069514 -3.5137 0.0004419 ***
## T1-MA0  8.158872   0.918797  8.8799 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
autoplot(TF_LS)

# Check residuals
CheckResiduals.ICAI(TF_LS, lag=50)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(4,1,0)
## Q* = 49.371, df = 45, p-value = 0.3028
## 
## Model df: 5.   Total lags used: 50
### Cross correlation residuals - expl. variable
res <- residuals(TF_LS)
res[is.na(res)] <- 0
ccf(y = res, x = xLS)

autoplot(yLS, series = "Observed") +
  autolayer(fitted(TF_LS), series = "LS intervention") +
  autolayer(yLS_arima$fitted, series = "Arima") + 
  theme(legend.position = "top")

autoplot(yLS, series = "Observed") +
  autolayer(y_tso, series = "tsoutliers package adjustment") + 
  theme(legend.position = "top")

autoplot(yLS, series = "Observed") +
  autolayer(fitted(TF_LS)) + 
  theme(legend.position = "top")

coefficients(TF_LS) # Transfer function
##         ar1         ar2         ar3         ar4      T1-MA0 
## -0.04744367 -0.31378534 -0.02309810 -0.24424916  8.15887168
coefficients(yLS_arima) # Arima for unadjusted time series  
##         ar1         ar2         ar3         ar4 
## -0.10180808 -0.17837822  0.00963203 -0.17478850