Lab. 7 Dynamic regression models- Seasonal example.

Author

Análisis y Predicción de Series Temporales, 3º B IMAT

Published

October 29, 2025

Preliminaries

Load libraries

library(MLTools)
library(fpp2)
library(ggplot2)
library(TSA)
library(lmtest)  #contains coeftest function
library(tseries) #contains adf.test function
library(Hmisc) # for computing lagged variables

Set working directory

setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

Load dataset, EDA and split train/test

fdata <- read.table("Seasonal_TF_1.dat",header = TRUE, sep = "")
head(fdata)
         Y       X
1 47453.75 27893.2
2 45357.50 26662.4
3 42732.85 25120.7
4 39380.79 23197.7
5 36787.20 21664.1
6 35161.46 20652.0
# If you need to change column names
colnames(fdata) <- c("Y","X")

Convert to time series object. In this case the frequency will be apparent in the time plot below:

freq <- 24
fdata_ts <- ts(fdata, frequency = freq)

and get an initial time plot (sometimes using head to zoom in helps)

autoplot(fdata_ts, facets = TRUE)

# autoplot(head(fdata_ts, 100), facets = TRUE)
ggtsdisplay(fdata_ts[,1], lag=5 * freq)

Scale values

When the range of the variables is very different, we recommend scaling them to facilitate the identification of the model.

y <- fdata_ts[,1]/100000
x <- fdata_ts[,2]/100000

Train/Test split

test_size = 4 * freq

y.TR <- subset(y, end = length(y) - test_size)
x.TR <- subset(x, end = length(y) - test_size)

tail(y.TR)
Time Series:
Start = c(38, 9) 
End = c(38, 14) 
Frequency = 24 
[1] 0.6172487 0.6435051 0.6712825 0.6591489 0.6705313 0.6350222
y.TV <- subset(y, start = length(y) - test_size + 1)
x.TV <- subset(x, start = length(y) - test_size + 1)

head(y.TV)
Time Series:
Start = c(38, 15) 
End = c(38, 20) 
Frequency = 24 
[1] 0.6321822 0.6121538 0.5945612 0.6297521 0.6489584 0.7017342

Identification and fitting process

ggtsdisplay(y, lag=4 * freq)

First TF model

Fit initial FT model with large s

# This arima function belongs to the TSA package
TF.fit <- arima(y.TR,
                order=c(1, 0, 0),
                seasonal = list(order=c(1, 0, 0), period=freq),
                xtransf = x.TR,
                transfer = list(c(0,9)), #List with (r,s) orders
                include.mean = TRUE,
                method="ML")
Warning in arima(y.TR, order = c(1, 0, 0), seasonal = list(order = c(1, :
possible convergence problem: optim gave code=1
# Check regression error to see the need of differentiation
TF.RegressionError.plot(y,x,TF.fit,lag.max = 100)
Warning: Removed 9 rows containing missing values or values outside the scale range
(`geom_point()`).

Second model

Seasonal differencing is required

NOTE: If this regression error is not stationary in variance,boxcox should be applied to input and output series.

TF.fit <- arima(y.TR,
                order=c(1, 0, 0),
                seasonal = list(order=c(1, 1, 0), period=freq),
                xtransf = x.TR,
                transfer = list(c(0,9)), #List with (r,s) orders
                include.mean = TRUE,
                method="ML")

Check regression error to see the need of differentiation

TF.RegressionError.plot(y,x,TF.fit,lag.max = 100)
Warning: Removed 9 rows containing missing values or values outside the scale range
(`geom_point()`).

Identify the transfer function parameters for the explanatory variable

TF.Identification.plot(x,TF.fit)

            Estimate  Std. Error      z value  Pr(>|z|)
T1-MA0  1.7001612456 0.002050621 829.09597813 0.0000000
T1-MA1 -0.0006336741 0.004507068  -0.14059564 0.8881894
T1-MA2 -0.0003059586 0.005373127  -0.05694238 0.9545911
T1-MA3  0.0041234214 0.005471515   0.75361605 0.4510798
T1-MA4 -0.0041941629 0.005472650  -0.76638610 0.4434466
T1-MA5 -0.0029931882 0.005472197  -0.54698105 0.5843917
T1-MA6  0.0028206119 0.005477613   0.51493451 0.6065988
T1-MA7  0.0051645575 0.005378007   0.96031059 0.3368989
T1-MA8 -0.0053266681 0.004485605  -1.18750278 0.2350294
T1-MA9  0.0011680561 0.002017348   0.57900585 0.5625852

Fit arima noise with the selected (b, r, s) and ARMA orders

p <- 0 ; d <- 0; q <- 1;
P <- 1 ; D <- 1; Q <- 0;

b <- 0; r <- 0; s <- 0;
xlag = Lag(x, b)   # b
xlag[is.na(xlag)]=0
arima.fit <- arima(y,
                   order=c(p, d, q),
                   seasonal = list(order=c(P, D, Q), period=freq),
                   xtransf = xlag,
                   transfer = list(c(r, s)), #List with (r,s) orders
                   include.mean = FALSE,
                   method="ML")

Diagnostics of the fitted model

summary(arima.fit) # summary of training errors and estimated coefficients

Call:
arima(x = y, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = freq), 
    include.mean = FALSE, method = "ML", xtransf = xlag, transfer = list(c(r, 
        s)))

Coefficients:
          ma1    sar1  T1-MA0
      -0.9035  0.6044  1.7000
s.e.   0.0143  0.0254  0.0001

sigma^2 estimated as 2.518e-07:  log likelihood = 6011.41,  aic = -12016.82

Training set error measures:
                        ME         RMSE         MAE          MPE       MAPE
Training set -2.297759e-05 0.0004957689 0.000387598 -0.005134353 0.07152603
                   MASE        ACF1
Training set 0.01473846 0.003615161

Check the statistical significance of estimated coefficients

coeftest(arima.fit)

z test of coefficients:

          Estimate  Std. Error   z value  Pr(>|z|)    
ma1    -9.0353e-01  1.4331e-02   -63.049 < 2.2e-16 ***
sar1    6.0437e-01  2.5379e-02    23.814 < 2.2e-16 ***
T1-MA0  1.7000e+00  5.8982e-05 28822.203 < 2.2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Check residuals

CheckResiduals.ICAI(arima.fit, lag=50)


    Ljung-Box test

data:  Residuals from ARIMA(0,0,1)(1,1,0)[24]
Q* = 43.741, df = 47, p-value = 0.6084

Model df: 3.   Total lags used: 50

Cross correlation between the residuals and the explanatory variable

res <- residuals(arima.fit)
res[is.na(res)] <- 0
ccf(y = res, x = x)

Visual check of the fitted values vs real values in the training period

autoplot(y, series = "Real", size = 2, alpha=0.8, color="blue") +
  forecast::autolayer(fitted(arima.fit), series = "Fitted", color="yellow")

Training errors of the model

accuracy(fitted(arima.fit),y.TR)
                    ME         RMSE          MAE          MPE       MAPE
Test set -2.678238e-05 0.0004916385 0.0003846799 -0.005772267 0.07059664
               ACF1  Theil's U
Test set 0.02444929 0.01486522

Forecast for new data with h = 1

h <- 1

y.TV.est <- y * NA

for (i in seq(length(y.TR) + 1, length(y) - h, 1)){# loop for validation period
  y.TV.est[i] <- TF.forecast(y.old = subset(y,end=i-1), #past values of the series
                             x.old = subset(x,end=i-1), #Past values of the explanatory variables
                             x.new = subset(x,start = i,end=i), #New values of the explanatory variables
                             model = arima.fit, #fitted transfer function model
                             h=h)[h] #Forecast horizon
}

y.TV.est <- na.omit(y.TV.est)

Direct forecast:

y.TV.est2 <- TF.forecast(y.old = subset(y,end=i-1), #past values of the series
                             x.old = subset(x,end=i-1), #Past values of the explanatory variables
                             x.new = subset(x,start = i,end=i), #New values of the explanatory variables
                             model = arima.fit, #fitted transfer function model
                             h=length(y.TV)) #forecast horizon

Plot of the forecast

y.TV_ts <- ts(y.TV, frequency = freq, start = end(y.TR) + c(0, 1))
y.TV.est_ts <- ts(y.TV.est, frequency = freq, start = end(y.TR) + c(0, 1))

y.TV.est2_ts <- ts(y.TV.est2, frequency = freq, start = end(y.TR) + c(0, 1))
head(y.TV_ts)
Time Series:
Start = c(38, 15) 
End = c(38, 20) 
Frequency = 24 
[1] 0.6321822 0.6121538 0.5945612 0.6297521 0.6489584 0.7017342
autoplot(y.TV_ts, series = "Test set, real values", size=2, alpha=0.5, color="blue") +
  forecast::autolayer(y.TV.est_ts, series = "Forecast", color = "yellow") + 
  forecast::autolayer(y.TV.est2_ts, series = "Direct Forecast", color = "tan") +
  forecast::autolayer(tail(y.TR, freq), series = "Training data", color = "red") 

accuracy(y.TV.est*100000,y.TV*100000)
               ME    RMSE      MAE         MPE       MAPE       ACF1  Theil's U
Test set 2.320657 52.6324 40.90836 0.003130764 0.07884087 -0.1405088 0.01597259
accuracy(y.TV.est2*100000,y.TV*100000)
                ME     RMSE      MAE        MPE     MAPE      ACF1 Theil's U
Test set -188.0198 3484.899 2772.375 -0.5456024 5.264209 0.4830753  1.042203