#################################################################################
############## Forecasting: Transfer Functions (TF) #############################
############## ----------- solution --------- ############################
#################################################################################
library(MLTools)
## __ __ _ _________
## | \ / | | | |___ ___| _____ _____ _ _____
## | \/ | | | ____ | | | _ | | _ | | | | ___|
## | |\ /| | | | |____| | | | | | | | | | | | | |___ |
## | | \/ | | | |____ | | | |_| | | |_| | | |__ ___| |
## |_| |_| |______| |_| |_____| |_____| |____| |_____|
##
## Learning is fun, Machine Learning is funnier
## -----------------------------------------
## With great power comes great responsibility
## -----------------------------------------
## Created by: José Portela González <Jose.Portela@iit.comillas.edu>
##             Guillermo Mestre Marcos <Guillermo.Mestre@comillas.edu>
##             Jaime Pizarroso Gonzalo <jpizarroso@comillas.edu>
##             Antonio Muñoz San Roque <antonio.munoz@iit.comillas.edu>
##
## Escuela Técnica Superior de Ingeniería ICAI
library(fpp2)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.5 ──
## ✔ ggplot2 3.5.2 ✔ fma 2.5
## ✔ forecast 8.24.0 ✔ expsmooth 2.3
library(ggplot2)
library(TSA)
## Registered S3 methods overwritten by 'TSA':
## method from
## fitted.Arima forecast
## plot.Arima forecast
##
## Attaching package: 'TSA'
##
## The following objects are masked from 'package:stats':
##
## acf, arima
##
## The following object is masked from 'package:utils':
##
## tar
library(lmtest) #contains coeftest function
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(tseries) #contains adf.test function
library(Hmisc) # for computing lagged variables
##
## Attaching package: 'Hmisc'
##
## The following objects are masked from 'package:base':
##
## format.pval, units
## Set working directory ---------------------------------------------------------------------------------------------
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
# # Code to import data from Python statsmodels via reticulate
# library(reticulate)
# library(tidyverse)
# use_condaenv("fc", required = TRUE)
#
# # Import the statsmodels module
# sm <- import("statsmodels.api")
# # Load the dataset into a pandas DataFrame
# macro_data <- py_to_r(sm$datasets$macrodata$load_pandas()$data)
# macro_data <- macro_data %>%
# select(year, quarter, realgdp, realcons, unemp)
#
# write.csv(macro_data, file = "macro_data.csv", row.names = FALSE)
## Load dataset -------------------------------------------------------------------------------------------------------
fdata <- read.csv("macro_data.csv")
# View first few rows
head(fdata)
## year quarter realgdp realcons unemp
## 1 1959 1 2710.349 1707.4 5.8
## 2 1959 2 2778.801 1733.7 5.1
## 3 1959 3 2775.488 1751.8 5.3
## 4 1959 4 2785.204 1753.7 5.6
## 5 1960 1 2847.699 1770.5 5.2
## 6 1960 2 2834.390 1792.9 5.2
# Convert to time series object
freq <- 4
fdata_ts <- ts(fdata[,-(1:2)], frequency = freq)
head(fdata_ts)
## realgdp realcons unemp
## 1 Q1 2710.349 1707.4 5.8
## 1 Q2 2778.801 1733.7 5.1
## 1 Q3 2775.488 1751.8 5.3
## 1 Q4 2785.204 1753.7 5.6
## 2 Q1 2847.699 1770.5 5.2
## 2 Q2 2834.390 1792.9 5.2
autoplot(fdata_ts, facets = TRUE)

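# Note that no start date was passed to ts(), so the time index runs 1, 2, 3, ...
# instead of calendar years. A sketch of the alternative (head(fdata) shows the
# data begin in 1959 Q1), kept commented out so the indices below stay consistent:
# fdata_ts <- ts(fdata[,-(1:2)], start = c(1959, 1), frequency = freq)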
ggAcf(fdata_ts[,1], lag.max = 15 * freq)

# Scaling factors for the output and the two inputs
scale_y <- 1000
scale_x <- c(1000,1)
# Create time series and scale values
y <- fdata_ts[ ,1]/scale_y
x1 <- fdata_ts[ , 2]/scale_x[1]
x2 <- fdata_ts[ , 3]/scale_x[2]
x <- cbind(x1, x2)
fdata_ts <- cbind(y, x1, x2)
head(fdata_ts)
## y x1 x2
## 1 Q1 2.710349 1.7074 5.8
## 1 Q2 2.778801 1.7337 5.1
## 1 Q3 2.775488 1.7518 5.3
## 1 Q4 2.785204 1.7537 5.6
## 2 Q1 2.847699 1.7705 5.2
## 2 Q2 2.834390 1.7929 5.2
autoplot(fdata_ts, facets = TRUE)

y_orig <- fdata_ts[,1] # note: fdata_ts was overwritten above, so this holds the scaled series
# Lengths of the training and test (hold-out) subsets
len_TV <- 12
len_TR <- nrow(fdata_ts) - len_TV
# Use them to build the train and test index masks
ind <- seq_len(nrow(fdata))
ind_TR <- ind <= len_TR
ind_TV <- !ind_TR
# Training subset
y.tr <- ts(y[ind_TR], frequency = freq)
x.tr <- ts(x[ind_TR,], frequency = freq)
autoplot(cbind(y.tr,x.tr), facets = TRUE)

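# As an alternative sketch, stats::window() splits a ts by time rather than by
# index (times here are "years" since start = 1):
# split_time <- time(y)[len_TR]                   # last training time
# y.tr2 <- window(y, end = split_time)            # training part
# y.ts2 <- window(y, start = split_time + 1/freq) # test part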
# Test subset
y.ts <- ts(y[ind_TV], frequency = freq)
# Careful: we did not set the start argument!
head(y.ts)
##       Qtr1     Qtr2     Qtr3     Qtr4
## 1 13.06068 13.09990 13.20398 13.32111
## 2 13.39125 13.36687
y.ts <- ts(y[ind_TV], frequency = freq,
           start = end(y.tr) + c(0,1))
head(y.ts)
##        Qtr1     Qtr2     Qtr3     Qtr4
## 48                            13.06068
## 49 13.09990 13.20398 13.32111 13.39125
## 50 13.36687
# Sometimes the test outputs are not available;
# here we assume the test inputs are known
x.ts <- ts(x[ind_TV,], frequency = freq,
           start = end(x.tr) + c(0,1))
head(x.ts)
## x1 x2
## 48 Q4 9.1816 4.4
## 49 Q1 9.2651 4.5
## 49 Q2 9.2915 4.5
## 49 Q3 9.3356 4.7
## 49 Q4 9.3636 4.8
## 50 Q1 9.3496 4.9
autoplot(tail(y.tr, 50), series = "Training", color = "blue") +
  forecast::autolayer(y.ts, series = "Test", color = "red") +
  geom_vline(xintercept = as.numeric(tail(time(y.tr), 1)), # mark the train/test boundary
             linetype = "dashed", linewidth = 0.5)

# Model identification
ggtsdisplay(y.tr, lag.max = 50)

# BoxCox.lambda.plot(y.tr, window = freq)
BoxCox.lambda.plot(y.tr)
## `geom_smooth()` using formula = 'y ~ x'

## log.ma
## 0.2876229
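# The estimated lambda (~0.29) lies between a log transform (0) and no
# transform (1). If the variance were clearly non-constant, a sketch of the
# transformation using forecast::BoxCox would be:
# y.tr.bc <- BoxCox(y.tr, lambda = 0.29)
# autoplot(cbind(y.tr, y.tr.bc), facets = TRUE)
# Here the series is left untransformed, as in the fits below.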
## Identification and fitting process -------------------------------------------------------------------------------------------------------
# (The intermediate steps are shown for teaching purposes.)
## FIRST --------------------------------------------
#### Fit an initial TF model with a large s (unrestricted distributed lag)
# Note: this arima() is the TSA version, which masks stats::arima and accepts
# transfer function inputs
TF.fit <- arima(y.tr,
                xtransf = x.tr,
                order = c(1,0,0),
                # seasonal = list(order = c(1,0,0), period = freq),
                transfer = list(c(0,9), c(0,9)),
                method = "ML")
TF.RegressionError.plot(y.tr,x.tr,TF.fit,lag.max = 50)
## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_point()`).

# The regression error is not stationary: regular differencing is needed, and
# the coefficients of the explanatory variables cannot be interpreted yet
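# A quick numeric cross-check of the differencing order (a heuristic applied to
# the output series itself, not to the regression error):
# ndiffs(y.tr)          # forecast: estimated number of regular differences
# adf.test(diff(y.tr))  # tseries: ADF test on the differenced series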
## SECOND --------------------------------------------
#### Fit the initial TF model with large s, now including regular differencing
TF.fit <- arima(y.tr,
                xtransf = x.tr,
                order = c(1,1,0),
                # seasonal = list(order = c(1,0,0), period = freq),
                transfer = list(c(0,9), c(0,9)),
                method = "ML")
summary(TF.fit) #summary of training errors and estimated coefficients
##
## Call:
## arima(x = y.tr, order = c(1, 1, 0), method = "ML", xtransf = x.tr, transfer = list(c(0,
## 9), c(0, 9)))
##
## Coefficients:
## ar1 x1-MA0 x1-MA1 x1-MA2 x1-MA3 x1-MA4 x1-MA5 x1-MA6
## -0.2448 0.9126 0.3677 0.1395 -0.1883 0.0586 -0.1506 -0.0461
## s.e. 0.0733 0.1025 0.1119 0.1125 0.1149 0.1156 0.1152 0.1157
## x1-MA7 x1-MA8 x1-MA9 x2-MA0 x2-MA1 x2-MA2 x2-MA3 x2-MA4
## -0.0619 0.1785 0.1090 -0.0545 -0.0117 0.0149 -0.0022 0.0012
## s.e. 0.1157 0.1159 0.1113 0.0124 0.0158 0.0155 0.0153 0.0153
## x2-MA5 x2-MA6 x2-MA7 x2-MA8 x2-MA9
## -0.0127 0.0025 0.0026 0.0095 -0.0056
## s.e. 0.0152 0.0150 0.0150 0.0148 0.0103
##
## sigma^2 estimated as 0.001092: log likelihood = 360.36, aic = -678.72
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0030248 0.03294917 0.02551225 0.06547611 0.3936668 0.4036532
## ACF1
## Training set -0.02925025
coeftest(TF.fit) #statistical significance of estimated coefficients
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 -0.2448429 0.0733336 -3.3388 0.0008415 ***
## x1-MA0 0.9126065 0.1025226 8.9015 < 2.2e-16 ***
## x1-MA1 0.3676982 0.1119134 3.2856 0.0010178 **
## x1-MA2 0.1394587 0.1124597 1.2401 0.2149465
## x1-MA3 -0.1883219 0.1149311 -1.6386 0.1013043
## x1-MA4 0.0586236 0.1155526 0.5073 0.6119217
## x1-MA5 -0.1506487 0.1152302 -1.3074 0.1910866
## x1-MA6 -0.0461404 0.1157331 -0.3987 0.6901295
## x1-MA7 -0.0619178 0.1157350 -0.5350 0.5926523
## x1-MA8 0.1784654 0.1159314 1.5394 0.1237053
## x1-MA9 0.1090172 0.1113263 0.9793 0.3274525
## x2-MA0 -0.0545344 0.0123805 -4.4049 1.059e-05 ***
## x2-MA1 -0.0116954 0.0157718 -0.7415 0.4583657
## x2-MA2 0.0148546 0.0155276 0.9567 0.3387399
## x2-MA3 -0.0021988 0.0153007 -0.1437 0.8857335
## x2-MA4 0.0012191 0.0153276 0.0795 0.9366062
## x2-MA5 -0.0127058 0.0152362 -0.8339 0.4043249
## x2-MA6 0.0025426 0.0149971 0.1695 0.8653696
## x2-MA7 0.0026165 0.0150071 0.1744 0.8615893
## x2-MA8 0.0095000 0.0147512 0.6440 0.5195661
## x2-MA9 -0.0055924 0.0103251 -0.5416 0.5880764
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Check the regression error to assess the need for further differencing
TF.RegressionError.plot(y.tr,x.tr,TF.fit,lag.max = 50)
## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_point()`).

# The regression error is stationary, so we can continue.
# NOTE: if the regression error were not stationary in variance, a Box-Cox
# transformation should be applied to both the input and output series.
# Check the impulse-response (numerator) coefficients of the explanatory variables
TF.Identification.plot(x.tr,TF.fit)
## [1] "x1"
## Estimate Std. Error z value Pr(>|z|)
## x1-MA0 0.91260651 0.1025226 8.9015152 5.508941e-19
## x1-MA1 0.36769822 0.1119134 3.2855616 1.017793e-03
## x1-MA2 0.13945874 0.1124597 1.2400780 2.149465e-01
## x1-MA3 -0.18832188 0.1149311 -1.6385631 1.013043e-01
## x1-MA4 0.05862359 0.1155526 0.5073323 6.119217e-01
## x1-MA5 -0.15064874 0.1152302 -1.3073715 1.910866e-01
## x1-MA6 -0.04614041 0.1157331 -0.3986794 6.901295e-01
## x1-MA7 -0.06191781 0.1157350 -0.5349964 5.926523e-01
## x1-MA8 0.17846544 0.1159314 1.5394058 1.237053e-01
## x1-MA9 0.10901718 0.1113263 0.9792580 3.274525e-01
## [1] "x2"
## Estimate Std. Error z value Pr(>|z|)
## x2-MA0 -0.054534361 0.01238050 -4.40485820 1.058531e-05
## x2-MA1 -0.011695449 0.01577182 -0.74154068 4.583657e-01
## x2-MA2 0.014854636 0.01552764 0.95665794 3.387399e-01
## x2-MA3 -0.002198784 0.01530069 -0.14370491 8.857335e-01
## x2-MA4 0.001219098 0.01532761 0.07953608 9.366062e-01
## x2-MA5 -0.012705826 0.01523623 -0.83392209 4.043249e-01
## x2-MA6 0.002542645 0.01499706 0.16954291 8.653696e-01
## x2-MA7 0.002616509 0.01500710 0.17435145 8.615893e-01
## x2-MA8 0.009499993 0.01475121 0.64401432 5.195661e-01
## x2-MA9 -0.005592361 0.01032514 -0.54162566 5.880764e-01

## Transfer function parameters for more than one input
# Choose the values below using the identification plots
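# In Box-Jenkins notation each input enters the model as
#   y_t = [ omega(B) * B^b / delta(B) ] * x_t + N_t,
# where b is the dead time, s the order of the numerator omega(B), and r the
# order of the denominator delta(B); TSA::arima encodes each input as
# transfer = list(c(r, s)) once x has been shifted by b.
# Our reading of the identification output above: for x1 the impulse-response
# weights are significant at lags 0-1 and decay afterwards, suggesting b1 = 0
# with a first-order denominator; for x2 only MA0 is significant, suggesting a
# pure gain.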
# x1:
b1 <- 0; r1 <- 1; s1 <- 0
# x2:
b2 <- 0; r2 <- 0; s2 <- 0
# Next apply the lags to the inputs
xlag <- x.tr
xlag[,1] <- Lag(x.tr[,1], b1) # shift x1 by its dead time b1
xlag[,2] <- Lag(x.tr[,2], b2) # shift x2 by its dead time b2
xlag[is.na(xlag)] <- 0        # replace the NAs introduced by the lags
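# Hmisc::Lag shifts a series forward, padding the start with NAs: for example,
# Lag(1:5, 1) returns NA 1 2 3 4. With b1 = b2 = 0 nothing is shifted here, so
# the NA replacement above is a no-op in this particular case.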
# and fit the ARIMA model with those parameters
# and the ARMA structure (p, d, q)(P, D, Q)_s
arima.fit <- arima(y.tr,
                   xtransf = xlag,
                   order = c(0, 1, 1),
                   # seasonal = list(order = c(0,0,0), period = freq),
                   transfer = list(c(r1, s1), c(r2, s2)),
                   method = "ML")
summary(arima.fit) #summary of training errors and estimated coefficients
##
## Call:
## arima(x = y.tr, order = c(0, 1, 1), method = "ML", xtransf = xlag, transfer = list(c(r1,
## s1), c(r2, s2)))
##
## Coefficients:
## ma1 x1-AR1 x1-MA0 x2-MA0
## -0.2989 0.0153 1.2520 -0.0527
## s.e. 0.0723 0.0169 0.0466 0.0068
##
## sigma^2 estimated as 0.001286: log likelihood = 362.66, aic = -717.33
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.005980813 0.03577307 0.02732755 0.1184703 0.4490564 0.4323748
## ACF1
## Training set -0.02735941
coeftest(arima.fit) #statistical significance of estimated coefficients
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 -0.2988737 0.0722874 -4.1345 3.557e-05 ***
## x1-AR1 0.0152844 0.0168652 0.9063 0.3648
## x1-MA0 1.2520229 0.0465576 26.8919 < 2.2e-16 ***
## x2-MA0 -0.0527139 0.0068213 -7.7279 1.093e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
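# Note: the parsimonious model improves the AIC (-717.3 vs -678.7 for the
# large-s fit above) while keeping only the terms suggested by identification.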
#Check residuals
CheckResiduals.ICAI(arima.fit, lag=50)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1)
## Q* = 42.195, df = 46, p-value = 0.6323
##
## Model df: 4. Total lags used: 50
# All coefficients except x1-AR1 are significant (x1-AR1 could arguably be dropped) --> Good.
# All residual autocorrelations are small --> Good.
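# As an alternative sketch, the standard forecast tooling can be applied to the
# residual series directly:
# checkresiduals(residuals(arima.fit), lag = 50) # forecast: ACF plot + Ljung-Box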
# Check cross-correlation between the residuals and the explanatory variables
res <- residuals(arima.fit)
res[is.na(res)] <- 0
ccf(y = res, x = x.tr[,1])

ccf(y = res, x = x.tr[,2])

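# The dashed bounds in the ccf plots are approximately +/- 2/sqrt(n); a quick
# manual check of that threshold:
# 2 / sqrt(length(res))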
########
# None of the explanatory variables shows significant cross-correlation with the residuals -> Good
## Finished model identification
#Check fitted
autoplot(y.tr, series = "Real") +
  forecast::autolayer(fitted(arima.fit), series = "Fitted")

# Check the training errors of the model
accuracy(fitted(arima.fit),y.tr)
## ME RMSE MAE MPE MAPE ACF1
## Test set 0.005980813 0.03577307 0.02732755 0.1184703 0.4490564 -0.02735941
## Theil's U
## Test set 0.497303
# For h = 12 we forecast all 12 test values at once (the future inputs are known)
y_est <- TF.forecast(y.old = y.tr,       # past values of the series
                     x.old = x.tr,       # past values of the explanatory variables
                     x.new = x.ts,       # new values of the explanatory variables
                     model = arima.fit,  # fitted transfer function model
                     h = 12)             # forecast horizon
y_est
## [1] 13.11702 13.21805 13.25273 13.29793 13.32857 13.30632 13.28145 13.14555
## [9] 13.00587 12.95861 12.87557 12.93799
y_est <- ts(y_est, frequency = freq,
            start = end(y.tr) + c(0,1))
# Plot the forecast and real values
autoplot(y.ts, series = "Test set (real values)") +
  forecast::autolayer(y_est, series = "Forecast (h=12) for the test") +
  forecast::autolayer(tail(y.tr, 24), series = "Training data")

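# The forecast above calls the model once with h = 12. A hedged sketch of the
# recursive one-step-ahead (h = 1) alternative, assuming TF.forecast accepts the
# same arguments as above and that realized test values can be appended to the
# history at each step:
# y_est_h1 <- numeric(len_TV)
# for (i in 1:len_TV) {
#   y.hist <- ts(c(y.tr, y.ts[seq_len(i - 1)]), frequency = freq)
#   x.hist <- ts(rbind(as.matrix(x.tr),
#                      as.matrix(x.ts)[seq_len(i - 1), , drop = FALSE]),
#                frequency = freq)
#   x.next <- ts(as.matrix(x.ts)[i, , drop = FALSE], frequency = freq,
#                start = end(x.hist) + c(0, 1))
#   y_est_h1[i] <- TF.forecast(y.old = y.hist, x.old = x.hist,
#                              x.new = x.next, model = arima.fit, h = 1)
# }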
# Accuracy in test
accuracy(y_est * scale_y, y.ts * scale_y)
## ME RMSE MAE MPE MAPE ACF1 Theil's U
## Test set 34.76367 91.57477 77.50328 0.2596796 0.5862351 0.6163587 0.8564871
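# A quick manual check of the MAPE reported above (the scale factor cancels):
# mean(abs((y.ts - y_est) / y.ts)) * 100 # should reproduce ~0.586 %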
# Save the forecast, back in the original units, to a file
write.table(y_est * scale_y, file = "Pred_TF_2inputs.dat",
            col.names = FALSE, row.names = FALSE)
getwd()
## [1] "/Users/fernando/code/frcst_imat_public/2025_10_29_TransferFunctions_2"