I've followed Jason Brownlee's excellent tutorial on SARIMAX grid searching and have it working when forecasting a univariate series. However, I now need to add an exogenous variable. I know that this means passing an exog argument to the SARIMAX call, and I have that working in my main script, but I can't work out how to include it in the set of functions used in the tutorial.
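For context, this is roughly what the working call in my main script looks like (just a sketch; endog, exog_insample and exog_next are placeholders for my own arrays, not the variables used in the code below):

from statsmodels.tsa.statespace.sarimax import SARIMAX

# fit with the exogenous regressor alongside the endogenous series
model = SARIMAX(endog, exog=exog_insample, order=(1, 0, 0), seasonal_order=(0, 0, 0, 0), trend='n')
model_fit = model.fit(disp=False)
# out-of-sample forecasting also needs the exogenous values for the forecast period
yhat = model_fit.forecast(steps=1, exog=exog_next)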
My code is below:
# imports used by this extract
from math import sqrt
from multiprocessing import cpu_count
from warnings import catch_warnings, filterwarnings
from joblib import Parallel, delayed
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.statespace.sarimax import SARIMAX
import pandas as pd

# one-step sarima forecast
def sarima_forecast(history, config):
    order, sorder, trend, exog = config
    # define model
    model = SARIMAX(history, exog=exog[:len(history)].tolist(), order=order, seasonal_order=sorder,
                    trend=trend, enforce_stationarity=False, enforce_invertibility=False)
    # fit model
    model_fit = model.fit(disp=False)
    # make one step forecast
    yhat = model_fit.predict(len(history), len(history))
    return yhat[0]

# root mean squared error or rmse
def measure_rmse(actual, predicted):
    return sqrt(mean_squared_error(actual, predicted))

# split a univariate dataset into train/test sets
def train_test_split(data, n_test):
    return data[:-n_test], data[-n_test:]

# walk-forward validation for univariate data
def walk_forward_validation(data, n_test, cfg):
    predictions = list()
    # split dataset
    train, test = train_test_split(data, n_test)
    # seed history with training dataset
    history = [x for x in train]
    # step over each time-step in the test set
    for i in range(len(test)):
        # fit model and make forecast for history
        yhat = sarima_forecast(history, cfg)
        # store forecast in list of predictions
        predictions.append(yhat)
        # add actual observation to history for the next loop
        history.append(test[i])
    # estimate prediction error
    error = measure_rmse(test, predictions)
    return error

# grid search configs
def grid_search(data, cfg_list, n_test, parallel=True):
    scores = None
    if parallel:
        # execute configs in parallel
        executor = Parallel(n_jobs=cpu_count(), backend='multiprocessing')
        tasks = (delayed(score_model)(data, n_test, cfg) for cfg in cfg_list)
        scores = executor(tasks)
    else:
        scores = [score_model(data, n_test, cfg) for cfg in cfg_list]
    # remove empty results
    scores = [r for r in scores if r[1] != None]
    # sort configs by error, asc
    scores.sort(key=lambda tup: tup[1])
    return scores

# score a model, return None on failure
def score_model(data, n_test, cfg, debug=False):
    result = None
    # convert config to a key
    key = str(cfg)
    # show all warnings and fail on exception if debugging
    if debug:
        result = walk_forward_validation(data, n_test, cfg)
    else:
        # one failure during model validation suggests an unstable config
        try:
            # never show warnings when grid searching, too noisy
            with catch_warnings():
                filterwarnings("ignore")
                result = walk_forward_validation(data, n_test, cfg)
        except:
            error = None
    # check for an interesting result
    if result is not None:
        print(' > Model[%s] %.3f' % (key, result))
    return (key, result)

# create a set of sarima configs to try
def sarima_configs(exog, seasonal=[0]):
    models = list()
    # define config lists
    e = exog
    p_params = [0, 1, 2]
    d_params = [0, 1]
    q_params = [0, 1, 2]
    t_params = ['n', 'c', 't', 'ct']
    P_params = [0, 1, 2]
    D_params = [0, 1]
    Q_params = [0, 1, 2]
    m_params = seasonal
    # create config instances
    for p in p_params:
        for d in d_params:
            for q in q_params:
                for t in t_params:
                    for P in P_params:
                        for D in D_params:
                            for Q in Q_params:
                                for m in m_params:
                                    cfg = [(p, d, q), (P, D, Q, m), t, e]
                                    models.append(cfg)
    return models

data = difference.values
exog = high_f.values
cfg_list = sarima_configs(exog, seasonal=[0, 4])
scores = grid_search(data, cfg_list, n_test)
print('done')
for cfg, error in scores[:3]:
    print(cfg, error)
sarimax_scores = pd.DataFrame(list(scores), columns=['config', 'error'])
The above is an extract, so apologies if I've left anything out of the core code. I tried various ways of adding the exogenous variable in, including passing extra arguments to several of the functions, but because they all call one another, none of those attempts seemed to work, so I decided to carry it as part of the configs instead.
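For example, the forecast function itself was easy enough to change to something like the sketch below (exog_history and exog_next are just illustrative names for the slices I was passing in), but then walk_forward_validation, score_model and grid_search all need extra arguments threaded through as well, which is where I kept getting tangled up:

# variant with the exogenous values passed as explicit arguments instead of inside the config
def sarima_forecast(history, exog_history, exog_next, config):
    order, sorder, trend = config
    model = SARIMAX(history, exog=exog_history, order=order, seasonal_order=sorder,
                    trend=trend, enforce_stationarity=False, enforce_invertibility=False)
    model_fit = model.fit(disp=False)
    # the one-step-ahead prediction needs the next exogenous value as well
    yhat = model_fit.predict(len(history), len(history), exog=[exog_next])
    return yhat[0]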
For reference, both data and exog are arrays of length 31. Currently scores is coming back as [], which means I can't use this in my main model. Any help would be much appreciated.
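In case it helps with reproducing this, score_model has a debug flag that re-raises the exception instead of swallowing it, so a single config can be checked on its own with something like:

# run one config directly so the underlying exception surfaces
cfg = cfg_list[0]
print(score_model(data, n_test, cfg, debug=True))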