Adding exog variable into SARIMAX grid search

Question

I've followed Jason Brownlee's excellent tutorial on SARIMAX grid searching, and have got it working when predicting with a univariate series. However, I need to add an exogenous variable. I know that to do this you pass an exog argument to the SARIMAX call, and I have this working in my main script, but I can't work out how to thread it through the set of functions used in the tutorial.

My code is below:

# one-step sarima forecast
def sarima_forecast(history, config):
    """Fit a SARIMAX model on `history` and return its prediction for the next step.

    `config` unpacks into (order, seasonal_order, trend, exog); `exog` is
    truncated to the number of observations in `history` before fitting.
    """
    order, sorder, trend, exog = config
    n_obs = len(history)
    # build the model on the observations seen so far
    model = SARIMAX(
        history,
        exog=exog[:n_obs].tolist(),
        order=order,
        seasonal_order=sorder,
        trend=trend,
        enforce_stationarity=False,
        enforce_invertibility=False,
    )
    # estimate parameters quietly
    model_fit = model.fit(disp=False)
    # predict the single step that follows the history
    yhat = model_fit.predict(n_obs, n_obs)
    return yhat[0]

# root mean squared error or rmse
def measure_rmse(actual, predicted):
    """Return the square root of the mean squared error of `predicted` vs `actual`."""
    mse = mean_squared_error(actual, predicted)
    return sqrt(mse)

# split a univariate dataset into train/test sets
def train_test_split(data, n_test):
    """Split `data` into a leading train part and a trailing test part.

    Args:
        data: a sliceable sequence (list, NumPy array, ...).
        n_test: number of trailing observations to hold out for testing.

    Returns:
        A `(train, test)` tuple where `test` holds the last `n_test` items.

    Raises:
        ValueError: if `n_test` is negative.
    """
    if n_test < 0:
        raise ValueError("n_test must be non-negative")
    # Use an explicit split index: data[:-0] is empty and data[-0:] is the
    # whole sequence, which would swap train and test when n_test == 0.
    split = max(len(data) - n_test, 0)
    return data[:split], data[split:]

# walk-forward validation for univariate data
def walk_forward_validation(data, n_test, cfg):
    """Return the RMSE of one-step forecasts made over the last `n_test` points.

    The model is refit on a growing history: after each forecast the actual
    observation for that step is appended before the next forecast is made.
    """
    # hold out the tail of the series
    train, test = train_test_split(data, n_test)
    # the history starts as a copy of the training observations
    history = list(train)
    predictions = []
    for step in range(len(test)):
        # forecast the next point from everything seen so far
        predictions.append(sarima_forecast(history, cfg))
        # reveal the true value for the following iteration
        history.append(test[step])
    # score the forecasts against the held-out observations
    return measure_rmse(test, predictions)

# grid search configs
def grid_search(data, cfg_list, n_test, parallel=True):
    """Score every config in `cfg_list` and return the successful ones, best first.

    Args:
        data: the series passed through to `score_model`.
        cfg_list: iterable of model configurations to evaluate.
        n_test: number of trailing observations used for validation.
        parallel: when True, evaluate configs with joblib across all CPUs;
            otherwise evaluate them sequentially in this process.

    Returns:
        A list of `(key, error)` tuples sorted by ascending error. Configs
        whose score is None (i.e. that failed) are dropped, so the list is
        empty when every config fails.
    """
    if parallel:
        # execute configs in parallel
        executor = Parallel(n_jobs=cpu_count(), backend='multiprocessing')
        tasks = (delayed(score_model)(data, n_test, cfg) for cfg in cfg_list)
        scores = executor(tasks)
    else:
        scores = [score_model(data, n_test, cfg) for cfg in cfg_list]
    # remove empty results
    scores = [r for r in scores if r[1] is not None]
    # sort configs by error, asc
    scores.sort(key=lambda tup: tup[1])
    return scores

# score a model, return None on failure
def score_model(data, n_test, cfg, debug=False):
    """Evaluate one config with walk-forward validation.

    Args:
        data: the series to validate on.
        n_test: number of trailing observations used for validation.
        cfg: the model configuration; its `str()` is used as the result key.
        debug: when True, warnings are shown and any exception propagates,
            which is the way to see why a config fails.

    Returns:
        A `(key, result)` tuple; `result` is the validation error, or None
        when validation raised an exception and `debug` is False.
    """
    result = None
    # convert config to a key
    key = str(cfg)
    # show all warnings and fail on exception if debugging
    if debug:
        result = walk_forward_validation(data, n_test, cfg)
    else:
        # one failure during model validation suggests an unstable config
        try:
            # never show warnings when grid searching, too noisy
            with catch_warnings():
                filterwarnings("ignore")
                result = walk_forward_validation(data, n_test, cfg)
        except Exception:
            # Catch Exception rather than everything so that Ctrl-C and
            # SystemExit still stop the search; the config is reported as failed.
            result = None
    # check for an interesting result
    if result is not None:
        print(' > Model[%s] %.3f' % (key, result))
    return (key, result)

# create a set of sarima configs to try
def sarima_configs(exog, seasonal=(0,)):
    """Build the grid of SARIMA configurations to evaluate.

    Args:
        exog: the exogenous data; the same object is attached to every config.
        seasonal: iterable of seasonal period lengths (`m`) to try.

    Returns:
        A list of configs, each `[(p, d, q), (P, D, Q, m), trend, exog]`,
        ordered with `p` varying slowest and `m` varying fastest.
    """
    from itertools import product

    # define config lists
    p_params = [0, 1, 2]
    d_params = [0, 1]
    q_params = [0, 1, 2]
    t_params = ['n', 'c', 't', 'ct']
    P_params = [0, 1, 2]
    D_params = [0, 1]
    Q_params = [0, 1, 2]
    m_params = seasonal
    # create config instances; product() iterates in the same order as the
    # equivalent nested loops (last argument varies fastest)
    grid = product(p_params, d_params, q_params, t_params,
                   P_params, D_params, Q_params, m_params)
    return [[(p, d, q), (P, D, Q, m), t, exog]
            for p, d, q, t, P, D, Q, m in grid]
# target series and exogenous regressor as raw arrays
data = difference.values
exog = high_f.values

# candidate configurations, trying seasonal periods 0 and 4
cfg_list = sarima_configs(exog, seasonal=[0, 4])

# evaluate every configuration
scores = grid_search(data, cfg_list, n_test)
print('done')

# show the three best configurations
for cfg, error in scores[:3]:
    print(cfg, error)

# keep the full ranking as a table
sarimax_scores = pd.DataFrame(scores, columns=['config', 'error'])

The above is an extract, so apologies if I'm missing anything in the core code. I tried various ways of adding them in, including adding additional arguments to several of the functions, but as they are all nested, none seemed to work, so I decided to add it as a part of the configs.

For reference, both data and exog are arrays of length 31.

Currently scores is coming back as [], which means I can't use this in my main model. Any help would be much appreciated.

score 0 · Answer 1 · answered Aug 08 '21 at 08:39

I've realised I just needed to add the exogenous variable to the predict function, so it now works with:

# one-step sarima forecast
def sarima_forecast(history, config):
    """Fit a SARIMAX model on `history` and forecast the next observation.

    Args:
        history: the observations seen so far.
        config: `(order, seasonal_order, trend, exog)`. `exog` must support
            slicing with `.tolist()` and `.iloc` (presumably a pandas
            Series) and must extend at least one row beyond `history`.

    Returns:
        The forecast for the step immediately after `history`.
    """
    order, sorder, trend, exog = config
    n_obs = len(history)
    # define model
    model = SARIMAX(history, exog=exog[:n_obs].tolist(), order=order, seasonal_order=sorder, trend=trend, enforce_stationarity=False, enforce_invertibility=False)
    # fit model
    model_fit = model.fit(disp=False)
    # make one step forecast; the step is out of sample, so the model needs
    # the exogenous value for exactly that one step
    yhat = model_fit.predict(n_obs, n_obs, exog=exog.iloc[n_obs:n_obs + 1])
    return yhat[0]

Worth noting that I also needed to revert exog to a pandas DataFrame rather than a NumPy array, since `.iloc` is only available on pandas objects.

Adding exog variable into SARIMAX grid search

1 Answer