I have multiple time series that I would like to forecast with GluonTS and then concatenate, so that my result is a pandas DataFrame with the columns date, y (the target), and series (the series number).
The problem is that GluonTS produces a generator. I can look at a single series' forecast with next(iter(forecast_it)), but I would like to stack all of the forecasts together to make them easier to export as a CSV.
How can I stack the forecasts from all series into one pandas DataFrame?
import pandas as pd
import numpy as np
from gluonts.model.seasonal_naive import SeasonalNaivePredictor
from gluonts.evaluation.backtest import make_evaluation_predictions
N = 10 # number of time series
T = 100 # number of timesteps
prediction_length = 24
custom_dataset = np.random.normal(size=(N, T))
start = pd.Timestamp("01-01-2019", freq='1H')
# train dataset: cut the last window of length "prediction_length", add "target" and "start" fields
train_ds = [{'target': x, 'start': start} for x in custom_dataset[:, :-prediction_length]]
# test dataset: use the whole dataset, add "target" and "start" fields
test_ds = [{'target': x, 'start': start} for x in custom_dataset]
predictor = SeasonalNaivePredictor(
    prediction_length=prediction_length,
    season_length=24,
    freq='1H'
)
forecast_it, ts_it = make_evaluation_predictions(
    dataset=test_ds,        # test dataset
    predictor=predictor,    # predictor
    num_samples=100,        # number of sample paths we want for evaluation
)
test_entry = next(iter(forecast_it))
print(test_entry)
> gluonts.model.forecast.SampleForecast(freq="1H", info=None, item_id=None, samples=numpy.array([[-1.078548550605774, 0.3002452254295349, 0.1025903970003128, -1.6613410711288452, -0.2776057720184326, -0.020864564925432205, -1.9355241060256958, 1.0598571300506592, 0.16316552460193634, -0.9441472887992859, 2.7307169437408447, -0.35861697793006897, 0.22022956609725952, 0.8052476048469543, -1.1194337606430054, 0.05703512206673622, -1.1357367038726807, -2.544445037841797, 1.2661969661712646, 0.17130693793296814, 0.8647393584251404, -1.9620181322097778, -0.5465423464775085, 0.26572829484939575]], numpy.dtype("float32")), start_date=pandas.Timestamp("2019-01-04 04:00:00", freq="H"))
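To make the goal concrete, here is a minimal sketch of the kind of stacking I have in mind, assuming I take the mean of each forecast's sample paths as y and each forecast's own date index as date (those choices, and the forecasts.csv filename, are mine, not anything GluonTS prescribes). Since forecast_it is a generator, the next(iter(...)) call above already consumes its first entry, so the loop below would need a fresh call to make_evaluation_predictions.
# Sketch only: consume the forecast generator once, turning each
# SampleForecast into a small DataFrame, then concatenate everything.
frames = []
for i, forecast in enumerate(forecast_it):
    frames.append(pd.DataFrame({
        "date": forecast.index,   # timestamps of the prediction window
        "y": forecast.mean,       # mean over the 100 sample paths
        "series": i,              # position of the series in test_ds
    }))

result = pd.concat(frames, ignore_index=True)
result.to_csv("forecasts.csv", index=False)
Is there a more idiomatic way to do this, or a built-in GluonTS helper that produces such a stacked DataFrame directly?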