
I am using the Darts Python package to train a forecasting model on multiple time series. I am splitting my set of time series into train/test splits (without splitting individual time series) and running a cross-validated gridsearch without using the built-in gridsearch function, so I can handle the data the way I want. To reduce runtime, I am attempting to use multiprocessing for the gridsearch; however, when execution reaches model.fit(), I receive the following error:

AssertionError: daemonic processes are not allowed to have children
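
As far as I can tell, this assertion comes from multiprocessing itself: Pool workers are started as daemonic processes, and a daemonic process is not allowed to start children of its own. Here is a minimal sketch, unrelated to Darts, that reproduces the same assertion:

from multiprocessing import Pool, Process

def child():
    pass

def worker(_):
    # Pool workers run with daemon=True, so starting another process here
    # raises: AssertionError: daemonic processes are not allowed to have children
    p = Process(target=child)
    p.start()
    p.join()

if __name__ == "__main__":
    with Pool(2) as pool:
        pool.map(worker, range(2))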

I assume this is because the model.fit() function is spawning its own child processes. I have been searching through the documentation, and although the built-in gridsearch() function does not handle the data the way I want, it does allow for multiprocessing of model training/tuning via its n_jobs argument. This leads me to believe that I should be able to implement my own multiprocessing gridsearch and that this error is solvable.
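
For reference, this is roughly how I understand the built-in search would be invoked (a sketch based on my reading of the docs, not code I can use, since it works from a single train/validation split rather than my k-fold scheme; the series variables here are hypothetical):

from darts.metrics import smape
from darts.models import BlockRNNModel

params = {
    "model": ["LSTM"],
    "input_chunk_length": [14, 24],
    "output_chunk_length": [21],
    "hidden_size": [25, 50, 75],
    "n_rnn_layers": [1, 2],
}
best_model, best_params, best_score = BlockRNNModel.gridsearch(
    parameters=params,
    series=train_series,            # hypothetical training split
    past_covariates=train_covariates,
    val_series=val_series,          # hypothetical validation split
    metric=smape,
    n_jobs=-1,                      # the parallelism I am trying to replicate
)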

Here is the relevant code (it prints "checkpoint 4" and then ends):

from multiprocessing import Pool
import os

import numpy as np
import pandas as pd

from darts.dataprocessing.transformers import Scaler
from darts.metrics import smape
from darts.models import BlockRNNModel

# Perform k-fold cross-validated gridsearch:
# build one row per (parameter combination, fold)
input_chunk_length = [14, 24]
hidden_size = [25, 50, 75]
n_rnn_layers = [1, 2]

tuningdf = pd.DataFrame(columns=["p1", "p2", "p3", "Fold", "SMAPE"])
for param1 in input_chunk_length:
    for param2 in hidden_size:
        for param3 in n_rnn_layers:
            for i in range(k_folds):
                rowdf = pd.DataFrame(
                    [[param1, param2, param3, i, None]],
                    columns=["p1", "p2", "p3", "Fold", "SMAPE"],
                )
                tuningdf = pd.concat([tuningdf, rowdf])

# gridsearch() is defined below; note that pool.apply blocks until each
# call returns, so these calls run one at a time
pool = Pool(os.cpu_count())
tuned_list = [
    pool.apply(gridsearch, args=(row, kfold_flux, kfold_covariates))
    for row in tuningdf.to_dict("records")
]

def gridsearch(row, kfold_flux, kfold_covariates):
    param1 = row["p1"]
    param2 = row["p2"]
    param3 = row["p3"]
    i = row["Fold"]

    # Separate into testing and training datasets: hold out fold i for
    # testing and flatten the remaining folds into one training list
    # (each fold is a list of darts TimeSeries)
    ktemp = kfold_flux.copy()
    flux_test = ktemp.pop(i)  # pop() returns the removed fold
    flux_train = [series for fold in ktemp for series in fold]
    print(len(flux_train))
    print(len(flux_test))

    cotemp = kfold_covariates.copy()
    covariates_test = cotemp.pop(i)
    print("checkpoint 1")
    covariates_train = [series for fold in cotemp for series in fold]

    print("checkpoint 2")
    # Fit standardization on training data, transform training and testing data
    mmscaler = Scaler()
    flux_scaler = Scaler(n_jobs=1)
    flux_train_scaled = flux_scaler.fit_transform(flux_train)
    flux_test_scaled = flux_scaler.transform(flux_test)
    covariate_scaler = Scaler(n_jobs=1)
    covariates_train_scaled = covariate_scaler.fit_transform(covariates_train)
    covariates_test_scaled = covariate_scaler.transform(covariates_test)
    print("chekpoint 3")

    # Initialize and train model on this parameter set
    lstm_model = BlockRNNModel(
        model="LSTM",
        input_chunk_length=param1,
        output_chunk_length=21,
        n_epochs=1,
        random_state=0,
        hidden_size=param2,
        n_rnn_layers=param3,
    )

    print("checkpoint 4")
    # The AssertionError is raised inside this fit() call:
    lstm_model.fit(
        series=flux_train_scaled,
        past_covariates=covariates_train_scaled,
        verbose=True,
        num_loader_workers=1,
    )

    print("chekpoint 5")
    series_smapes = []
    for j in np.arange(len(flux_test_scaled)):
        print("Backtest iter " + str(j))
        error = lstm_model.backtest(
            series=flux_test_scaled[j],
            past_covariates=covariates_test_scaled[j],
            forecast_horizon=21,
            stride=21,
            retrain=True,
            last_points_only=False,
            metric=smape
        )
        series_smapes.append(error)
    avg_smape = np.mean(series_smapes)
    return avg_smape
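
In case the data layout matters: kfold_flux and kfold_covariates are lists of k folds, where each fold is a list of whole darts TimeSeries (no individual series is split across folds). The real folds come from my own splitting code; this hypothetical stand-in just illustrates the structure:

import numpy as np
from darts import TimeSeries

# Six toy series assigned round-robin to three folds, so each fold
# is a list of whole TimeSeries; kfold_covariates is built the same way
all_series = [
    TimeSeries.from_values(np.random.rand(100).astype(np.float32))
    for _ in range(6)
]
k_folds = 3
kfold_flux = [all_series[f::k_folds] for f in range(k_folds)]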