
I have a TensorFlow workflow set up to split my training data and use k-fold cross-validation, where the script iterates k times and trains a new model on each subset of the data. However, I'm having trouble saving each model's training history independently.

Here is the code I'm using to train the model with k-fold cross-validation:

import copy

import numpy as np
import tensorflow as tf
from sklearn.model_selection import KFold
from tensorflow.keras.callbacks import History, ReduceLROnPlateau, EarlyStopping

model_kfold = 3
model_epoch = 5

# initialize an empty list to hold each model's training history
historylist = []

# create callbacks list to avoid model overfitting during training and save training history
callback_list = [History()]
callback_list.append(ReduceLROnPlateau(monitor='loss', factor=model_lr_factor, patience=model_lr_patience, min_delta=model_lr_delta, mode='min', verbose=1))
callback_list.append(EarlyStopping(monitor='loss', patience=model_early_stop_patience, restore_best_weights=True, verbose=1))
        
# compile, train, and evaluate model using kfold cross-validation
if model_kfold > 1:
    # get the total dataset length
    dslen = tf.data.experimental.cardinality(dat).numpy()

    # create a kfold split from the number of splits in the user arguments
    # NOTE: this assumes that the data has already been shuffled
    kfold = KFold(n_splits=model_kfold, shuffle=False)

    kcounter = 1
            
    for _,te in kfold.split(np.arange(dslen)):
        print('\nTRAINING MODEL {} of {}'.format(kcounter,model_kfold))

        # Split dataset into training and testing (evaluation) datasets
        # extract the testing dataset (used for model evaluation)
        data_eval = dat.skip(te[0]).take(te[-1]-te[0]+1)

        """
        To extract the training dataset around the testing dataset, the original
        training dataset must first be split into two parts:
            part 1: contains data from the beginning of the training dataset up to 
                    the beginning of the testing dataset
            part 2: contains data from the end of the testing dataset up to the end
                    of the original training dataset
        Parts 1 and 2 are then concatenated to get a single combined training dataset.
        """
        if te[0] == 0:
            traina = dat.take(0)
        else:
            traina = dat.take(te[0])
        if te[-1] == dslen-1:
            trainb = dat.take(0)
        else:
            trainb = dat.skip(te[-1]+1).take(dslen-te[-1]-1)
        # concatenate the two training datasets
        data_train = traina.concatenate(trainb)
        del(traina, trainb)      # clean up workspace

        # Split data_train into training and validation datasets
        # get training dataset length
        trainlen = tf.data.experimental.cardinality(data_train).numpy()
        kfold_splits = (model_kfold*2-2)
        data_val = data_train.take(int(trainlen/kfold_splits))
        data_train = data_train.skip(int(trainlen/kfold_splits)).take(trainlen - int(trainlen/kfold_splits))
        del(kfold_splits, trainlen)

        # batch training data
        if model_batch_size > 1:
            data_train = data_train.batch(model_batch_size, drop_remainder=True, num_parallel_calls=tf.data.AUTOTUNE)
            data_val = data_val.batch(model_batch_size, drop_remainder=True, num_parallel_calls=tf.data.AUTOTUNE)
            data_eval = data_eval.batch(model_batch_size, drop_remainder=True, num_parallel_calls=tf.data.AUTOTUNE)
        # cache training data
        if model_cache:
            data_train = data_train.cache()
            data_val = data_val.cache()
            data_eval = data_eval.cache()
        # prefetch training data
        if model_prefetch:
            data_train = data_train.prefetch(buffer_size=tf.data.AUTOTUNE)
            data_val = data_val.prefetch(buffer_size=tf.data.AUTOTUNE)
            data_eval = data_eval.prefetch(buffer_size=tf.data.AUTOTUNE)

        # re-build the model 
        if kcounter > 1:
            # delete old model
            del(model)
            # re-build model - model compiling is built into build_model()
            build_model()
        model.reset_metrics()
        model.reset_states()
                    
        # train the model
        history = model.fit(data_train,
                                validation_data=data_val,
                                callbacks=[callback_list],
                                verbose=verbose_run,
                                epochs=model_epoch)
            
        # print the model training history to console
        print('model {} history:\n{}'.format(kcounter, np.asarray(history.history)))

        # add the training history object to the list of model histories
        historylist.append(copy.deepcopy(history.history))
        kcounter+=1
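
To make the skip/take splitting described in the comment block above concrete, here is what that pattern does on a toy 9-element dataset (the indices are illustrative only, separate from my real data):

import tensorflow as tf

# toy stand-in for the real dataset: 9 elements, already "shuffled"
toy = tf.data.Dataset.range(9)
te = [3, 4, 5]          # test indices for one fold (contiguous, as with shuffle=False)
dslen = 9

toy_eval = toy.skip(te[0]).take(te[-1]-te[0]+1)          # elements 3, 4, 5
toy_a = toy.take(te[0])                                  # elements 0, 1, 2
toy_b = toy.skip(te[-1]+1).take(dslen-te[-1]-1)          # elements 6, 7, 8
toy_train = toy_a.concatenate(toy_b)

print(list(toy_eval.as_numpy_iterator()))    # [3, 4, 5]
print(list(toy_train.as_numpy_iterator()))   # [0, 1, 2, 6, 7, 8]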

All of this code is part of a custom class. The build_model() function produces a compiled TF model called model. To keep the snippet readable, I removed all of the self. references that would otherwise attach functions, variables, and outputs to the class.
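
For reference, build_model() is shaped roughly like the stand-in below; the layers and optimizer here are placeholders rather than my actual architecture, and the real function assigns the compiled model to self.model:

def build_model():
    # placeholder stand-in for the real build_model(); the actual layers,
    # optimizer, and metrics are defined elsewhere and not shown here
    global model          # stands in for assigning self.model inside the class
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(16, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model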

Currently, the history output always builds on itself, continuing from where the previous fold left off. For demonstration purposes I'm using model_kfold = 3 (i.e. splitting the data into 66% training, 17% validation, and 17% evaluation data):

model 1 history:
{'loss': [0.8826593160629272, 0.7190579175949097, 0.694132924079895, 0.6931252479553223, 0.6907672882080078], 'accuracy': [0.48046875, 0.50390625, 0.484375, 0.5078125, 0.5], 'val_loss': [0.6931471824645996, 0.6931471824645996, 0.6931471824645996, 0.6931471824645996, 0.6931471824645996], 'val_accuracy': [0.5, 0.59375, 0.5, 0.46875, 0.46875]}

model 2 history:
{'loss': [0.8826593160629272, 0.7190579175949097, 0.694132924079895, 0.6931252479553223, 0.6907672882080078, 1.4485832452774048, 0.7368494868278503, 0.7173848748207092, 0.727489173412323, 0.7137857675552368], 'accuracy': [0.48046875, 0.50390625, 0.484375, 0.5078125, 0.5, 0.4921875, 0.53515625, 0.515625, 0.4765625, 0.5], 'val_loss': [0.6931471824645996, 0.6931471824645996, 0.6931471824645996, 0.6931471824645996, 0.6931471824645996, 0.7613881826400757, 0.6955892443656921, 0.6848217248916626, 0.6944230198860168, 0.69476717710495], 'val_accuracy': [0.5, 0.59375, 0.5, 0.46875, 0.46875, 0.4375, 0.46875, 0.5625, 0.5, 0.484375]}

model 3 history:
{'loss': [0.8826593160629272, 0.7190579175949097, 0.694132924079895, 0.6931252479553223, 0.6907672882080078, 1.4485832452774048, 0.7368494868278503, 0.7173848748207092, 0.727489173412323, 0.7137857675552368, 1.2148590087890625, 0.7807716727256775, 0.7444809675216675, 0.7982805371284485, 0.6838090419769287], 'accuracy': [0.48046875, 0.50390625, 0.484375, 0.5078125, 0.5, 0.4921875, 0.53515625, 0.515625, 0.4765625, 0.5, 0.48046875, 0.48046875, 0.5, 0.484375, 0.50390625], 'val_loss': [0.6931471824645996, 0.6931471824645996, 0.6931471824645996, 0.6931471824645996, 0.6931471824645996, 0.7613881826400757, 0.6955892443656921, 0.6848217248916626, 0.6944230198860168, 0.69476717710495, 0.6931471824645996, 0.6931471824645996, 0.6931471824645996, 0.6931471824645996, 0.6931471824645996], 'val_accuracy': [0.5, 0.59375, 0.5, 0.46875, 0.46875, 0.4375, 0.46875, 0.5625, 0.5, 0.484375, 0.59375, 0.53125, 0.53125, 0.5625, 0.46875]}

Instead of the above outputs, I need something that looks like this:

model 1 history:
{'loss': [0.8826593160629272, 0.7190579175949097, 0.694132924079895, 0.6931252479553223, 0.6907672882080078], 'accuracy': [0.48046875, 0.50390625, 0.484375, 0.5078125, 0.5], 'val_loss': [0.6931471824645996, 0.6931471824645996, 0.6931471824645996, 0.6931471824645996, 0.6931471824645996], 'val_accuracy': [0.5, 0.59375, 0.5, 0.46875, 0.46875]}

model 2 history:
{'loss': [1.4485832452774048, 0.7368494868278503, 0.7173848748207092, 0.727489173412323, 0.7137857675552368], 'accuracy': [0.4921875, 0.53515625, 0.515625, 0.4765625, 0.5], 'val_loss': [0.7613881826400757, 0.6955892443656921, 0.6848217248916626, 0.6944230198860168, 0.69476717710495], 'val_accuracy': [0.4375, 0.46875, 0.5625, 0.5, 0.484375]}

model 3 history:
{'loss': [1.2148590087890625, 0.7807716727256775, 0.7444809675216675, 0.7982805371284485, 0.6838090419769287], 'accuracy': [0.48046875, 0.48046875, 0.5, 0.484375, 0.50390625], 'val_loss': [0.6931471824645996, 0.6931471824645996, 0.6931471824645996, 0.6931471824645996, 0.6931471824645996], 'val_accuracy': [0.59375, 0.53125, 0.53125, 0.5625, 0.46875]}

From what I can tell, the model should be getting deleted completely before each successive k-fold iteration, but maybe that's not actually happening.

I've tried adding model.reset_metrics() and/or model.reset_states(), with no success. I've also since switched to appending copy.deepcopy(history.history) to historylist.

Regardless of how I try to "reset" or "delete" the model at the start of each k-fold iteration, the training history doesn't appear to reset.

  • Note: you never increment `kcounter`, so the deletion never happens. Also, does `build_model()` return something? It seems to me there is a missing assignment, like `model = build_model()`. – Luca Anzalone May 01 '23 at 20:36
  • @LucaAnzalone I forgot to copy the `kcounter+=1` line from my code, but that's not the part that was breaking. The `build_model()` function is defined in the same class as everything else here. It returns `self.model`, which is a compiled TF model. – Phil Wernette May 01 '23 at 22:24

1 Answer


I think I figured out the issue: it was simply where the callbacks list was created in the code. Originally it sat outside the k-fold loop, which meant History() was instantiated once at the start of the script and never reset between successive model trainings, so each call to fit() kept appending to the same history.

In other words, the following lines were in the wrong place:

# create callbacks list to avoid model overfitting during training and save training history
callback_list = [History()]
callback_list.append(ReduceLROnPlateau(monitor='loss', factor=model_lr_factor, patience=model_lr_patience, min_delta=model_lr_delta, mode='min', verbose=1))
callback_list.append(EarlyStopping(monitor='loss', patience=model_early_stop_patience, restore_best_weights=True, verbose=1))

Instead, the callbacks list should be re-created every time a new k-fold model is built. That way the History object starts out empty for each model. The updated code looks like this:

model_kfold = 3
model_epoch = 5

# initialize an empty list to hold each model's training history
historylist = []
        
# compile, train, and evaluate model using kfold cross-validation
if model_kfold > 1:
    # get the total dataset length
    dslen = tf.data.experimental.cardinality(dat).numpy()

    # create a kfold split from the number of splits in the user arguments
    # NOTE: this assumes that the data has already been shuffled
    kfold = KFold(n_splits=model_kfold, shuffle=False)

    kcounter = 1
            
    for _,te in kfold.split(np.arange(dslen)):
        print('\nTRAINING MODEL {} of {}'.format(kcounter,model_kfold))

        # Split dataset into training and testing (evaluation) datasets
        # extract the testing dataset (used for model evaluation)
        data_eval = dat.skip(te[0]).take(te[-1]-te[0]+1)

        """
        To extract the training dataset around the testing dataset, the original
        training dataset must first be split into two parts:
            part 1: contains data from the beginning of the training dataset up to 
                    the beginning of the testing dataset
            part 2: contains data from the end of the testing dataset up to the end
                    of the original training dataset
        Parts 1 and 2 are then concatenated to get a single combined training dataset.
        """
        if te[0] == 0:
            traina = dat.take(0)
        else:
            traina = dat.take(te[0])
        if te[-1] == dslen-1:
            trainb = dat.take(0)
        else:
            trainb = dat.skip(te[-1]+1).take(dslen-te[-1]-1)
        # concatenate the two training datasets
        data_train = traina.concatenate(trainb)
        del(traina, trainb)      # clean up workspace

        # Split data_train into training and validation datasets
        # get training dataset length
        trainlen = tf.data.experimental.cardinality(data_train).numpy()
        kfold_splits = (model_kfold*2-2)
        data_val = data_train.take(int(trainlen/kfold_splits))
        data_train = data_train.skip(int(trainlen/kfold_splits)).take(trainlen - int(trainlen/kfold_splits))
        del(kfold_splits, trainlen)

        # batch training data
        if model_batch_size > 1:
            data_train = data_train.batch(model_batch_size, drop_remainder=True, num_parallel_calls=tf.data.AUTOTUNE)
            data_val = data_val.batch(model_batch_size, drop_remainder=True, num_parallel_calls=tf.data.AUTOTUNE)
            data_eval = data_eval.batch(model_batch_size, drop_remainder=True, num_parallel_calls=tf.data.AUTOTUNE)
        # cache training data
        if model_cache:
            data_train = data_train.cache()
            data_val = data_val.cache()
            data_eval = data_eval.cache()
        # prefetch training data
        if model_prefetch:
            data_train = data_train.prefetch(buffer_size=tf.data.AUTOTUNE)
            data_val = data_val.prefetch(buffer_size=tf.data.AUTOTUNE)
            data_eval = data_eval.prefetch(buffer_size=tf.data.AUTOTUNE)

        # re-build the model 
        if kcounter > 1:
            # delete old model
            del(model)
            # re-build model - model compiling is built into build_model()
            build_model()
        
        # create callbacks list to avoid model overfitting during training and save training history
        callback_list = [History()]
        callback_list.append(ReduceLROnPlateau(monitor='loss', factor=model_lr_factor, patience=model_lr_patience, min_delta=model_lr_delta, mode='min', verbose=1))
        callback_list.append(EarlyStopping(monitor='loss', patience=model_early_stop_patience, restore_best_weights=True, verbose=1))
                    
        # train the model
        history = model.fit(data_train,
                                validation_data=data_val,
                                callbacks=[callback_list],
                                verbose=verbose_run,
                                epochs=model_epoch)
            
        # print the model training history to console
        print('model {} history:\n{}'.format(kcounter, np.asarray(history.history)))

        # add the training history object to the list of model histories
        historylist.append(copy.deepcopy(history.history))
        kcounter+=1
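
To see the behavior in isolation: in tf.keras, History.on_train_begin() resets its epoch list but not its history dict, so a single History() instance reused across fit() calls keeps accumulating entries, while a fresh instance per fit starts empty. A minimal standalone sketch (toy data and a throwaway model, separate from the code above) demonstrates the difference:

import numpy as np
import tensorflow as tf
from tensorflow.keras.callbacks import History

x = np.random.rand(32, 4).astype('float32')
y = np.random.randint(0, 2, size=(32, 1)).astype('float32')

def build_toy_model():
    m = tf.keras.Sequential([tf.keras.layers.Dense(1, activation='sigmoid')])
    m.compile(optimizer='adam', loss='binary_crossentropy')
    return m

shared_history = History()                     # created once, outside the loop
for k in range(2):
    m = build_toy_model()                      # fresh model each iteration
    m.fit(x, y, epochs=2, callbacks=[shared_history], verbose=0)
    print('shared:', len(shared_history.history['loss']))   # 2, then 4 -- accumulates

for k in range(2):
    m = build_toy_model()
    fresh_history = History()                  # created inside the loop
    m.fit(x, y, epochs=2, callbacks=[fresh_history], verbose=0)
    print('fresh:', len(fresh_history.history['loss']))     # 2, then 2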