I am trying to tune my NN hyperparameters. This was my initial NN:
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(256, activation=tf.nn.tanh, input_shape=[X_train_norm.shape[1]]),
    tf.keras.layers.Dense(256, activation=tf.nn.tanh),
    tf.keras.layers.Dense(256, activation=tf.nn.tanh),
    tf.keras.layers.Dense(1)
])
model.compile(loss='mse',
              optimizer=tf.keras.optimizers.RMSprop(0.001),
              metrics=['mse', 'mae'])
# note: input_shape comes from X_train_norm, but fit() is called on the raw X_train
history = model.fit(X_train, y_train,
                    epochs=80,
                    batch_size=8,
                    validation_split=0.2)
R2_train_set = 0.89
RMSE_train_set = 80
R2_test_set = 0.85
RMSE_test_set = 94
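For reference, this is roughly how the R2/RMSE figures above can be computed (a sketch with scikit-learn; the report helper and the X_test/y_test names are illustrative, not part of my training code):

import numpy as np
from sklearn.metrics import r2_score, mean_squared_error

# Illustrative helper: report R2 and RMSE for a fitted model on one split
def report(model, X, y, label):
    pred = model.predict(X).ravel()
    print(f'{label}: R2 = {r2_score(y, pred):.2f}, '
          f'RMSE = {np.sqrt(mean_squared_error(y, pred)):.0f}')

report(model, X_train, y_train, 'train')
report(model, X_test, y_test, 'test')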
Using several hyperparameter tuning methods made the model worse!! I would love an explanation: what am I missing?
My code:
import tensorflow as tf
import keras_tuner as kt
from tensorflow import keras
Option 1 (Keras Tuner RandomSearch):
#https://coderzcolumn.com/tutorials/artificial-intelligence/keras-tuner-hyperparameters-tuning-of-keras-models
def build_model(hyperparams):
    model = keras.models.Sequential()
    model.add(keras.layers.Dense(units=hyperparams.Int("units_l1", 32, 512, step=32),
                                 activation=hyperparams.Choice("act_l1", ["relu", "tanh"]),
                                 input_shape=[X_train.shape[1]]))
    model.add(keras.layers.Dense(units=hyperparams.Int("units_l2", 32, 512, step=32),
                                 activation=hyperparams.Choice("act_l2", ["relu", "tanh"])))
    model.add(keras.layers.Dense(units=hyperparams.Int("units_l3", 16, 64, step=16),
                                 activation=hyperparams.Choice("act_l3", ["relu", "tanh"])))
    model.add(keras.layers.Dense(1))
    learning_rate = hyperparams.Choice('learning_rate', [0.001, 0.01, 0.1, 1.0])
    optim_name = hyperparams.Choice("optimizer", ["rmsprop", "adam"])
    if optim_name == 'adam':
        optim = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    else:  # 'rmsprop'
        optim = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)
    model.compile(optimizer=optim,
                  loss="mean_squared_error",
                  metrics=['mean_squared_error', 'mean_absolute_error'])
    return model
tuner1 = kt.RandomSearch(hypermodel=build_model,
                         objective="val_mean_squared_error",
                         max_trials=150,
                         project_name="Regression",
                         overwrite=True)
tuner1.search(X_train, y_train, batch_size=8, epochs=30, validation_split=0.2)
best_model = tuner1.get_best_models()[0]
best_model.summary()
best_params = tuner1.get_best_hyperparameters()
best_params[0].values
The best params:
{'units_l1': 192,
 'act_l1': 'relu',
 'units_l2': 448,
 'act_l2': 'tanh',
 'units_l3': 64,
 'act_l3': 'relu',
 'learning_rate': 0.001,
 'optimizer': 'adam'}
R2_train_set = 0.625
RMSE_train_set = 132
R2_test_set = 0.62
RMSE_test_set = 130
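One thing I realize may skew this comparison: the search trains each candidate for only 30 epochs, while my baseline was trained for 80. A sketch of rebuilding the winning hyperparameters and retraining with the same budget (this uses the standard Keras Tuner API; the retrained name is mine):

# Rebuild a fresh model from the best hyperparameters and train it
# with the same budget as the baseline before comparing metrics
best_hp = tuner1.get_best_hyperparameters()[0]
retrained = tuner1.hypermodel.build(best_hp)
retrained.fit(X_train, y_train,
              epochs=80,  # match the baseline's 80 epochs
              batch_size=8,
              validation_split=0.2)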
Option 2 (manual grid search):
# Average of a list
def Average(lst):
    return sum(lst) / len(lst)
# Define the hyperparameter grid:
hyperparameters = {
    'num_units': [32, 64, 128, 256],  # 4, 8, 16,
    'optimizer': ['adam', 'rmsprop'],
    'activation': ['relu', 'tanh'],
    'learning_rate': [0.001, 0.01, 0.1, 1],
    'batch_size': [8, 16, 32, 64, 128]
}
min_mse = 150000  # start from a large value so the first candidate becomes the best
my_str = ''       # description of the best combination found so far

# Tune the hyperparameters with an exhaustive grid search
for units in hyperparameters['num_units']:
    for opt_name in hyperparameters['optimizer']:
        for activ in hyperparameters['activation']:
            for lear_rate in hyperparameters['learning_rate']:
                for batch in hyperparameters['batch_size']:
                    # Define the model architecture
                    model = tf.keras.models.Sequential([
                        tf.keras.layers.Dense(units, activation=activ, input_shape=[X_train_norm.shape[1]]),
                        tf.keras.layers.Dense(units, activation=activ),
                        tf.keras.layers.Dense(units, activation=activ),
                        tf.keras.layers.Dense(128, activation=activ),
                        tf.keras.layers.Dense(1)
                    ])
                    # Build a fresh optimizer for every candidate
                    # (an optimizer instance cannot be safely reused across models)
                    if opt_name == 'adam':
                        optimizer = tf.keras.optimizers.Adam(learning_rate=lear_rate)
                    else:  # 'rmsprop'
                        optimizer = tf.keras.optimizers.RMSprop(learning_rate=lear_rate)
                    # Compile the model
                    model.compile(optimizer=optimizer, metrics=['mse', 'mae'], loss='mse')
                    # Train with a hold-out validation split (not true cross-validation)
                    history = model.fit(X_train_norm, y_train, epochs=80, batch_size=batch,
                                        validation_split=0.2, verbose=0)
                    print(f'units: {units}, batch: {batch}, learning_rate: {lear_rate}, '
                          f'opt: {opt_name}, activ: {activ}')
                    # Average training MSE over the last 10 epochs
                    Last_his = Average(history.history['mse'][-10:])
                    # Check if this is the best model so far
                    if Last_his < min_mse:
                        min_mse = Last_his
                        my_str = (f'BEST - units: {units}, batch: {batch}, '
                                  f'learning_rate: {lear_rate}, opt: {opt_name}, activ: {activ}')
                        print(my_str)

# Print the best hyperparameters found
print(my_str)
BEST - units: 128, batch: 8, learning_rate: 0.001, opt: adam, activ: tanh
RMSE_train_set = 177
I noticed the mean_squared_error did reach its minimum at the combination that was chosen as "best". I tried with and without normalizing the data, and I verified that the data itself did not change...
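Could the problem be that the grid search ranks candidates by their training MSE, which rewards overfitting, while my baseline numbers were judged on the test set? A sketch of what ranking by validation MSE would look like instead (reusing the Average helper and the variable names from the loop above):

# Score each candidate on held-out data instead of the training fit;
# 'val_mse' is recorded by fit() because validation_split=0.2 and
# 'mse' is among the compiled metrics
Last_his = Average(history.history['val_mse'][-10:])
if Last_his < min_mse:
    min_mse = Last_his
    my_str = (f'BEST - units: {units}, batch: {batch}, '
              f'learning_rate: {lear_rate}, opt: {opt_name}, activ: {activ}')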