I am trying to tune my NN hyperparameters. This was my initial NN:
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(256, activation=tf.nn.tanh, input_shape=[X_train_norm.shape[1]]),
    tf.keras.layers.Dense(256, activation=tf.nn.tanh),
    tf.keras.layers.Dense(256, activation=tf.nn.tanh),
    tf.keras.layers.Dense(1)
])
model.compile(loss='mse',
              optimizer=tf.keras.optimizers.RMSprop(0.001),
              metrics=['mse', 'mae'])
# note: input_shape comes from X_train_norm, but fit() is called on the raw X_train
history = model.fit(X_train, y_train,
                    epochs=80,
                    batch_size=8,
                    validation_split=0.2)
R2_train_set = 0.89
RMSE_train_set = 80
R2_test_set = 0.85
RMSE_test_set = 94
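For reference, this is roughly how the R2/RMSE figures above can be computed (a sketch with scikit-learn; the report helper and the X_test/y_test names are illustrative, not part of my training code):

import numpy as np
from sklearn.metrics import r2_score, mean_squared_error

# Illustrative helper: report R2 and RMSE for a fitted model on one split
def report(model, X, y, label):
    pred = model.predict(X).ravel()
    print(f'{label}: R2 = {r2_score(y, pred):.2f}, '
          f'RMSE = {np.sqrt(mean_squared_error(y, pred)):.0f}')

report(model, X_train, y_train, 'train')
report(model, X_test, y_test, 'test')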
Using several hyperparameter tuning methods made the model worse!! I would love an explanation: what am I missing?
My code:
import tensorflow as tf
import keras_tuner as kt
from tensorflow import keras
Option 1 (Keras Tuner RandomSearch):
#https://coderzcolumn.com/tutorials/artificial-intelligence/keras-tuner-hyperparameters-tuning-of-keras-models
def build_model(hyperparams):
    model = keras.models.Sequential()
    model.add(keras.layers.Dense(units=hyperparams.Int("units_l1", 32, 512, step=32),
                                 activation=hyperparams.Choice("act_l1", ["relu", "tanh"]),
                                 input_shape=[X_train.shape[1]]))
    model.add(keras.layers.Dense(units=hyperparams.Int("units_l2", 32, 512, step=32),
                                 activation=hyperparams.Choice("act_l2", ["relu", "tanh"])))
    model.add(keras.layers.Dense(units=hyperparams.Int("units_l3", 16, 64, step=16),
                                 activation=hyperparams.Choice("act_l3", ["relu", "tanh"])))
    model.add(keras.layers.Dense(1))
    learning_rate = hyperparams.Choice('learning_rate', [0.001, 0.01, 0.1, 1.0])
    optim_name = hyperparams.Choice("optimizer", ["rmsprop", "adam"])
    if optim_name == 'adam':
        optim = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    else:  # 'rmsprop'
        optim = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)
    model.compile(optimizer=optim,
                  loss="mean_squared_error",
                  metrics=['mean_squared_error', 'mean_absolute_error'])
    return model
tuner1 = kt.RandomSearch(hypermodel=build_model,
                         objective="val_mean_squared_error",
                         max_trials=150,
                         project_name="Regression",
                         overwrite=True)
tuner1.search(X_train, y_train, batch_size=8, epochs=30, validation_split=0.2)
best_model = tuner1.get_best_models()[0]
best_model.summary()
best_params = tuner1.get_best_hyperparameters()
best_params[0].values
The best params:
{'units_l1': 192,
 'act_l1': 'relu',
 'units_l2': 448,
 'act_l2': 'tanh',
 'units_l3': 64,
 'act_l3': 'relu',
 'learning_rate': 0.001,
 'optimizer': 'adam'}
R2_train_set = 0.625
RMSE_train_set = 132
R2_test_set = 0.62
RMSE_test_set = 130
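One thing I realize may skew this comparison: the search trains each candidate for only 30 epochs, while my baseline was trained for 80. A sketch of rebuilding the winning hyperparameters and retraining with the same budget (this uses the standard Keras Tuner API; the retrained name is mine):

# Rebuild a fresh model from the best hyperparameters and train it
# with the same budget as the baseline before comparing metrics
best_hp = tuner1.get_best_hyperparameters()[0]
retrained = tuner1.hypermodel.build(best_hp)
retrained.fit(X_train, y_train,
              epochs=80,  # match the baseline's 80 epochs
              batch_size=8,
              validation_split=0.2)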
Option 2 (manual grid search):
# Average of a list
def Average(lst):
    return sum(lst) / len(lst)
# Define the hyperparameter grid:
hyperparameters = {
    'num_units': [32, 64, 128, 256],  # 4, 8, 16,
    'optimizer': ['adam', 'rmsprop'],
    'activation': ['relu', 'tanh'],
    'learning_rate': [0.001, 0.01, 0.1, 1],
    'batch_size': [8, 16, 32, 64, 128]
}
min_mse = 150000  # start from a large value so the first candidate becomes the best
my_str = ''       # description of the best combination found so far

# Tune the hyperparameters with an exhaustive grid search
for units in hyperparameters['num_units']:
    for opt_name in hyperparameters['optimizer']:
        for activ in hyperparameters['activation']:
            for lear_rate in hyperparameters['learning_rate']:
                for batch in hyperparameters['batch_size']:
                    # Define the model architecture
                    model = tf.keras.models.Sequential([
                        tf.keras.layers.Dense(units, activation=activ, input_shape=[X_train_norm.shape[1]]),
                        tf.keras.layers.Dense(units, activation=activ),
                        tf.keras.layers.Dense(units, activation=activ),
                        tf.keras.layers.Dense(128, activation=activ),
                        tf.keras.layers.Dense(1)
                    ])
                    # Build a fresh optimizer for every candidate
                    # (an optimizer instance cannot be safely reused across models)
                    if opt_name == 'adam':
                        optimizer = tf.keras.optimizers.Adam(learning_rate=lear_rate)
                    else:  # 'rmsprop'
                        optimizer = tf.keras.optimizers.RMSprop(learning_rate=lear_rate)
                    # Compile the model
                    model.compile(optimizer=optimizer, metrics=['mse', 'mae'], loss='mse')
                    # Train with a hold-out validation split (not true cross-validation)
                    history = model.fit(X_train_norm, y_train, epochs=80, batch_size=batch,
                                        validation_split=0.2, verbose=0)
                    print(f'units: {units}, batch: {batch}, learning_rate: {lear_rate}, '
                          f'opt: {opt_name}, activ: {activ}')
                    # Average training MSE over the last 10 epochs
                    Last_his = Average(history.history['mse'][-10:])
                    # Check if this is the best model so far
                    if Last_his < min_mse:
                        min_mse = Last_his
                        my_str = (f'BEST - units: {units}, batch: {batch}, '
                                  f'learning_rate: {lear_rate}, opt: {opt_name}, activ: {activ}')
                        print(my_str)

# Print the best hyperparameters found
print(my_str)
BEST - units: 128, batch: 8, learning_rate: 0.001, opt: adam, activ: tanh
RMSE_train_set = 177
I noticed the mean_squared_error did reach its minimum at the combination that was chosen as "best". I tried with and without normalizing the data, and I verified that the data itself did not change...
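Could the problem be that the grid search ranks candidates by their training MSE, which rewards overfitting, while my baseline numbers were judged on the test set? A sketch of what ranking by validation MSE would look like instead (reusing the Average helper and the variable names from the loop above):

# Score each candidate on held-out data instead of the training fit;
# 'val_mse' is recorded by fit() because validation_split=0.2 and
# 'mse' is among the compiled metrics
Last_his = Average(history.history['val_mse'][-10:])
if Last_his < min_mse:
    min_mse = Last_his
    my_str = (f'BEST - units: {units}, batch: {batch}, '
              f'learning_rate: {lear_rate}, opt: {opt_name}, activ: {activ}')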