I'm using Keras Tuner to tune the hyperparameters of a CNN that processes EMG data. The code runs on 4 NVIDIA GPUs and 5 CPUs with TensorFlow's MirroredStrategy. When I run the tuner, it gets through all of the trials, training each for two epochs, but when it tries to restore the saved models to run them for more epochs in the next Hyperband iteration, it throws this error:
Make sure the slot variables are created under the same strategy scope. This may happen if you're restoring from a checkpoint outside the scope
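From reading the error, my understanding is that the optimizer's slot variables (e.g. Adam's moment accumulators) have to be created under the same strategy.scope() that created the model's variables, so a restore would have to look roughly like the sketch below ("checkpoint_path" is just a placeholder, not a path Keras Tuner actually uses):

    # Minimal sketch of what I understand the error to mean: restoring a
    # saved model (which creates its optimizer slot variables) must happen
    # inside the same distribution scope that built the original variables.
    with strategy.scope():
        restored = tf.keras.models.load_model("checkpoint_path")

Since Keras Tuner itself is doing the restoring between Hyperband brackets, I don't see where I'd apply this in my own code.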
This is the code:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import (Input, Conv2D, MaxPooling2D,
                                     Flatten, Dropout, Dense)
from tensorflow.keras.models import Model
from keras_tuner import HyperModel, Hyperband


class CNNHyperModel(HyperModel):
    def __init__(self, input_shape, output_shape):
        self.input_shape = input_shape
        self.output_shape = output_shape

    def build(self, hp):
        # Three conv/pool blocks; filter counts are powers of two derived
        # from a single tuned base exponent.
        visible = Input(shape=(self.input_shape[0], self.input_shape[1], 1))
        filter_number = hp.Int("filter_base_size", 5, 7)
        conv = Conv2D(filters=2**filter_number, kernel_size=(4, 4),
                      activation='relu', padding='same')(visible)
        pooling = MaxPooling2D(pool_size=(2, 2), strides=2, padding='valid')(conv)
        conv2 = Conv2D(filters=2**(filter_number + 1), kernel_size=(3, 3),
                       activation='relu', padding='same')(pooling)
        pooling2 = MaxPooling2D(pool_size=(2, 2), strides=2, padding='valid')(conv2)
        conv3 = Conv2D(filters=2**(filter_number + 2), kernel_size=(2, 2),
                       activation='relu', padding='same')(pooling2)
        pooling3 = MaxPooling2D(pool_size=(2, 2), strides=2, padding='valid')(conv3)

        flatten = Flatten()(pooling3)
        dropout = Dropout(0.5)(flatten)

        # Tuned dense head: 3 or 4 layers, each with its own tuned width.
        num_layers = hp.Int("num_layers", 3, 4)
        dense_units = [hp.Int("dense_units_1", 80, 240, step=30),
                       hp.Int("dense_units_2", 80, 240, step=30),
                       hp.Int("dense_units_3", 80, 240, step=30),
                       hp.Int("dense_units_4", 80, 240, step=30)]
        # (Earlier experiment: fixed num_layers = 4 with mostly hardcoded widths.)
        hidden = dropout
        for i in range(num_layers):
            hidden = Dense(units=dense_units[i], activation='relu')(hidden)
        output = Dense(units=self.output_shape[0], activation='softmax')(hidden)

        model = Model(inputs=visible, outputs=output)
        model.compile(
            optimizer="adam",
            loss="categorical_crossentropy",
            metrics=["accuracy"],
        )
        return model
strategy = tf.distribute.MirroredStrategy()

batch_size = 800
epochs = 80
val_dataset = (np.array(testing_input), np.array(testing_output))

tuner = Hyperband(
    hypermodel=CNNHyperModel(
        input_shape=[len(training_input[0]), len(training_input[0][0])],
        output_shape=[len(training_output[0])],
    ),
    objective='accuracy',
    distribution_strategy=strategy,
    hyperband_iterations=5,
)

early_stopping_callback = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
)

tuner.search(
    x=np.array(training_input),
    y=np.array(training_output),
    validation_data=val_dataset,
    callbacks=[early_stopping_callback],
    batch_size=batch_size,
    epochs=epochs,
)

# Retrieve the best checkpointed model and train it further.
best_model = tuner.get_best_models(num_models=1)[0]
best_model.fit(
    x=np.array(training_input),
    y=np.array(training_output),
    epochs=epochs,
    batch_size=batch_size,
    validation_data=val_dataset,
)
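The only workaround I've come up with is to skip restoring the checkpointed weights entirely: take the best hyperparameters and rebuild the model under the strategy scope, then retrain from scratch. A sketch of what I mean (untested, and it throws away the tuned weights, which I'd prefer to keep):

    # Possible workaround (sketch): rebuild from the winning hyperparameters
    # under the strategy scope instead of restoring the tuner's checkpoint,
    # so the optimizer slot variables are created in the right scope.
    best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
    with strategy.scope():
        rebuilt_model = tuner.hypermodel.build(best_hp)  # compiles under the scope
    rebuilt_model.fit(
        x=np.array(training_input),
        y=np.array(training_output),
        epochs=epochs,
        batch_size=batch_size,
        validation_data=val_dataset,
    )

Even if that works for the final model, it doesn't help with the error inside tuner.search(), since Keras Tuner does the checkpoint restoring between Hyperband brackets itself. How do I make the tuner restore its checkpoints under the strategy scope?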