I am trying to run a model for image classification on multi-GPU smell, my dataset is quite large and I have to use image_dataset_from_directory
strategy = tf.distribute.MirroredStrategy()
print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
flowers_data=r'C:\Users\my system\Documents\data-file'
height,width=224,224
training_batch_size=64
train_set = tf.keras.preprocessing.image_dataset_from_directory(
flowers_data,
validation_split=0.1,
label_mode = 'categorical',
subset="training",
seed=47,
image_size=(height, width),
batch_size=training_batch_size)
validation_set = tf.keras.preprocessing.image_dataset_from_directory(
flowers_data,
validation_split=0.1,
label_mode = 'categorical',
subset="validation",
seed=47,
image_size=(height, width),
batch_size=training_batch_size)
with strategy.scope():
baseModel = MobileNetV2(weights="imagenet", include_top=False,
input_tensor=Input(shape=(224, 224, 3)))
headModel = baseModel.output
headModel = AveragePooling2D(pool_size=(7,7))(headModel)
headModel = Flatten(name="flatten")(headModel)
headModel = Dense(512, activation="relu")(headModel)#-----ADD------
headModel = Dense(256, activation="relu")(headModel)#-----ADD------
headModel = Dense(250, activation="relu")(headModel)
headModel = Dense(200, activation="relu")(headModel)
headModel = Dense(150, activation="relu")(headModel)
headModel = Dense(100, activation="relu")(headModel)
headModel = Dense(50, activation="relu")(headModel)
headModel = Dense(5, activation="softmax")(headModel)
dnn_model = Model(inputs=baseModel.input, outputs=headModel)
for layer in baseModel.layers:
layer.trainable = False
dnn_model.summary()
dnn_model.compile(
optimizer=keras.optimizers.Adam(),
loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=[keras.metrics.SparseCategoricalAccuracy()],
)
history = dnn_model.fit(
train_set,
validation_data=validation_set,
epochs=5
)
Unfortunately, I will not succeed The error I get is:
InvalidArgumentError: No OpKernel was registered to support Op 'NcclAllReduce' used by {{node SGD/NcclAllReduce}} with these attrs: [reduction="sum", shared_name="c1", T=DT_FLOAT, num_devices=2]
Registered devices: [CPU, GPU]
Registered kernels:
<no registered kernels>
[[SGD/NcclAllReduce]] [Op:__inference_train_function_19769]
Please check all my code Thank You
Mirrored Strategy Data distribution