I am trying to convert a working CycleGAN from single-GPU training to tf.distribute.MirroredStrategy. I have tried several approaches: custom training loops, the suggestion from jongsung park's question, adjustments following the TensorFlow tutorial, and placing strategy.scope() in several different locations. Yet I still get the following error.
Exception: RuntimeError
in user code:
File "C:\Users\Einka\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1160, in train_function *
return step_function(self, iterator)
File "w:\300_Neural_Network\320_Unsupervised_GAN_CycleConsistency\CycleGAN_Custom_Trainingloop", line 490, in train_step *
G_loss, F_loss, F_X_loss, D_Y_loss = strategy.run(self.train_step_single, args=(self, batch_data))
RuntimeError: Method requires being in cross-replica context, use get_replica_context().merge_call()
File "C:\Users\Einka\AppData\Local\Temp\__autograph_generated_file9_hoxhkq.py", line 10, in tf__train_step
(G_loss, F_loss, F_X_loss, D_Y_loss) = ag__.converted_call(ag__.ld(strategy).run, (ag__.ld(self).train_step_single,), dict(args=(ag__.ld(self), ag__.ld(batch_data))), fscope)
During handling of the above exception, another exception occurred:
File "C:\Users\Einka\AppData\Local\Temp\__autograph_generated_file9_hoxhkq.py", line 10, in tf__train_step
(G_loss, F_loss, F_X_loss, D_Y_loss) = ag__.converted_call(ag__.ld(strategy).run, (ag__.ld(self).train_step_single,), dict(args=(ag__.ld(self), ag__.ld(batch_data))), fscope)
During handling of the above exception, another exception occurred:
File "C:\Users\Einka\AppData\Local\Temp\__autograph_generated_filejk9kpr6g.py", line 15, in tf__train_function
retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
File "C:\Users\Einka\AppData\Local\Temp\__autograph_generated_file9_hoxhkq.py", line 10, in tf__train_step
(G_loss, F_loss, F_X_loss, D_Y_loss) = ag__.converted_call(ag__.ld(strategy).run, (ag__.ld(self).train_step_single,), dict(args=(ag__.ld(self), ag__.ld(batch_data))), fscope)
During handling of the above exception, another exception occurred:
File "C:\Users\Einka\AppData\Local\Temp\__autograph_generated_file9_hoxhkq.py", line 10, in tf__train_step
(G_loss, F_loss, F_X_loss, D_Y_loss) = ag__.converted_call(ag__.ld(strategy).run, (ag__.ld(self).train_step_single,), dict(args=(ag__.ld(self), ag__.ld(batch_data))), fscope)
File "C:\Users\Einka\AppData\Local\Temp\__autograph_generated_filejk9kpr6g.py", line 15, in tf__train_function
retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
File "W:\300_Neural_Network\320_Unsupervised_GAN_CycleConsistency\CycleGAN_Custom_Trainingloop", line 575, in <module>
cycle_gan_model.fit(
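For reference, this is the cross-replica pattern from the TensorFlow custom-training tutorial that I tried to follow. A minimal, self-contained sketch (replica_step and distributed_step are placeholder names, not from my code below): strategy.run has to be entered from a cross-replica context such as a manually driven loop, whereas Model.fit already wraps train_step in strategy.run.

import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()

def replica_step(batch):
    # Per-replica computation; runs once on each device.
    return tf.reduce_mean(batch)

@tf.function
def distributed_step(batch):
    # Called from the loop below, i.e. from cross-replica context: OK.
    per_replica = strategy.run(replica_step, args=(batch,))
    # Combine the per-replica results into a single value.
    return strategy.reduce(tf.distribute.ReduceOp.MEAN, per_replica, axis=None)

dist_ds = strategy.experimental_distribute_dataset(
    tf.data.Dataset.from_tensor_slices(tf.ones((8, 4))).batch(2)
)
for batch in dist_ds:
    print(distributed_step(batch))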
The original model is the CycleGAN example from keras.io. The code posted below is also available on Colab.
import random
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_addons as tfa
import tensorflow_datasets as tfds
# CONFIGURATION
EPOCHS = 1
BATCH_SIZE_PER_REPLICA = 1
BUFFER_SIZE = 256
# TensorBoard
logdir = "W:/300_Neural_Network/320_Unsupervised_GAN_CycleConsistency/logs/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)
file_writer = tf.summary.create_file_writer(logdir + "/metrics")
file_writer.set_as_default()
# ----
# Distribution strategy
# Note: the TF1-style ConfigProto/Session calls are no-ops under TF2 eager
# execution; memory growth is configured per GPU instead, and it must be set
# before the GPUs are initialized (i.e. before creating the strategy).
for gpu in tf.config.list_physical_devices("GPU"):
    tf.config.experimental.set_memory_growth(gpu, True)
strategy = tf.distribute.MirroredStrategy()
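# Optional sanity check from the TF tutorial: confirm how many replicas the
# MirroredStrategy actually picked up.
print("Number of replicas:", strategy.num_replicas_in_sync)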
# Visual parameters
tfds.disable_progress_bar()
# autotune = tf.data.AUTOTUNE
# Load the horse-zebra dataset using tensorflow-datasets.
dataset, _ = tfds.load("cycle_gan/horse2zebra", with_info=True, as_supervised=True)
train_horses, train_zebras = dataset["trainA"], dataset["trainB"]
test_horses, test_zebras = dataset["testA"], dataset["testB"]
# Define the standard image size.
orig_img_size = (286, 286)
# Size of the random crops to be used during training.
input_img_size = (256, 256, 3)
# Weights initializer for the layers.
# (random.seed() returns None, so the original seed=random.seed(random.random())
# silently passed seed=None; use an explicit integer seed instead.)
kernel_init = keras.initializers.RandomNormal(mean=0.0, stddev=0.02, seed=random.randint(0, 2**31 - 1))
# Gamma initializer for instance normalization.
gamma_init = keras.initializers.RandomNormal(mean=0.0, stddev=0.02, seed=random.randint(0, 2**31 - 1))
batch_size = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync
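# Worked example: on a 2-GPU machine num_replicas_in_sync == 2, so the global
# batch_size == 1 * 2 == 2. Each replica still sees BATCH_SIZE_PER_REPLICA
# images per step; the datasets below must be batched with the GLOBAL size.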
def normalize_img(img):
    img = tf.cast(img, dtype=tf.float32)
    # Map values into the range [-1, 1]
    return (img / 127.5) - 1.0
def preprocess_train_image(img, label):
    # Random flip
    img = tf.image.random_flip_left_right(img)
    # Resize to the original size first
    img = tf.image.resize(img, [*orig_img_size])
    # Random crop to 256x256
    img = tf.image.random_crop(img, size=[*input_img_size])
    # Normalize the pixel values into the range [-1, 1]
    img = normalize_img(img)
    return img

def preprocess_test_image(img, label):
    # Only resizing and normalization for the test images.
    img = tf.image.resize(img, [input_img_size[0], input_img_size[1]])
    img = normalize_img(img)
    return img
def distribute_datasets(strategy, train_batches, test_batches):
    train_dist_dataset = strategy.experimental_distribute_dataset(train_batches)
    test_dist_dataset = strategy.experimental_distribute_dataset(test_batches)
    return train_dist_dataset, test_dist_dataset
# Apply the preprocessing operations to the training data
train_horses = (
    train_horses.map(preprocess_train_image)
    .cache()
    .shuffle(BUFFER_SIZE)
    .batch(batch_size)
    .prefetch(1)
)
train_zebras = (
    train_zebras.map(preprocess_train_image)
    .cache()
    .shuffle(BUFFER_SIZE)
    .batch(batch_size)
    .prefetch(1)
)
# Apply the preprocessing operations to the test data
test_horses = (
    test_horses.map(preprocess_test_image)
    .cache()
    .shuffle(BUFFER_SIZE)
    .batch(batch_size)
)
test_zebras = (
    test_zebras.map(preprocess_test_image)
    .cache()
    .shuffle(BUFFER_SIZE)
    .batch(batch_size)
)
train_set, test_set = distribute_datasets(
    strategy,
    tf.data.Dataset.zip((train_horses, train_zebras)),
    tf.data.Dataset.zip((test_horses, test_zebras)),
)
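# Optional sanity check (sketch): each element of train_set is a
# (horses, zebras) pair; with more than one replica each component is a
# PerReplica value holding one per-GPU batch.
# for real_x, real_y in train_set:
#     print(real_x)  # PerReplica(...) on a multi-GPU machine
#     break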
class ReflectionPadding2D(layers.Layer):
    """Implements Reflection Padding as a layer.

    Args:
        padding(tuple): Amount of padding for the
            spatial dimensions.

    Returns:
        A padded tensor with the same type as the input tensor.
    """

    def __init__(self, padding=(1, 1), **kwargs):
        self.padding = tuple(padding)
        super(ReflectionPadding2D, self).__init__(**kwargs)

    def call(self, input_tensor, mask=None):
        padding_width, padding_height = self.padding
        padding_tensor = [
            [0, 0],
            [padding_height, padding_height],
            [padding_width, padding_width],
            [0, 0],
        ]
        return tf.pad(input_tensor, padding_tensor, mode="REFLECT")
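# Illustrative shape check: padding=(3, 3) grows each spatial dimension by
# 2 * 3, e.g. ReflectionPadding2D(padding=(3, 3))(tf.zeros((1, 256, 256, 3)))
# has shape (1, 262, 262, 3).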
def residual_block(
    x,
    activation,
    kernel_initializer=kernel_init,
    kernel_size=(3, 3),
    strides=(1, 1),
    padding="valid",
    gamma_initializer=gamma_init,
    use_bias=False,
):
    dim = x.shape[-1]
    input_tensor = x

    x = ReflectionPadding2D()(input_tensor)
    x = layers.Conv2D(
        dim,
        kernel_size,
        strides=strides,
        kernel_initializer=kernel_initializer,
        padding=padding,
        use_bias=use_bias,
    )(x)
    x = tfa.layers.InstanceNormalization(gamma_initializer=gamma_initializer)(x)
    x = activation(x)

    x = ReflectionPadding2D()(x)
    x = layers.Conv2D(
        dim,
        kernel_size,
        strides=strides,
        kernel_initializer=kernel_initializer,
        padding=padding,
        use_bias=use_bias,
    )(x)
    x = tfa.layers.InstanceNormalization(gamma_initializer=gamma_initializer)(x)
    x = layers.add([input_tensor, x])
    return x
def downsample(
    x,
    filters,
    activation,
    kernel_initializer=kernel_init,
    kernel_size=(3, 3),
    strides=(2, 2),
    padding="same",
    gamma_initializer=gamma_init,
    use_bias=False,
):
    x = layers.Conv2D(
        filters,
        kernel_size,
        strides=strides,
        kernel_initializer=kernel_initializer,
        padding=padding,
        use_bias=use_bias,
    )(x)
    x = tfa.layers.InstanceNormalization(gamma_initializer=gamma_initializer)(x)
    if activation:
        x = activation(x)
    return x

def upsample(
    x,
    filters,
    activation,
    kernel_size=(3, 3),
    strides=(2, 2),
    padding="same",
    kernel_initializer=kernel_init,
    gamma_initializer=gamma_init,
    use_bias=False,
):
    x = layers.Conv2DTranspose(
        filters,
        kernel_size,
        strides=strides,
        padding=padding,
        kernel_initializer=kernel_initializer,
        use_bias=use_bias,
    )(x)
    x = tfa.layers.InstanceNormalization(gamma_initializer=gamma_initializer)(x)
    if activation:
        x = activation(x)
    return x
def get_resnet_generator(
    filters=64,
    num_downsampling_blocks=2,
    num_residual_blocks=9,
    num_upsample_blocks=2,
    gamma_initializer=gamma_init,
    name=None,
):
    img_input = layers.Input(shape=input_img_size, name=name + "_img_input")
    x = ReflectionPadding2D(padding=(3, 3))(img_input)
    x = layers.Conv2D(filters, (7, 7), kernel_initializer=kernel_init, use_bias=False)(x)
    x = tfa.layers.InstanceNormalization(gamma_initializer=gamma_initializer)(x)
    x = layers.Activation("relu")(x)

    # Downsampling
    for _ in range(num_downsampling_blocks):
        filters *= 2
        x = downsample(x, filters=filters, activation=layers.Activation("relu"))

    # Residual blocks
    for _ in range(num_residual_blocks):
        x = residual_block(x, activation=layers.Activation("relu"))

    # Upsampling
    for _ in range(num_upsample_blocks):
        filters //= 2
        x = upsample(x, filters, activation=layers.Activation("relu"))

    # Final block
    x = ReflectionPadding2D(padding=(3, 3))(x)
    x = layers.Conv2D(3, (7, 7), padding="valid")(x)
    x = layers.Activation("tanh")(x)

    model = keras.models.Model(img_input, x, name=name)
    return model
def get_discriminator(
    filters=64, kernel_initializer=kernel_init, num_downsampling=3, name=None
):
    img_input = layers.Input(shape=input_img_size, name=name + "_img_input")
    x = layers.Conv2D(
        filters,
        (4, 4),
        strides=(2, 2),
        padding="same",
        kernel_initializer=kernel_initializer,
    )(img_input)
    x = layers.LeakyReLU(0.2)(x)

    num_filters = filters
    # Use the num_downsampling parameter (the original hard-coded range(3)).
    for num_downsample_block in range(num_downsampling):
        num_filters *= 2
        if num_downsample_block < 2:
            x = downsample(
                x,
                filters=num_filters,
                activation=layers.LeakyReLU(0.2),
                kernel_size=(4, 4),
                strides=(2, 2),
            )
        else:
            x = downsample(
                x,
                filters=num_filters,
                activation=layers.LeakyReLU(0.2),
                kernel_size=(4, 4),
                strides=(1, 1),
            )

    x = layers.Conv2D(
        1, (4, 4), strides=(1, 1), padding="same", kernel_initializer=kernel_initializer
    )(x)

    model = keras.models.Model(inputs=img_input, outputs=x, name=name)
    return model
# Get the generators
gen_G = get_resnet_generator(name="generator_G")
gen_F = get_resnet_generator(name="generator_F")
# Get the discriminators
disc_X = get_discriminator(name="discriminator_X")
disc_Y = get_discriminator(name="discriminator_Y")
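# Illustrative shape check: both generators map (None, 256, 256, 3) images to
# (None, 256, 256, 3) images, while each discriminator is a PatchGAN whose
# output should be a (None, 32, 32, 1) map of per-patch real/fake scores
# (256 -> 128 -> 64 -> 32 through the stride-2 convolutions above).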
class CycleGan(keras.Model):
    def __init__(
        self,
        generator_G,
        generator_F,
        discriminator_X,
        discriminator_Y,
        lambda_cycle=10.0,
        lambda_identity=0.5,
    ):
        super(CycleGan, self).__init__()
        self.gen_G = generator_G
        self.gen_F = generator_F
        self.disc_X = discriminator_X
        self.disc_Y = discriminator_Y
        self.lambda_cycle = lambda_cycle
        self.lambda_identity = lambda_identity

    def compile(
        self,
        gen_G_optimizer,
        gen_F_optimizer,
        disc_X_optimizer,
        disc_Y_optimizer,
        gen_loss_fn,
        disc_loss_fn,
        cycle_loss_fn,
        identity_loss_fn,
    ):
        super(CycleGan, self).compile()
        self.gen_G_optimizer = gen_G_optimizer
        self.gen_F_optimizer = gen_F_optimizer
        self.disc_X_optimizer = disc_X_optimizer
        self.disc_Y_optimizer = disc_Y_optimizer
        self.generator_loss_fn = gen_loss_fn
        self.discriminator_loss_fn = disc_loss_fn
        self.cycle_loss_fn = cycle_loss_fn
        self.identity_loss_fn = identity_loss_fn

    # Note: the original also defined an identical __call__ override;
    # overriding __call__ bypasses Keras' own call machinery, so defining
    # call() alone is sufficient.
    def call(self, batch_data):
        real_x, real_y = batch_data
        return self.gen_G(real_x), self.gen_F(real_y)

    # The original was missing the self parameter here.
    def compute_output_shape(self, input_shape=(None, 256, 256, 3)):
        return input_shape
    def train_step_single(self, batch_data):
        # x is Horse and y is zebra
        real_x, real_y = batch_data
        with tf.GradientTape(persistent=True) as tape:
            # Horse to fake zebra
            fake_y = self.gen_G(real_x, training=True)
            # Zebra to fake horse -> y2x
            fake_x = self.gen_F(real_y, training=True)
            # Cycle (Horse to fake zebra to fake horse): x -> y -> x
            cycled_x = self.gen_F(fake_y, training=True)
            # Cycle (Zebra to fake horse to fake zebra): y -> x -> y
            cycled_y = self.gen_G(fake_x, training=True)
            # Identity mapping
            same_x = self.gen_F(real_x, training=True)
            same_y = self.gen_G(real_y, training=True)
            # Discriminator output
            disc_real_x = self.disc_X(real_x, training=True)
            disc_fake_x = self.disc_X(fake_x, training=True)
            disc_real_y = self.disc_Y(real_y, training=True)
            disc_fake_y = self.disc_Y(fake_y, training=True)
            # Generator adversarial loss
            gen_G_loss = self.generator_loss_fn(disc_fake_y)
            gen_F_loss = self.generator_loss_fn(disc_fake_x)
            # Generator cycle loss
            cycle_loss_G = self.cycle_loss_fn(real_y, cycled_y) * self.lambda_cycle
            cycle_loss_F = self.cycle_loss_fn(real_x, cycled_x) * self.lambda_cycle
            # Generator identity loss
            id_loss_G = (
                self.identity_loss_fn(real_y, same_y)
                * self.lambda_cycle
                * self.lambda_identity
            )
            id_loss_F = (
                self.identity_loss_fn(real_x, same_x)
                * self.lambda_cycle
                * self.lambda_identity
            )
            # Total generator loss
            total_loss_G = gen_G_loss + cycle_loss_G + id_loss_G
            total_loss_F = gen_F_loss + cycle_loss_F + id_loss_F
            # Discriminator loss
            disc_X_loss = self.discriminator_loss_fn(disc_real_x, disc_fake_x)
            disc_Y_loss = self.discriminator_loss_fn(disc_real_y, disc_fake_y)
        # Get the gradients for the generators
        grads_G = tape.gradient(total_loss_G, self.gen_G.trainable_variables)
        grads_F = tape.gradient(total_loss_F, self.gen_F.trainable_variables)
        # Get the gradients for the discriminators
        disc_X_grads = tape.gradient(disc_X_loss, self.disc_X.trainable_variables)
        disc_Y_grads = tape.gradient(disc_Y_loss, self.disc_Y.trainable_variables)
        # Update the weights of the generators
        self.gen_G_optimizer.apply_gradients(zip(grads_G, self.gen_G.trainable_variables))
        self.gen_F_optimizer.apply_gradients(zip(grads_F, self.gen_F.trainable_variables))
        # Update the weights of the discriminators
        self.disc_X_optimizer.apply_gradients(zip(disc_X_grads, self.disc_X.trainable_variables))
        self.disc_Y_optimizer.apply_gradients(zip(disc_Y_grads, self.disc_Y.trainable_variables))
        return {
            "G_loss": total_loss_G,
            "F_loss": total_loss_F,
            "D_X_loss": disc_X_loss,
            "D_Y_loss": disc_Y_loss,
        }

    @tf.function
    def train_step(self, batch_data):
        # This strategy.run call is what raises the RuntimeError above:
        # Model.fit already executes train_step in replica context, so
        # strategy.run cannot be entered again here. (Also note that
        # self.train_step_single is a bound method, so the original
        # args=(self, batch_data) passed self twice.)
        return strategy.run(self.train_step_single, args=(batch_data,))
# Outside strategy.run / merge_call this returns the default replica context,
# so this assert passes trivially at module level.
assert tf.distribute.get_replica_context() is not None
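# For comparison, a minimal sketch (not my current code) of what the error
# message points at: with compile()/fit(), Keras itself wraps train_step in
# strategy.run, so train_step should contain only the per-replica work and
# must not call strategy.run again:
#
# class CycleGan(keras.Model):
#     ...
#     def train_step(self, batch_data):
#         return self.train_step_single(batch_data)  # per-replica body only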
class GANMonitor(keras.callbacks.Callback):
    """A callback to generate and save images after each epoch."""

    def __init__(self, num_img=4):
        self.num_img = num_img

    def on_epoch_end(self, epoch, logs=None):
        _, ax = plt.subplots(4, 2, figsize=(12, 12))
        for i, img in enumerate(test_horses.take(self.num_img)):
            prediction = self.model.gen_G(img)[0].numpy()
            prediction = (prediction * 127.5 + 127.5).astype(np.uint8)
            img = (img[0] * 127.5 + 127.5).numpy().astype(np.uint8)
            ax[i, 0].imshow(img)
            ax[i, 1].imshow(prediction)
            ax[i, 0].set_title("Input image")
            ax[i, 1].set_title("Translated image")
            ax[i, 0].axis("off")
            ax[i, 1].axis("off")
            prediction = keras.preprocessing.image.array_to_img(prediction)
            prediction.save(
                "W:/300_Neural_Network/320_Unsupervised_GAN_CycleConsistency/plots/generated_img_{i}_{epoch}.png".format(i=i, epoch=epoch + 1)
            )
        plt.show()
        plt.close()
# Loss function for evaluating adversarial loss
with strategy.scope():
    adv_loss_fn = keras.losses.MeanSquaredError(reduction=tf.keras.losses.Reduction.SUM)

    # Define the loss function for the generators
    def generator_loss_fn(fake):
        fake_loss = adv_loss_fn(tf.ones_like(fake), fake)
        return fake_loss

    # Define the loss function for the discriminators
    def discriminator_loss_fn(real, fake):
        real_loss = adv_loss_fn(tf.ones_like(real), real)
        fake_loss = adv_loss_fn(tf.zeros_like(fake), fake)
        return (real_loss + fake_loss) * 0.5
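# Side note (sketch, not my current code): the TF distributed-training guide
# recommends reducing per-example losses by the GLOBAL batch size, so that
# summing the resulting gradients across replicas yields the true mean, e.g.:
#
# mse_none = keras.losses.MeanSquaredError(reduction=tf.keras.losses.Reduction.NONE)
# def generator_loss_fn(fake):
#     per_example = mse_none(tf.ones_like(fake), fake)
#     return tf.nn.compute_average_loss(per_example, global_batch_size=batch_size)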
# Create the CycleGAN model
with strategy.scope():
    cycle_gan_model = CycleGan(
        generator_G=gen_G, generator_F=gen_F, discriminator_X=disc_X, discriminator_Y=disc_Y
    )

# Compile the model
with strategy.scope():
    cycle_gan_model.compile(
        gen_G_optimizer=keras.optimizers.Adam(learning_rate=2e-4, beta_1=0.5),
        gen_F_optimizer=keras.optimizers.Adam(learning_rate=2e-4, beta_1=0.5),
        disc_X_optimizer=keras.optimizers.Adam(learning_rate=2e-4, beta_1=0.5),
        disc_Y_optimizer=keras.optimizers.Adam(learning_rate=2e-4, beta_1=0.5),
        gen_loss_fn=generator_loss_fn,
        disc_loss_fn=discriminator_loss_fn,
        cycle_loss_fn=keras.losses.MeanAbsoluteError(reduction=tf.keras.losses.Reduction.SUM),
        identity_loss_fn=keras.losses.MeanAbsoluteError(reduction=tf.keras.losses.Reduction.SUM),
    )
# Callbacks
plotter = GANMonitor()
checkpoint_filepath = "W:/300_Neural_Network/320_Unsupervised_GAN_CycleConsistency/checkpoints/cyclegan_checkpoints.{epoch:03d}"
model_checkpoint_callback = keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
)
# Train for just one epoch here, as each epoch takes around 7 minutes on a
# single P100-backed machine. Note: steps_per_epoch=1067 assumes a global
# batch size of 1 (trainA contains 1067 images); with multiple replicas this
# should be reduced to roughly 1067 // batch_size.
cycle_gan_model.fit(
    train_set,
    epochs=1,
    steps_per_epoch=1067,
    callbacks=[tensorboard_callback, plotter, model_checkpoint_callback],
)