I have implemented a variational autoencoder, using the Keras example (https://keras.io/examples/generative/vae/) as a starting point. When I plot the training loss, the values are not the same as the ones displayed in the console. I also noticed that the loss displayed in the console for the Keras example itself does not look right, given that total_loss = reconstruction_loss + kl_loss.
Is the loss displayed in the console not the total_loss?
My VAE code:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

class Sampling(layers.Layer):
    """Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""

    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon
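# Illustrative check only (not part of my pipeline): the layer implements the usual
# reparameterization trick, z = z_mean + exp(0.5 * z_log_var) * epsilon with epsilon ~ N(0, I).
z_demo = Sampling()([tf.zeros((2, 100)), tf.zeros((2, 100))])
print(z_demo.shape)  # (2, 100) -- a standard-normal sample when z_mean = 0 and z_log_var = 0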
latent_dim = 100
encoder_inputs = keras.Input(shape=(64, 64, 3))  # actually 160
x = layers.Conv2D(32, 4, strides=2, padding="same")(encoder_inputs)
x = layers.LeakyReLU(alpha=0.2)(x)
x = layers.Conv2D(32, 3, strides=1, padding="same")(x)
x = layers.LeakyReLU(alpha=0.2)(x)
x = layers.Conv2D(64, 4, strides=2, padding="same")(x)
x = layers.LeakyReLU(alpha=0.2)(x)
x = layers.Conv2D(64, 3, strides=1, padding="same")(x)
x = layers.LeakyReLU(alpha=0.2)(x)
x = layers.Conv2D(128, 4, strides=2, padding="same")(x)
x = layers.LeakyReLU(alpha=0.2)(x)
x = layers.Conv2D(64, 3, strides=1, padding="same")(x)
x = layers.LeakyReLU(alpha=0.2)(x)
x = layers.Conv2D(32, 3, strides=1, padding="same")(x)
x = layers.LeakyReLU(alpha=0.2)(x)
x = layers.Conv2D(100, 8, strides=1, padding="valid")(x)
x = layers.LeakyReLU(alpha=0.2)(x)
x = layers.Flatten()(x)
z_mean = layers.Dense(latent_dim, name="z_mean")(x)
z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
z = Sampling()([z_mean, z_log_var])
encoder = keras.Model(encoder_inputs, [z_mean, z_log_var, z], name="encoder")
encoder.summary()
latent_inputs = keras.Input(shape=(latent_dim,))
x = layers.Reshape((1, 1, 100))(latent_inputs)
x = layers.Conv2DTranspose(100, 8, strides=1, padding="valid")(x)
x = layers.LeakyReLU(alpha=0.2)(x)
x = layers.Conv2DTranspose(32, 3, strides=1, padding="same")(x)
x = layers.LeakyReLU(alpha=0.2)(x)
x = layers.Conv2DTranspose(64, 3, strides=1, padding="same")(x)
x = layers.LeakyReLU(alpha=0.2)(x)
x = layers.Conv2DTranspose(128, 4, strides=2, padding="same")(x)
x = layers.LeakyReLU(alpha=0.2)(x)
x = layers.Conv2DTranspose(64, 3, strides=1, padding="same")(x)
x = layers.LeakyReLU(alpha=0.2)(x)
x = layers.Conv2DTranspose(64, 4, strides=2, padding="same")(x)
x = layers.LeakyReLU(alpha=0.2)(x)
x = layers.Conv2DTranspose(32, 3, strides=1, padding="same")(x)
x = layers.LeakyReLU(alpha=0.2)(x)
x = layers.Conv2DTranspose(32, 4, strides=2, padding="same")(x)
x = layers.LeakyReLU(alpha=0.2)(x)
decoder_outputs = layers.Conv2DTranspose(3, 3, activation="sigmoid", padding="same")(x)
decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")
decoder.summary()
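# Optional shape sanity check (illustrative only): the decoder should mirror the 64x64x3 encoder input.
print(encoder.output_shape)  # [(None, 100), (None, 100), (None, 100)] for z_mean, z_log_var, z
print(decoder.output_shape)  # (None, 64, 64, 3)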
class VAE(keras.Model):
    def __init__(self, encoder, decoder, encoder_t1, encoder_t2, encoder_t3, encoder_t4, **kwargs):
        super(VAE, self).__init__(**kwargs)
        # encoder_t1 ... encoder_t4 are accepted here but not used anywhere in the class
        self.encoder = encoder
        self.decoder = decoder

    def train_step(self, data):
        if isinstance(data, tuple):
            data = data[0]
        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(data)
            reconstruction = self.decoder(z)
            reconstruction_loss = tf.reduce_mean(  # mean
                keras.losses.mse(data, reconstruction)  # alternative: binary_crossentropy
            )
            reconstruction_loss *= 64 * 64  # corresponds to the image size
            kl_loss = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
            kl_loss = tf.reduce_mean(kl_loss)  # mean
            kl_loss *= -0.5
            total_loss = reconstruction_loss + kl_loss
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        return {
            "loss": total_loss,
            "reconstruction_loss": reconstruction_loss,
            "kl_loss": kl_loss,
        }

    def call(self, inputs):
        z_mean, z_log_var, z = self.encoder(inputs)
        reconstruction = self.decoder(z)
        reconstruction_loss = tf.reduce_mean(
            keras.losses.mse(inputs, reconstruction)
        )
        reconstruction_loss *= 64 * 64
        kl_loss = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
        kl_loss = tf.reduce_mean(kl_loss)
        kl_loss *= -0.5
        total_loss = reconstruction_loss + kl_loss
        self.add_metric(kl_loss, name='kl_loss', aggregation='mean')
        self.add_metric(total_loss, name='total_loss', aggregation='mean')
        self.add_metric(reconstruction_loss, name='reconstruction_loss', aggregation='mean')
        return reconstruction
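The model is built and compiled before fit() roughly along these lines (minimal sketch, not my exact call; since the encoder_t1 ... encoder_t4 arguments are not used inside the class, placeholders suffice):
vae = VAE(encoder, decoder, None, None, None, None)
vae.compile(optimizer=keras.optimizers.Adam())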
When I plot my loss with the following code:
import matplotlib.pyplot as plt

vae_train = vae.fit(
    train_generator,
    steps_per_epoch=nb_train_samples,
    epochs=nb_epoch,
    validation_data=val_generator,
    validation_steps=nb_validation_samples,  # 141 # 3963
    callbacks=[es_callback]
)

loss = vae_train.history['loss']
val_loss = vae_train.history['val_total_loss']

plt.figure()
plt.plot(range(len(loss)), loss, 'b', label='Training loss')
plt.plot(range(len(val_loss)), val_loss, 'm', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()
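To compare what fit() recorded against the individual components, one can also sum the per-epoch history entries (sketch; the 'reconstruction_loss' and 'kl_loss' keys exist because train_step returns them):
recon_hist = vae_train.history['reconstruction_loss']
kl_hist = vae_train.history['kl_loss']
summed = [r + k for r, k in zip(recon_hist, kl_hist)]
print(loss[-1], summed[-1])  # compare the recorded 'loss' against reconstruction_loss + kl_loss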
The resulting plot shows a different loss than the one displayed in the console: the loss displayed in the console is not reconstruction_loss + kl_loss, whereas the plotted loss is.
For example, the loss displayed here is not correct, but the plotted one is (interestingly, the val_total_loss is displayed correctly):
Epoch 20/100
1266/1266 [==============================] - 82s 65ms/step - loss: 45.2503 - reconstruction_loss: 49.9395 - kl_loss: 0.5695 - val_loss: 0.0000e+00 - val_kl_loss: 0.5888 - val_total_loss: 48.9094 - val_reconstruction_loss: 48.3206
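Taking the epoch-20 numbers from the console line above, the mismatch is easy to check:
# Training side: the components do not add up to the displayed loss
print(49.9395 + 0.5695)  # 50.509, but the console shows loss: 45.2503
# Validation side: the sum matches the displayed val_total_loss exactly
print(48.3206 + 0.5888)  # 48.9094 == val_total_loss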