I have a VAE model based on the implementation from the book Hands-On Machine Learning. I have a custom dataset of grayscale images, each containing a circle, triangle, or rectangle at a random location. When I try to generate images with this VAE, all I get back is random noise.
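Generation follows the book's approach: I sample codings from a standard normal and feed them to the decoder. A minimal sketch of that cell (not included in full here; variational_decoder and codings_size are defined in the model section below):

import tensorflow as tf

# Sketch of the generation step: sample codings from N(0, 1) and decode them.
codings = tf.random.normal(shape=[12, codings_size])
generated_images = variational_decoder.predict(codings)  # shape (12, 28, 28)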
How I create the data:
from PIL import Image, ImageDraw
import random

image_size = (28, 28)
portion_size = (5, 5)

def draw_image(shape, image_dir, n):
    # Draw a single small shape at a random position and save it as a PNG.
    for i in range(n):
        image = Image.new('L', (28, 28))  # blank grayscale canvas
        draw = ImageDraw.Draw(image)
        if shape == "circle":
            used_positions_c = set()
            while True:
                x1 = random.randint(0, image_size[0] - portion_size[0] - 1)
                y1 = random.randint(0, image_size[1] - portion_size[1] - 1)
                if (x1, y1) not in used_positions_c:
                    used_positions_c.add((x1, y1))
                    break
            x2 = x1 + portion_size[0] - 1
            y2 = y1 + portion_size[1] - 1
            draw.ellipse([(x1, y1), (x2, y2)], fill=255)
        elif shape == "rectangle":
            used_positions_r = set()
            while True:
                x1 = random.randint(0, image_size[0] - portion_size[0] - 1)
                y1 = random.randint(0, image_size[1] - portion_size[1] - 3)
                if (x1, y1) not in used_positions_r:
                    used_positions_r.add((x1, y1))
                    break
            x2 = x1 + portion_size[0] - 1
            y2 = y1 + portion_size[1] - 3
            draw.rectangle([(x1, y1), (x2, y2)], fill=255)
        elif shape == "triangle":
            used_positions_t = set()
            while True:
                x = random.randint(0, image_size[0] - portion_size[0] - 1)
                y = random.randint(0, image_size[1] - portion_size[1] - 1)
                if (x, y) not in used_positions_t:
                    used_positions_t.add((x, y))
                    break
            x1, y1 = x, y + portion_size[1]
            x2, y2 = x + portion_size[0], y + portion_size[1]
            x3, y3 = x + portion_size[0] // 2, y
            draw.polygon([(x1, y1), (x2, y2), (x3, y3)], fill=255)
        image.save(f"{image_dir}_{i}.png")

n = 500
draw_image("circle", circle_dir, n)
draw_image("rectangle", rectangle_dir, n)
draw_image("triangle", triangle_dir, n)
My model:
import tensorflow as tf

class Sampling(tf.keras.layers.Layer):
    def call(self, inputs):
        mean, log_var = inputs
        # reparameterization trick: z = mean + sigma * epsilon
        return tf.random.normal(tf.shape(log_var)) * tf.exp(log_var / 2) + mean

codings_size = 10

inputs = tf.keras.layers.Input(shape=[28, 28])
Z = tf.keras.layers.Flatten()(inputs)
Z = tf.keras.layers.Dense(150, activation="relu")(Z)
Z = tf.keras.layers.Dense(100, activation="relu")(Z)
codings_mean = tf.keras.layers.Dense(codings_size)(Z)     # μ
codings_log_var = tf.keras.layers.Dense(codings_size)(Z)  # γ (log variance)
codings = Sampling()([codings_mean, codings_log_var])
variational_encoder = tf.keras.Model(
    inputs=[inputs], outputs=[codings_mean, codings_log_var, codings])

decoder_inputs = tf.keras.layers.Input(shape=[codings_size])
x = tf.keras.layers.Dense(100, activation="relu")(decoder_inputs)
x = tf.keras.layers.Dense(150, activation="relu")(x)
x = tf.keras.layers.Dense(28 * 28)(x)
outputs = tf.keras.layers.Reshape([28, 28])(x)
variational_decoder = tf.keras.Model(inputs=[decoder_inputs], outputs=[outputs])

_, _, codings = variational_encoder(inputs)
reconstructions = variational_decoder(codings)
variational_ae = tf.keras.Model(inputs=[inputs], outputs=[reconstructions])

# KL divergence between the coding distribution and N(0, I)
latent_loss = -0.5 * tf.reduce_sum(
    1 + codings_log_var - tf.exp(codings_log_var) - tf.square(codings_mean),
    axis=-1)
variational_ae.add_loss(tf.reduce_mean(latent_loss) / 784.)

variational_ae.compile(optimizer="nadam", loss="mse")
history = variational_ae.fit(X_train, X_train, epochs=25, batch_size=32,
                             validation_data=(X_test, X_test))
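For context, the latent_loss above is the book's closed-form KL divergence between the coding distribution and a standard normal, with γ the predicted log-variance; it is divided by 784 so it is on the same per-pixel scale as the MSE reconstruction loss:

$$\mathcal{L}_{\text{latent}} = -\frac{1}{2}\sum_{i=1}^{10}\left(1 + \gamma_i - e^{\gamma_i} - \mu_i^{2}\right)$$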
I have tried using a binary cross-entropy loss and adding a sigmoid activation to the decoder's output layer, but no luck.