I have a VAE model based on the implementation from the book Hands-On Machine Learning. I have a custom dataset of grayscale images, each containing a circle, triangle, or rectangle at a random location. When I try to generate images with this VAE, all I get back is random noise.
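Generation follows the book's approach: I sample codings from a standard normal and feed them to the decoder. A minimal sketch of that cell (not included in full here; variational_decoder and codings_size are defined in the model section below):

import tensorflow as tf

# Sketch of the generation step: sample codings from N(0, 1) and decode them.
codings = tf.random.normal(shape=[12, codings_size])
generated_images = variational_decoder.predict(codings)  # shape (12, 28, 28)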
How I create the data:
from PIL import Image, ImageDraw
import random

image_size = (28, 28)
portion_size = (5, 5)

def draw_image(shape, image_dir, n):
    # Draw a single small shape at a random position and save it as a PNG.
    for i in range(n):
        image = Image.new('L', (28, 28))  # blank grayscale canvas
        draw = ImageDraw.Draw(image)
        if shape == "circle":
            used_positions_c = set()
            while True:
                x1 = random.randint(0, image_size[0] - portion_size[0] - 1)
                y1 = random.randint(0, image_size[1] - portion_size[1] - 1)
                if (x1, y1) not in used_positions_c:
                    used_positions_c.add((x1, y1))
                    break
            x2 = x1 + portion_size[0] - 1
            y2 = y1 + portion_size[1] - 1
            draw.ellipse([(x1, y1), (x2, y2)], fill=255)
        elif shape == "rectangle":
            used_positions_r = set()
            while True:
                x1 = random.randint(0, image_size[0] - portion_size[0] - 1)
                y1 = random.randint(0, image_size[1] - portion_size[1] - 3)
                if (x1, y1) not in used_positions_r:
                    used_positions_r.add((x1, y1))
                    break
            x2 = x1 + portion_size[0] - 1
            y2 = y1 + portion_size[1] - 3
            draw.rectangle([(x1, y1), (x2, y2)], fill=255)
        elif shape == "triangle":
            used_positions_t = set()
            while True:
                x = random.randint(0, image_size[0] - portion_size[0] - 1)
                y = random.randint(0, image_size[1] - portion_size[1] - 1)
                if (x, y) not in used_positions_t:
                    used_positions_t.add((x, y))
                    break
            x1, y1 = x, y + portion_size[1]
            x2, y2 = x + portion_size[0], y + portion_size[1]
            x3, y3 = x + portion_size[0] // 2, y
            draw.polygon([(x1, y1), (x2, y2), (x3, y3)], fill=255)
        image.save(f"{image_dir}_{i}.png")

n = 500
draw_image("circle", circle_dir, n)
draw_image("rectangle", rectangle_dir, n)
draw_image("triangle", triangle_dir, n)
My model:
import tensorflow as tf

class Sampling(tf.keras.layers.Layer):
    def call(self, inputs):
        mean, log_var = inputs
        # reparameterization trick: z = mean + sigma * epsilon
        return tf.random.normal(tf.shape(log_var)) * tf.exp(log_var / 2) + mean

codings_size = 10

inputs = tf.keras.layers.Input(shape=[28, 28])
Z = tf.keras.layers.Flatten()(inputs)
Z = tf.keras.layers.Dense(150, activation="relu")(Z)
Z = tf.keras.layers.Dense(100, activation="relu")(Z)
codings_mean = tf.keras.layers.Dense(codings_size)(Z)     # μ
codings_log_var = tf.keras.layers.Dense(codings_size)(Z)  # γ (log variance)
codings = Sampling()([codings_mean, codings_log_var])
variational_encoder = tf.keras.Model(
    inputs=[inputs], outputs=[codings_mean, codings_log_var, codings])

decoder_inputs = tf.keras.layers.Input(shape=[codings_size])
x = tf.keras.layers.Dense(100, activation="relu")(decoder_inputs)
x = tf.keras.layers.Dense(150, activation="relu")(x)
x = tf.keras.layers.Dense(28 * 28)(x)
outputs = tf.keras.layers.Reshape([28, 28])(x)
variational_decoder = tf.keras.Model(inputs=[decoder_inputs], outputs=[outputs])

_, _, codings = variational_encoder(inputs)
reconstructions = variational_decoder(codings)
variational_ae = tf.keras.Model(inputs=[inputs], outputs=[reconstructions])

# KL divergence between the coding distribution and N(0, I)
latent_loss = -0.5 * tf.reduce_sum(
    1 + codings_log_var - tf.exp(codings_log_var) - tf.square(codings_mean),
    axis=-1)
variational_ae.add_loss(tf.reduce_mean(latent_loss) / 784.)

variational_ae.compile(optimizer="nadam", loss="mse")
history = variational_ae.fit(X_train, X_train, epochs=25, batch_size=32,
                             validation_data=(X_test, X_test))
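For context, the latent_loss above is the book's closed-form KL divergence between the coding distribution and a standard normal, with γ the predicted log-variance; it is divided by 784 so it is on the same per-pixel scale as the MSE reconstruction loss:

$$\mathcal{L}_{\text{latent}} = -\frac{1}{2}\sum_{i=1}^{10}\left(1 + \gamma_i - e^{\gamma_i} - \mu_i^{2}\right)$$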
I have tried using a binary cross-entropy loss and adding a sigmoid activation to the decoder's output layer, but no luck.