0

I am implementing a Decoder (a type of Artificial Neural Network) using keras:

# Baseline decoder: maps a 25-dimensional latent vector to a (32, 32, 1) output.
latent_dim = 25
latent_inputs = keras.Input(shape=(latent_dim,))

# Fully connected stem; the last Dense emits 4096 = 4 * 4 * 256 values per
# sample so that the Reshape below can fold them into a 4x4 map with 256 channels.
x = layers.Dense(units=100, activation="relu")(latent_inputs)
x = layers.Dense(units=1024, activation="relu")(x)
x = layers.Dense(units=4096, activation="relu")(x)
x = layers.Reshape((4, 4, 256))(x)
# Upsampling stack. With padding="same", every strides=2 layer doubles the
# height/width (4 -> 8 -> 16 -> 32) and every strides=1 layer leaves them
# unchanged, as the summary printed below shows.
x = layers.Conv2DTranspose(filters=256, kernel_size=3, activation="relu", strides=2, padding="same")(x)
x = layers.Conv2DTranspose(filters=128, kernel_size=3, activation="relu", strides=1, padding="same")(x)
x = layers.Conv2DTranspose(filters=128, kernel_size=3, activation="relu", strides=2, padding="same")(x)
x = layers.Conv2DTranspose(filters=64, kernel_size=3, activation="relu", strides=1, padding="same")(x)
x = layers.Conv2DTranspose(filters=64, kernel_size=3, activation="relu", strides=2, padding="same")(x)
# Output layer: one filter with a sigmoid activation; no strides argument is
# passed, and the spatial size stays at 32x32.
decoder_outputs = layers.Conv2DTranspose(filters=1, kernel_size=3, activation="sigmoid", padding="same")(x)

decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")

decoder.summary()

whose output is:

Model: "decoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_1 (InputLayer)        [(None, 25)]              0         
                                                                 
 dense (Dense)               (None, 100)               2600      
                                                                 
 dense_1 (Dense)             (None, 1024)              103424    
                                                                 
 dense_2 (Dense)             (None, 4096)              4198400   
                                                                 
 reshape (Reshape)           (None, 4, 4, 256)         0         
                                                                 
 conv2d_transpose (Conv2DTr  (None, 8, 8, 256)         590080    
 anspose)                                                        
                                                                 
 conv2d_transpose_1 (Conv2D  (None, 8, 8, 128)         295040    
 Transpose)                                                      
                                                                 
 conv2d_transpose_2 (Conv2D  (None, 16, 16, 128)       147584    
 Transpose)                                                      
                                                                 
 conv2d_transpose_3 (Conv2D  (None, 16, 16, 64)        73792     
 Transpose)                                                      
                                                                 
 conv2d_transpose_4 (Conv2D  (None, 32, 32, 64)        36928     
 Transpose)                                                      
                                                                 
 conv2d_transpose_5 (Conv2D  (None, 32, 32, 1)         577       
 Transpose)                                                      
                                                                 

I want to adjust my model so that decoder_outputs shape is (None, 40, 40, 1) instead of (None, 32, 32, 1). This is what I tried to do:

# First attempt at a 40x40 output: reshape straight to (40, 40, 1) and keep
# part of the upsampling stack. This ends up at (None, 160, 160, 1).
latent_dim = 25
latent_inputs = keras.Input(shape=(latent_dim,))

x = layers.Dense(units=100, activation="relu")(latent_inputs)
x = layers.Dense(units=1024, activation="relu")(x)
x = layers.Dense(units=1600, activation="relu")(x)  # Adjusted units to match 40*40*1
x = layers.Reshape((40, 40, 1))(x)  # Reshaped to (40, 40, 1)
# The tensor is already 40x40 at this point, so the two strides=2 layers below
# still upsample it by a combined factor of 4 (40 -> 80 -> 160). That is why
# the reported output shape is (None, 160, 160, 1) rather than (None, 40, 40, 1).
x = layers.Conv2DTranspose(filters=128, kernel_size=3, activation="relu", strides=2, padding="same")(x)
x = layers.Conv2DTranspose(filters=64, kernel_size=3, activation="relu", strides=1, padding="same")(x)
x = layers.Conv2DTranspose(filters=64, kernel_size=3, activation="relu", strides=2, padding="same")(x)
decoder_outputs = layers.Conv2DTranspose(filters=1, kernel_size=3, activation="sigmoid", padding="same")(x)

decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")

decoder.summary()

but unfortunately decoder_outputs shape is (None, 160, 160, 1).

Can you help me, please?

EDIT

I tried the following solution:

# Second attempt: same stem as the original 32x32 decoder, but the last four
# transposed convolutions use padding="valid" instead of padding="same".
latent_dim = 25
latent_inputs = keras.Input(shape=(latent_dim,))

x = layers.Dense(units=100, activation="relu")(latent_inputs)
x = layers.Dense(units=1024, activation="relu")(x)
x = layers.Dense(units=4096, activation="relu")(x)
x = layers.Reshape((4, 4, 256))(x)
x = layers.Conv2DTranspose(filters=256, kernel_size=3, activation="relu", strides=2, padding="same")(x)
x = layers.Conv2DTranspose(filters=128, kernel_size=3, activation="relu", strides=1, padding="same")(x)
# NOTE(review): the summary posted for this attempt lists 16x16 after the next
# layer, which is the size the original padding="same" model produced at this
# position. With padding="valid", kernel_size=3 and strides=2, Keras' rule
# out = in * stride + max(kernel - stride, 0) would presumably give 17x17 here
# (and 41x41 at the output) -- TODO confirm that the listing matches the run
# that produced the pasted summary.
x = layers.Conv2DTranspose(filters=128, kernel_size=3, activation="relu", strides=2, padding="valid")(x)
x = layers.Conv2DTranspose(filters=64, kernel_size=3, activation="relu", strides=1, padding="valid")(x)
x = layers.Conv2DTranspose(filters=64, kernel_size=3, activation="relu", strides=2, padding="valid")(x)
decoder_outputs = layers.Conv2DTranspose(filters=1, kernel_size=3, activation="sigmoid", padding="valid")(x)

decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")

decoder.summary()

that is, using padding="valid" for some layers, but this is the output I get:

Model: "decoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_1 (InputLayer)        [(None, 25)]              0         
                                                                 
 dense (Dense)               (None, 100)               2600      
                                                                 
 dense_1 (Dense)             (None, 1024)              103424    
                                                                 
 dense_2 (Dense)             (None, 4096)              4198400   
                                                                 
 reshape (Reshape)           (None, 4, 4, 256)         0         
                                                                 
 conv2d_transpose (Conv2DTr  (None, 8, 8, 256)         590080    
 anspose)                                                        
                                                                 
 conv2d_transpose_1 (Conv2D  (None, 8, 8, 128)         295040    
 Transpose)                                                      
                                                                 
 conv2d_transpose_2 (Conv2D  (None, 16, 16, 128)       147584    
 Transpose)                                                      
                                                                 
 conv2d_transpose_3 (Conv2D  (None, 18, 18, 64)        73792     
 Transpose)                                                      
                                                                 
 conv2d_transpose_4 (Conv2D  (None, 37, 37, 64)        36928     
 Transpose)                                                      
                                                                 
 conv2d_transpose_5 (Conv2D  (None, 39, 39, 1)         577       
 Transpose)

As you can see, the decoder_outputs shape is now (None, 39, 39, 1). I want it to be (None, 40, 40, 1). How can I fix this?

tail
  • 355
  • 2
  • 11
  • Do you understand **why** the output has this shape? Exactly how do you want the desired output to relate to the current actual output? Exactly *why should it be possible* to get output of a different shape and size? What input do you provide to get this result, and how do you expect the shape of the input to relate to the shape of the output? Can you recreate the problem with a simpler example? – Karl Knechtel Aug 29 '23 at 18:15
  • @KarlKnechtel The output has this shape due to kernels and/or padding. I want to adjust my code because it worked for a previous version of a project of mine, and I need it to be updated – tail Aug 29 '23 at 18:18
  • can you please tell the input shape that you are providing your model? – Ismail Vohra Aug 29 '23 at 18:28
  • Perhaps some of the `layers.Conv2DTranspose` should have `padding='valid'` instead of `padding='same'`? That would make the output larger in spatial dimensions, if I understand correctly. – MKimiSH Aug 30 '23 at 08:10
  • @MKimiSH I tried your solution but got (None, 39, 39, 1) – tail Aug 30 '23 at 08:35

2 Answers

0

I tried this way:

# Fragment: latent_inputs is not defined here; it is the keras.Input created in
# the question's listings. The layers match the question's padding="valid"
# attempt except for kernel_size=2 on the output layer.
x = layers.Dense(units=100, activation="relu")(latent_inputs)
x = layers.Dense(units=1024, activation="relu")(x)
x = layers.Dense(units=4096, activation="relu")(x)
x = layers.Reshape((4, 4, 256))(x)
x = layers.Conv2DTranspose(filters=256, kernel_size=3, activation="relu", strides=2, padding="same")(x)
x = layers.Conv2DTranspose(filters=128, kernel_size=3, activation="relu", strides=1, padding="same")(x)
# Assuming Keras' padding="valid" rule out = in * stride + max(kernel - stride, 0),
# the spatial size should run 8 -> 17 -> 19 -> 39 through the next three layers
# and 39 -> 40 through the kernel_size=2 output layer -- no summary was posted
# for this variant, so verify with decoder.summary().
x = layers.Conv2DTranspose(filters=128, kernel_size=3, activation="relu", strides=2, padding="valid")(x)
x = layers.Conv2DTranspose(filters=64, kernel_size=3, activation="relu", strides=1, padding="valid")(x)
x = layers.Conv2DTranspose(filters=64, kernel_size=3, activation="relu", strides=2, padding="valid")(x)
decoder_outputs = layers.Conv2DTranspose(filters=1, kernel_size=2, activation="sigmoid", padding="valid")(x)

that is using padding="valid" for some layers and kernel_size=2 for the last layer.

tail
  • 355
  • 2
  • 11
0

The reason I asked for the model input was so that I could do the calculations needed to produce your desired output. If you want to learn how to calculate the output shape of your CNN layers, here is a thread to help you out. Here is the code:

# Decoder variant that reaches a (40, 40, 1) output while keeping
# padding="same" on every transposed convolution.
latent_dim = 25
latent_inputs = keras.Input(shape=(latent_dim,))

# Fully connected stem; 4096 = 4 * 4 * 256 values feed the Reshape below.
x = layers.Dense(units=100, activation="relu")(latent_inputs)
x = layers.Dense(units=1024, activation="relu")(x)
x = layers.Dense(units=4096, activation="relu")(x)
x = layers.Reshape((4, 4, 256))(x)
x = layers.Conv2DTranspose(filters=256, kernel_size=3, activation="relu", strides=2, padding="same")(x)  # 4 -> 8
x = layers.Conv2DTranspose(filters=128, kernel_size=3, activation="relu", strides=1, padding="same")(x)  # 8 -> 8
# strides=5 is the key change: it takes the 8x8 map to 40x40 in a single step
# (see the summary below).
x = layers.Conv2DTranspose(filters=128, kernel_size=3, activation="relu", strides=5, padding="same")(x)
# The remaining layers use strides=1 (or pass no strides argument), so the
# spatial size stays at 40x40 while the channel count drops 128 -> 64 -> 64 -> 1.
x = layers.Conv2DTranspose(filters=64, kernel_size=3, activation="relu", strides=1, padding="same")(x)
x = layers.Conv2DTranspose(filters=64, kernel_size=3, activation="relu", strides=1, padding="same")(x)
decoder_outputs = layers.Conv2DTranspose(filters=1, kernel_size=3, activation="sigmoid", padding="same")(x)

decoder = keras.Model(latent_inputs, decoder_outputs, name="decoder")

decoder.summary()

Here is the output:

Model: "decoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_46 (InputLayer)       [(None, 25)]              0         
                                                                 
 dense_138 (Dense)           (None, 100)               2600      
                                                                 
 dense_139 (Dense)           (None, 1024)              103424    
                                                                 
 dense_140 (Dense)           (None, 4096)              4198400   
                                                                 
 reshape_46 (Reshape)        (None, 4, 4, 256)         0         
                                                                 
 conv2d_transpose_265 (Conv2  (None, 8, 8, 256)        590080    
 DTranspose)                                                     
                                                                 
 conv2d_transpose_266 (Conv2  (None, 8, 8, 128)        295040    
 DTranspose)                                                     
                                                                 
 conv2d_transpose_267 (Conv2  (None, 40, 40, 128)      147584    
 DTranspose)                                                     
                                                                 
 conv2d_transpose_268 (Conv2  (None, 40, 40, 64)       73792     
 DTranspose)                                                     
                                                                 
 conv2d_transpose_269 (Conv2  (None, 40, 40, 64)       36928     
 DTranspose)                                                     
                                                                 
 conv2d_transpose_270 (Conv2  (None, 40, 40, 1)        577       
 DTranspose)                                                     
                                                                 
=================================================================
Total params: 5,448,425
Trainable params: 5,448,425
Non-trainable params: 0
_________________________________________________________________