VGG19 neural network for single-channel images for use in SRGAN

Question

I am trying to write a VGG19 neural network for single-channel images, where everything is essentially the same as in a three-channel network except for the input layer.

def model(self, inputShape=(64, 64, 1)):
    # The input layer is the only part that differs from the three-channel
    # network: the default shape has a single channel as its last dimension.
    inputLayer = Input(shape=inputShape)

After applying the Flatten layer to the convolution tensor, I use the same dense-layer parameters as in the classic VGG19, but I get an error when compiling the model:

ValueError: Shapes (None, 64, 64, 1) and (None, 1000) are incompatible

As far as I understand, the number of neurons in the dense layer should correspond to the dimensionality of the input data. That is, for a 64x64 image, after applying the Flatten layer, the dense layer should receive a vector of 4096 values, as described in the classical model:

    # Classifier head: collapse the feature maps into a vector, then two
    # 4096-unit fully connected layers, each followed by dropout.
    layerSet = Flatten()(layerSet)
    layerSet = Dense(4096, activation='relu')(layerSet)
    layerSet = Dropout(0.5)(layerSet)
    layerSet = Dense(4096, activation='relu')(layerSet)
    layerSet = Dropout(0.5)(layerSet)
    # The classic VGG19 ends in a softmax over its 1000 classes (not ReLU),
    # so the outputs form a probability distribution.
    outputLayer = Dense(1000, activation='softmax')(layerSet)

The last dense layer has 1000 neurons, each corresponding to one recognizable class. In my case, I need a set of features for SRGAN, so I doubt that my problem needs a classification vector at all. The features derived from the VGG19 model, together with the features derived from the discriminator model, should be passed as the output layer of the generative adversarial model.

Below is the full code example, containing the model itself and the training method. I expect to eventually get the required features from the model.

class VGG19DeepConvolutionNetwork:
    """VGG19-style convolutional network for single-channel images.

    The network is built and compiled once in ``__init__`` with the default
    input shape of ``model``; ``train`` fits it on data read through
    ``ImageDataProcessing``.
    """

    # Compiled Keras model; assigned by model().
    __model = None

    def __init__(self):
        self.model()

    def model(self, inputShape=(64, 64, 1)):
        """Build and compile the network, replacing any previously built model.

        Args:
            inputShape: Shape of one input image; the default is a 64x64
                image with a single channel.
        """
        # (filters, number of 3x3 convolutions) for each of the five blocks.
        blocks = ((64, 2), (128, 2), (256, 4), (512, 4), (512, 4))

        inputLayer = Input(shape=inputShape)

        layerSet = inputLayer
        for blockIndex, (filters, convCount) in enumerate(blocks, start=1):
            for convIndex in range(1, convCount + 1):
                layerSet = Conv2D(filters, (3, 3), activation='relu', padding='same',
                                  name='block{}_conv{}'.format(blockIndex, convIndex))(layerSet)
            # Every block ends with a stride-2 max pooling.
            layerSet = MaxPooling2D(strides=(2, 2), padding='same')(layerSet)

        layerSet = Flatten()(layerSet)
        layerSet = Dense(4096, activation='relu')(layerSet)
        layerSet = Dropout(0.5)(layerSet)
        layerSet = Dense(4096, activation='relu')(layerSet)
        layerSet = Dropout(0.5)(layerSet)
        # Softmax rather than ReLU: the model is compiled with categorical
        # cross-entropy, which expects a probability distribution over classes.
        outputLayer = Dense(1000, activation='softmax')(layerSet)

        self.__model = Model(inputs=[inputLayer], outputs=[outputLayer])

        self.__model.compile(optimizer='adam', loss='categorical_crossentropy')
        # summary() prints the table itself and returns None, so wrapping it
        # in print() would only add a stray "None" line.
        self.__model.summary()

    def getModel(self):
        """Return the compiled Keras model (``None`` if none has been built)."""
        return self.__model

    def train(self, imageDataPath: str = 'srgangImageData.h5', weightsPath: str = 'vgg19Weights.h5',
              sliceSize=32, epochsNumber=100):
        """Fit the model on the data stored at ``imageDataPath``.

        Args:
            imageDataPath: File passed to ``ImageDataProcessing.readImageData``.
            weightsPath: Path handed to ``ModelCheckpoint`` for the best model.
            sliceSize: Side length of the square single-channel input used to
                build a model, but only when no model exists yet.
            epochsNumber: Number of training epochs.

        Returns:
            The object returned by ``fit``.
        """
        # NOTE: __init__ already builds a model, so on a normally constructed
        # instance this branch does not run and sliceSize has no effect.
        if self.__model is None:
            self.model((sliceSize, sliceSize, 1))

        imageData = ImageDataProcessing()
        sourceTrain, targetTrain, sourceTest, targetTest = imageData.readImageData(imageDataPath)
        del imageData

        print('train source', sourceTrain.shape)
        print('train target', targetTrain.shape)
        print('test source', sourceTest.shape)
        print('test target', targetTest.shape)

        # NOTE(review): the reported "Shapes (None, 64, 64, 1) and (None, 1000)
        # are incompatible" error suggests the targets are images while the
        # model outputs 1000 class scores; categorical cross-entropy needs
        # targets shaped like the output. Confirm what readImageData returns.

        # save_best_only with mode='min' keeps only the checkpoint with the
        # lowest monitored value; save_weights_only=False stores the whole
        # model rather than just its weights.
        checkpoint = ModelCheckpoint(weightsPath, verbose=1, save_best_only=True,
                                     save_weights_only=False, mode='min')
        callbacks_list = [checkpoint]

        history = self.__model.fit(sourceTrain, targetTrain, batch_size=128,
                                   steps_per_epoch=len(sourceTrain) // 128,
                                   validation_data=(sourceTest, targetTest),
                                   callbacks=callbacks_list, shuffle=True,
                                   epochs=epochsNumber, verbose=1)
        return history

Naphat Amundsen · Accepted Answer · 2022-09-11T12:17:23.517

Some corrections:

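The Flatten layer should produce 2 x 2 x 512 = 2048 values, since that is the shape of the last max-pooling layer's output (the five stride-2 poolings shrink 64 down to 2). The Dense layer that follows may have any number of neurons; TensorFlow/Keras infers its input size for you.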
The reason the last layer gets 1000 neurons is because the model was originally trained on a dataset with 1000 classes (1 neuron per class).

What version of tensorflow are you using? Are you sure it is failing at the compile step? I tried to compile your model with tensorflow 2.10.0 (Python 3.10.4) and everything worked fine. I tried to do a forward pass with an input of (10,64,64,1) and that worked fine too.

Here is the code I tried both locally and in Google Colab:

import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout

class VGG19DeepConvolutionNetwork:
    """VGG19-style convolutional network for single-channel images.

    A model with the default input shape is built and compiled on
    construction; ``getModel`` exposes it and ``train`` fits it.
    """

    # Compiled Keras model; assigned by model().
    __model = None

    def __init__(self):
        self.model()

    @staticmethod
    def _convBlock(tensor, blockIndex, filters, convCount):
        """Append ``convCount`` 3x3 ReLU convolutions and a stride-2 max pooling."""
        for convIndex in range(1, convCount + 1):
            tensor = Conv2D(filters, (3, 3), activation='relu', padding='same',
                            name=f'block{blockIndex}_conv{convIndex}')(tensor)
        return MaxPooling2D(strides=(2, 2), padding='same')(tensor)

    def model(self, inputShape=(64, 64, 1)):
        """Build and compile the network, replacing any previously built model.

        Args:
            inputShape: Shape of one input image; the default is a 64x64
                image with a single channel.
        """
        inputLayer = Input(shape=inputShape)

        # Five convolutional blocks: 2, 2, 4, 4 and 4 convolutions.
        layerSet = self._convBlock(inputLayer, 1, 64, 2)
        layerSet = self._convBlock(layerSet, 2, 128, 2)
        layerSet = self._convBlock(layerSet, 3, 256, 4)
        layerSet = self._convBlock(layerSet, 4, 512, 4)
        layerSet = self._convBlock(layerSet, 5, 512, 4)

        layerSet = Flatten()(layerSet)
        layerSet = Dense(4096, activation='relu')(layerSet)
        layerSet = Dropout(0.5)(layerSet)
        layerSet = Dense(4096, activation='relu')(layerSet)
        layerSet = Dropout(0.5)(layerSet)
        # Softmax rather than ReLU: the model is compiled with categorical
        # cross-entropy, which expects a probability distribution over classes.
        outputLayer = Dense(1000, activation='softmax')(layerSet)

        self.__model = Model(inputs=[inputLayer], outputs=[outputLayer])

        self.__model.compile(optimizer='adam', loss='categorical_crossentropy')
        # summary() prints the table itself and returns None, so wrapping it
        # in print() would only add a stray "None" line.
        self.__model.summary()

    def getModel(self):
        """Return the compiled Keras model (``None`` if none has been built)."""
        return self.__model

    def train(self, imageDataPath: str = 'srgangImageData.h5', weightsPath: str = 'vgg19Weights.h5',
              sliceSize=32, epochsNumber=100):
        """Fit the model on the data stored at ``imageDataPath``.

        Args:
            imageDataPath: File passed to ``ImageDataProcessing.readImageData``.
            weightsPath: Path handed to ``ModelCheckpoint`` for the best model.
            sliceSize: Side length of the square single-channel input used to
                build a model, but only when no model exists yet.
            epochsNumber: Number of training epochs.

        Returns:
            The object returned by ``fit``.
        """
        # NOTE: __init__ already builds a model, so on a normally constructed
        # instance this branch does not run and sliceSize has no effect.
        if self.__model is None:
            self.model((sliceSize, sliceSize, 1))

        imageData = ImageDataProcessing()
        sourceTrain, targetTrain, sourceTest, targetTest = imageData.readImageData(imageDataPath)
        del imageData

        print('train source', sourceTrain.shape)
        print('train target', targetTrain.shape)
        print('test source', sourceTest.shape)
        print('test target', targetTest.shape)

        # NOTE(review): categorical cross-entropy needs targets shaped like the
        # model output, i.e. (batch, 1000). Image-shaped targets will make
        # fit() fail with a shape mismatch; confirm what readImageData returns.

        # save_best_only with mode='min' keeps only the checkpoint with the
        # lowest monitored value; save_weights_only=False stores the whole
        # model rather than just its weights.
        checkpoint = ModelCheckpoint(weightsPath, verbose=1, save_best_only=True,
                                     save_weights_only=False, mode='min')
        callbacks_list = [checkpoint]

        history = self.__model.fit(sourceTrain, targetTrain, batch_size=128,
                                   steps_per_epoch=len(sourceTrain) // 128,
                                   validation_data=(sourceTest, targetTest),
                                   callbacks=callbacks_list, shuffle=True,
                                   epochs=epochsNumber, verbose=1)
        return history

# Smoke test: build the network and run one forward pass on a random batch
# of ten 64x64 single-channel inputs.
network = VGG19DeepConvolutionNetwork()
randomBatch = tf.random.uniform(shape=(10, 64, 64, 1))
print(network.getModel()(randomBatch))
# Training is left disabled here:
# network.train()

@ArturMangus I just tried to run my code in Colab and that also worked fine. I have updated my answer with my code (which is basically your code). Are there any significant differences between my code and yours? — Naphat Amundsen, Sep 11 '22 at 12:18

VGG19 neural network for single-channel images for use in SRGAN

1 Answer