2

I am trying to do image classification to distinguish cats and dogs. I want to do this by training a one-class neural network. I've tried this method with the VGG16 model, but now I want to use the ResNet50 as it is smaller and therefore hopefully faster. For the VGG16 the below method worked, but not for the ResNet50.

import os
import random
import itertools
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import tensorflow as tf
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.preprocessing import image
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D,MaxPool2D
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.optimizers import Adam
import keras
import random
import requests
import keras
from keras.models import Model
from keras.layers import Dense
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras.applications.vgg16 import decode_predictions
from keras import applications
from keras.optimizers import RMSprop

import pickle

import cv2

base_path = "datafolder/"
SHAPE = (224,224,3)
batch_size = 10

def to_grayscale_then_rgb(image):
    image = tf.image.rgb_to_grayscale(image)
    image = tf.image.grayscale_to_rgb(image)
    return image

def wrap_generator(generator):
    while True:
       x,y = next(generator)
       y = tf.keras.utils.to_categorical(y)
       zeros = tf.zeros_like(y) + tf.constant([1.,0.])
       y = tf.concat([y,zeros], axis=0)
    
       yield x,y

def set_seed(seed):

    tf.random.set_seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    random.seed(seed)


def get_model(train=True):

    set_seed(33)

    pre_process = Lambda(tf.keras.applications.resnet50.preprocess_input)
    vgg = applications.ResNet50(weights = 'imagenet', include_top = True, input_shape = SHAPE)
    vgg = Model(vgg.input, vgg.layers[-3].output)
    vgg.trainable = False

    inp = Input(SHAPE)
    vgg_16_process = pre_process(GaussianNoise(0.1)(inp))
    vgg_out = vgg(vgg_16_process)

    noise = Lambda(tf.zeros_like)(vgg_out)
    noise = GaussianNoise(0.1)(noise)

    if train:
        x = Lambda(lambda z: tf.concat(z, axis=0))([vgg_out,noise])
        x = Activation('relu')(x)
    else:
        x = vgg_out
    
    x = Dense(512, activation='relu')(x)
    x = Dense(128, activation='relu')(x)
    out = Dense(2, activation='softmax')(x)
    model = Model(inp, out)
    model.compile(Adam(lr=1e-4), loss='binary_crossentropy')

    return model

### FLOW GENERATORS ###

train_generator = train_datagen.flow_from_directory(
            base_path + 'training_set/training_set/',
            target_size = (SHAPE[0], SHAPE[1]),
            batch_size = batch_size,
            class_mode = 'categorical',
            shuffle = True,
            seed = 33,
            classes = ['dogs']
    )

model = get_model()

model.summary()

model.fit(wrap_generator(train_generator), steps_per_epoch=train_generator.samples/train_generator.batch_size, epochs=30)

For the VGG16 model this worked, however when trying to implement the ResNet50 model I got the following error:

ValueError: logits and labels must have the same shape ((None, 7, 7, 2) vs (None, None))

I suspect that this has to do with the structure of the neural network, which is the following:

Model: "model_57"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
input_59 (InputLayer)           [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
gaussian_noise_56 (GaussianNois (None, 224, 224, 3)  0           input_59[0][0]                   
__________________________________________________________________________________________________
lambda_98 (Lambda)              (None, 224, 224, 3)  0           gaussian_noise_56[0][0]          
__________________________________________________________________________________________________
model_56 (Functional)           (None, 7, 7, 2048)   23587712    lambda_98[0][0]                  
__________________________________________________________________________________________________
lambda_99 (Lambda)              (None, 7, 7, 2048)   0           model_56[0][0]                   
__________________________________________________________________________________________________
gaussian_noise_57 (GaussianNois (None, 7, 7, 2048)   0           lambda_99[0][0]                  
__________________________________________________________________________________________________
lambda_100 (Lambda)             (None, 7, 7, 2048)   0           model_56[0][0]                   
                                                                 gaussian_noise_57[0][0]          
__________________________________________________________________________________________________
activation_686 (Activation)     (None, 7, 7, 2048)   0           lambda_100[0][0]                 
__________________________________________________________________________________________________
dense_84 (Dense)                (None, 7, 7, 512)    1049088     activation_686[0][0]             
__________________________________________________________________________________________________
dense_85 (Dense)                (None, 7, 7, 128)    65664       dense_84[0][0]                   
__________________________________________________________________________________________________
dense_86 (Dense)                (None, 7, 7, 2)      258         dense_85[0][0]                   
==================================================================================================
Total params: 24,702,722
Trainable params: 1,115,010
Non-trainable params: 23,587,712

The last layer contains two 7's while for the VGG16 this didn't. I've tried changing multiple things in the code, but the problem did not seem to disappear. I suspect something is wrong with the get_model function. What could be causing this problem?

Marco Cerliani
  • 21,233
  • 3
  • 49
  • 54
Freek Cool
  • 47
  • 3

1 Answers1

1

Simply changing:

vgg = Model(vgg.input, vgg.layers[-3].output)

into:

vgg = Model(vgg.input, vgg.layers[-2].output)

should works.

This is because we need a layer that outputs a 2D tensor. When using VGG16, the layer in the position [-3] is a Flatten layer so it's ok. When using RESNET50, we need to use the layer in the position [-2] (GlobalPooling) in order to get a 2D output

Marco Cerliani
  • 21,233
  • 3
  • 49
  • 54