I am trying to classify 2 classes of images. Though I am getting high train and validation accuracy (0.97) after 10 epochs, my test results are awful (precision 0.48) and the confusion matrix shows the network is predicting the images for the wrong class (attached results).
There are only 2 classes in the dataset, each class has 10,000 image examples (after augmentation). I am using the VGG16 network. The full dataset is split 20% to test set (this split was performed by taking random images from each class therefore it is shuffled). The remaining images are split to 80% train and 20% valid sets (as indicated in the ImageDataGenerator line of the code). So in the end there are:
12,904 Train images belonging to 2 classes
3,224 Valid images belonging to 2 classes
4,032 Test images belonging to 2 classes
This is my code:
def CNN(CNN='VGG16', choice='predict', prediction='./dataset/Test/image.jpg'):
''' Train images using one of several CNNs '''
Train = './dataset/Train'
Tests = './dataset/Test'
shape = (224, 224)
epochs = 10
batches = 16
classes = []
for c in os.listdir(Train): classes.append(c)
IDG = keras.preprocessing.image.ImageDataGenerator(validation_split=0.2)
train = IDG.flow_from_directory(Train, target_size=shape, color_mode='rgb',
classes=classes, batch_size=batches, shuffle=True, subset='training')
valid = IDG.flow_from_directory(Train, target_size=shape, color_mode='rgb',
classes=classes, batch_size=batches, shuffle=True, subset='validation')
tests = IDG.flow_from_directory(Tests, target_size=shape, color_mode='rgb',
classes=classes, batch_size=batches, shuffle=True)
input_shape = train.image_shape
if CNN == 'VGG16' or 'vgg16':
model = VGG16(weights=None, input_shape=input_shape,
classes=len(classes))
elif CNN == 'VGG19' or 'vgg19':
model = VGG19(weights=None, input_shape=input_shape,
classes=len(classes))
elif CNN == 'ResNet50' or 'resnet50':
model = ResNet50(weights=None, input_shape=input_shape,
classes=len(classes))
elif CNN == 'DenseNet201' or 'densenet201':
model = DenseNet201(weights=None, input_shape=input_shape,
classes=len(classes))
model.compile(optimizer=keras.optimizers.SGD(
lr=1e-3,
decay=1e-6,
momentum=0.9,
nesterov=True),
loss='categorical_crossentropy',
metrics=['accuracy'])
Esteps = int(train.samples/train.next()[0].shape[0])
Vsteps = int(valid.samples/valid.next()[0].shape[0])
if choice == 'train':
history= model.fit_generator(train,
steps_per_epoch=Esteps,
epochs=epochs,
validation_data=valid,
validation_steps=Vsteps,
verbose=1)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()
Y_pred = model.predict_generator(tests, verbose=1)
y_pred = np.argmax(Y_pred, axis=1)
matrix = confusion_matrix(tests.classes, y_pred)
df_cm = pd.DataFrame(matrix, index=classes, columns=classes)
plt.figure(figsize=(10,7))
sn.heatmap(df_cm, annot=True)
print(classification_report(tests.classes,y_pred,target_names=classes))
model.save_weights('weights.h5')
elif choice == 'predict':
model.load_weights('./weights.h5')
img = image.load_img(prediction, target_size=shape)
im = image.img_to_array(img)
im = np.expand_dims(im, axis=0)
if CNN == 'VGG16' or 'vgg16':
im = keras.applications.vgg16.preprocess_input(im)
prediction = model.predict(im)
print(prediction)
elif CNN == 'VGG19' or 'vgg19':
im = keras.applications.vgg19.preprocess_input(im)
prediction = model.predict(im)
print(prediction)
elif CNN == 'ResNet50' or 'resnet50':
im = keras.applications.resnet50.preprocess_input(im)
prediction = model.predict(im)
print(prediction)
print(keras.applications.resnet50.decode_predictions(prediction))
elif CNN == 'DenseNet201' or 'densenet201':
im = keras.applications.densenet201.preprocess_input(im)
prediction = model.predict(im)
print(prediction)
print(keras.applications.densenet201.decode_predictions(prediction))
CNN(CNN='VGG16', choice='train')
Results:
precision recall f1-score support
Predator 0.49 0.49 0.49 2016
Omnivore 0.49 0.49 0.49 2016
accuracy -- -- 0.49 4032
I suspect that the ImageDataGenerator() is not shuffling the images "before" the train/valid split. If this is the case how can i force the ImageDataGenerator here in Keras to shuffle the dataset before the split?
If shuffling is not the case, how can i solve my issue? what am I doing wrong?