
I collected this code and got high accuracy on both training and validation (more than 90%), but the evaluation metrics (confusion matrix and classification report) are disastrous. Here is the collected code:

from google.colab import drive
drive.mount('/content/drive')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from glob import glob

training_dir = '/content/drive/MyDrive/43 Batch Group 18/Modified Dataset/train'
validation_dir = '/content/drive/MyDrive/43 Batch Group 18/Modified Dataset/test'

image_files = glob(training_dir + '/*/*.jp*g') + glob(training_dir + '/*/*.png')
valid_image_files = glob(validation_dir + '/*/*.jp*g') + glob(validation_dir + '/*/*.png')

folders = glob(training_dir + '/*')
num_classes = len(folders)
print ('Total Classes = ' + str(num_classes))

!mkdir ~/.keras
!mkdir ~/.keras/models
!cp ../input/keras-pretrained-models/*notop* ~/.keras/models/
!cp ../input/keras-pretrained-models/imagenet_class_index.json ~/.keras/models/
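# note: the ../input/keras-pretrained-models paths look like Kaggle dataset paths and may not exist in this Colab environment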

from keras.models import Model
from keras.layers import Flatten, Dense
from keras.applications import VGG19
#from keras.preprocessing import image

IMAGE_SIZE = [128, 128]  # using a 128x128 input size; you can increase it for better results

vgg = VGG19(input_shape = IMAGE_SIZE + [3], weights = 'imagenet', include_top = False)  # input_shape = (128, 128, 3); include_top=False drops the ImageNet classifier head

for layer in vgg.layers:
    layer.trainable = False
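# the VGG19 convolutional base is frozen above; only the new classifier head will be trained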

x = Flatten()(vgg.output)
x = Dense(num_classes, activation = 'softmax')(x)  # output layer with softmax, since this is a multi-class (single-label) classification problem

model = Model(inputs = vgg.input, outputs = x)

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()


from keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg19 import preprocess_input  # use the VGG19 variant for consistency (it is identical to the VGG16 one)

training_datagen = ImageDataGenerator(
                                    rescale=1./255,
                                    shear_range=0.2, 
                                    zoom_range=0.2,
                                    horizontal_flip=True,
                                    preprocessing_function=preprocess_input)

validation_datagen = ImageDataGenerator(rescale = 1./255, preprocessing_function=preprocess_input)
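# note: both datagens above apply preprocess_input as well as the 1./255 rescale to every image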

training_generator = training_datagen.flow_from_directory(training_dir, target_size = IMAGE_SIZE, batch_size = 256, class_mode = 'categorical')
validation_generator = validation_datagen.flow_from_directory(validation_dir, target_size = IMAGE_SIZE, batch_size = 64, class_mode = 'categorical')
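# note: flow_from_directory uses shuffle=True by default for both generators above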

training_generator.class_indices

training_images = 15167
validation_images = 837

history = model.fit(training_generator,
                   epochs = 10, 
                   validation_data = validation_generator)


import matplotlib.pyplot as plt
plt.plot(history.history['loss'], label='train loss')
plt.plot(history.history['val_loss'], label='val loss')
plt.legend()
plt.savefig('LossVal_loss')  # save before show(); otherwise the saved figure is blank
plt.show()

plt.plot(history.history['accuracy'], label='train acc')
plt.plot(history.history['val_accuracy'], label='val acc')
plt.legend()
plt.savefig('AccVal_acc')
plt.show()

from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from mlxtend.plotting import plot_confusion_matrix

prediction = model.predict(validation_generator)
pred = np.argmax(prediction, axis=1)

y_true = validation_generator.classes
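# validation_generator.classes holds the labels in the fixed order of validation_generator.filenames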

cm = confusion_matrix(y_true, pred)
print(cm)

target_names=['cardiomegaly', 'normal', 'pneumonia','tuberculosis']
report = classification_report(y_true, pred, target_names=target_names)
print(report)

plot_confusion_matrix(cm, figsize=(5,5))

accuracy = accuracy_score(y_true, pred)
print('Accuracy: {:.2f}%'.format(accuracy*100))

Output Performance Metrics:

[screenshot of the confusion matrix and classification report; not reproduced here]

The model trains to high accuracy but the evaluation metrics are not satisfactory. Can anyone tell me the reason behind this, or what I need to change in the evaluation code?
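One thing I am not sure about is whether the order of model.predict(validation_generator) lines up with validation_generator.classes. A variant of the evaluation I could try is sketched below (eval_generator is just a placeholder name of mine; it assumes the same directory layout, model and preprocessing as above):

# build a separate, non-shuffled generator so the prediction order is fixed and known
eval_generator = validation_datagen.flow_from_directory(
    validation_dir,
    target_size=IMAGE_SIZE,
    batch_size=64,
    class_mode='categorical',
    shuffle=False)

prediction = model.predict(eval_generator)
pred = np.argmax(prediction, axis=1)

# classes and filenames now follow the same fixed order as the predictions
y_true = eval_generator.classes
target_names = list(eval_generator.class_indices.keys())

print(confusion_matrix(y_true, pred))
print(classification_report(y_true, pred, target_names=target_names))
print('Accuracy: {:.2f}%'.format(accuracy_score(y_true, pred) * 100))

The only change relative to the code above is shuffle=False and reading the class names from class_indices; I have not confirmed whether this fixes the reported metrics.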
