
I'm trying to train a model on a dataset and output a confusion matrix after training.

Here is the code:

import os
import pathlib

import numpy as np
import matplotlib.pyplot as plt
import PIL.Image

import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (Input, Conv2D, Concatenate, MaxPool2D,
                                     GlobalAvgPool2D, Dropout, Flatten, Activation)
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def squeezenet(input_shape, n_classes):
  
  def fire(x, fs, fe):
    # SqueezeNet "fire" module: a 1x1 squeeze convolution (fs filters)
    # feeding parallel 1x1 and 3x3 expand convolutions (fe filters each),
    # whose outputs are concatenated along the channel axis
    s = Conv2D(fs, 1, activation='relu')(x)
    e1 = Conv2D(fe, 1, activation='relu')(s)
    e3 = Conv2D(fe, 3, padding='same', activation='relu')(s)
    output = Concatenate()([e1, e3])
    return output
  
  
  input = Input(input_shape)
  
  x = Conv2D(96, 7, strides=2, padding='same', activation='relu')(input)
  x = MaxPool2D(3, strides=2, padding='same')(x)
  
  x = fire(x, 16, 64)
  x = fire(x, 16, 64)
  x = fire(x, 32, 128)
  x = MaxPool2D(3, strides=2, padding='same')(x)
  
  x = fire(x, 32, 128)
  x = fire(x, 48, 192)
  x = fire(x, 48, 192)
  x = fire(x, 64, 256)
  x = fire(x, 64, 256)
  x = MaxPool2D(3, strides=2, padding='same')(x)
    
  x = Dropout(0.6)(x)
    
  x = Conv2D(n_classes, 1)(x)
  x = GlobalAvgPool2D()(x)
  x = Flatten()(x)
  
  output = Activation('softmax')(x)
  
  model = Model(input, output)
  return model


test_datagen = ImageDataGenerator(rescale=1./255)


data_dir = os.path.join(r"location/directory of the file", "file")

data_dir = pathlib.Path(data_dir)

image_count = len(list(data_dir.glob('*/*.png')))
print(image_count)

rect = list(data_dir.glob('Rect/*'))
PIL.Image.open(str(rect[1]))


batch_size = 32
img_height = 227
img_width = 227

train_ds = tf.keras.preprocessing.image_dataset_from_directory(
  data_dir,
  validation_split=0.1,
  subset="training",
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size)

val_ds = tf.keras.preprocessing.image_dataset_from_directory(
  data_dir,
  validation_split=0.1,
  subset="validation",
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size)

class_names = train_ds.class_names
print(class_names)

AUTOTUNE = tf.data.experimental.AUTOTUNE

train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

normalization_layer = layers.experimental.preprocessing.Rescaling(1./255)

normalized_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
image_batch, labels_batch = next(iter(normalized_ds))
first_image = image_batch[0]
# Notice the pixels values are now in `[0,1]`.
print(np.min(first_image), np.max(first_image)) 

from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint


model = squeezenet((227, 227, 3), 2)

sgd = SGD(learning_rate=0.001, decay=0.0002, momentum=0.9, nesterov=True)
# image_dataset_from_directory yields integer labels, so a sparse
# categorical loss matches the 2-way softmax output
model.compile(optimizer=sgd, loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

print(model.summary())
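
The `history` object used below comes from the training step, which is not shown in the snippet above; a minimal sketch of what it presumably looks like (the epoch count is an assumption):

epochs = 10  # assumed value; the original snippet does not show the training call
history = model.fit(train_ds, validation_data=val_ds, epochs=epochs)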

acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

epochs_range = range(epochs)

plt.figure(figsize=(8, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

# supply an image to the classifier to get a prediction out
# calculate the confusion matrix manually

from sklearn.metrics import classification_report, confusion_matrix


Y_pred = model.predict_generator(val_ds, 720 // 32+1)
y_pred = np.argmax(Y_pred, axis=1)
print(y_pred.shape)
print('Confusion Matrix')
print(confusion_matrix(class_names, y_pred))
print('Classification Report')
target_names = ['Cats', 'Dogs', 'Horse']
print(classification_report(class_names, y_pred, target_names=target_names))

Here is the error I'm getting:

ValueError                                Traceback (most recent call last)
<ipython-input-9-5188ce05905a> in <module>
      9 print(y_pred.shape)
     10 print('Confusion Matrix')
---> 11 print(confusion_matrix(class_names, y_pred))
     12 print('Classification Report')
     13 target_names = ['Cats', 'Dogs', 'Horse']

in confusion_matrix(y_true, y_pred, labels, sample_weight)
    251 
    252     """
--> 253     y_type, y_true, y_pred = _check_targets(y_true, y_pred)
    254     if y_type not in ("binary", "multiclass"):
    255         raise ValueError("%s is not supported" % y_type)

in _check_targets(y_true, y_pred)
     69     y_pred : array or indicator matrix
     70     """
---> 71     check_consistent_length(y_true, y_pred)
     72     type_true = type_of_target(y_true)
     73     type_pred = type_of_target(y_pred)

in check_consistent_length(*arrays)
    203     if len(uniques) > 1:
    204         raise ValueError("Found input variables with inconsistent numbers of"
--> 205                          " samples: %r" % [int(l) for l in lengths])
    206 
    207 

ValueError: Found input variables with inconsistent numbers of samples: [3, 360]
  • Please remove the (lots of...) duplicate imports, as well as stuff irrelevant to the issue at hand (plots etc); see why [a wall of code isn't helpful](http://idownvotedbecau.se/toomuchcode/) – desertnaut Sep 08 '20 at 17:19

1 Answer


The parameters of the `confusion_matrix` method should be `y_true` and `y_pred`, as you can see in the docs.

It seems that your first parameter, `y_true` (= `class_names`), has size 3, while your second, `y_pred`, has size 360. However, both should have the same size, since `y_pred` holds the estimated results of your classification and `y_true` the corresponding ground truth.

Here is the associated scikit-learn example using 3 classes, [0, 1, 2]:

from sklearn.metrics import confusion_matrix
y_true = [2, 0, 2, 2, 0, 1]
y_pred = [0, 0, 2, 2, 0, 2]
confusion_matrix(y_true, y_pred)
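
which returns (row i, column j counts the samples whose true label is i and predicted label is j):

array([[2, 0, 0],
       [0, 0, 1],
       [1, 0, 2]])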

EDIT

How is `y_true` constructed?

Normally, `y_true` contains the label that corresponds to each of your inputs. You seem to give 360 inputs to your classifier, so each of those inputs should have an associated label, one of your `class_names`. The full vector containing the true labels of your inputs is `y_true`.
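
A minimal sketch of how that could look with the `val_ds` from your code (this assumes the label order matches the prediction order, i.e. the dataset is not reshuffled between the two passes; creating `val_ds` with `shuffle=False` guarantees that):

import numpy as np
from sklearn.metrics import confusion_matrix

# collect the ground-truth labels, batch by batch, from the validation dataset
y_true = np.concatenate([labels.numpy() for _, labels in val_ds])

# predict on the same dataset and take the most likely class per sample
Y_pred = model.predict(val_ds)
y_pred = np.argmax(Y_pred, axis=1)

print(confusion_matrix(y_true, y_pred))  # both vectors now have 360 entries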

etiennedm
  • In this case, what would be `y_true`? – shiv shah Sep 08 '20 at 15:41
  • I have updated the answer: your `y_true` should have the same size as your `y_pred`. `y_true` contains the true values of the expected results. Hope it helps – etiennedm Sep 08 '20 at 15:51
  • I guess the other question would be: since my validation size is 360, and that is what is being evaluated, how would I create `y_true` so that it works with my confusion matrix? – shiv shah Sep 08 '20 at 15:54
  • I have just updated the answer to give details on how to construct `y_true`. Hope it helps – etiennedm Sep 08 '20 at 16:23