What format do these lists have to be in to be accepted by Keras Tuners Search function?

Question

This Code reads in a set of testing and training guitar jpg images for the neural net to learn and test from.

 import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import random

DATADIR = "C:/Users/TheKid/Data/DataMiningProject/DataSet"
CATEGORIES = ["Fender_Jazzmaster", "Gibson_ES"]
CATEGORIES2 = ["Test"]

for category in CATEGORIES:
    path = os.path.join(DATADIR,category)
    for img in os.listdir(path):
        img_array = cv2.imread(os.path.join(path,img),cv2.IMREAD_GRAYSCALE)
    
IMG_SIZE = 70

new_array = cv2.resize(img_array,(IMG_SIZE,IMG_SIZE))

training_data = []
def create_training_data():
        for category in CATEGORIES:
            path = os.path.join(DATADIR,category)
            class_num = CATEGORIES.index(category)
            for img in os.listdir(path):
                img_array = cv2.imread(os.path.join(path,img),cv2.IMREAD_GRAYSCALE)
                new_array = cv2.resize(img_array,(IMG_SIZE,IMG_SIZE))
                training_data.append([new_array,class_num])

create_training_data()

print(len(training_data))
random.shuffle(training_data)

X = []
y = []

for features, label in training_data:
    X.append(features)
    y.append(label)

X = np.array(X).reshape(-1, IMG_SIZE, IMG_SIZE, 1)              

for category in CATEGORIES2:
    path2 = os.path.join(DATADIR,category)
    for img in os.listdir(path2):
        img_array2 = cv2.imread(os.path.join(path2,img),cv2.IMREAD_GRAYSCALE)
    
IMG_SIZE = 70

new_array2 = cv2.resize(img_array,(IMG_SIZE,IMG_SIZE))

testing_data = []
def create_testing_data():
        for category in CATEGORIES2:
            path2 = os.path.join(DATADIR,category)
            class_num2 = CATEGORIES2.index(category)
            for img in os.listdir(path2):
                img_array2 = cv2.imread(os.path.join(path2,img),cv2.IMREAD_GRAYSCALE)
                new_array2 = cv2.resize(img_array2,(IMG_SIZE,IMG_SIZE))
                testing_data.append([new_array2,class_num2])                
                
            

create_testing_data()

print(len(testing_data))
random.shuffle(testing_data)

X2 = []
y2 = []

for features, label in testing_data:
    X2.append(features)
    y2.append(label)

X2 = np.array(X).reshape(-1, IMG_SIZE, IMG_SIZE, 1) 

import pickle

pickle_out = open("X.pickle" , "wb")
pickle.dump(X, pickle_out)
pickle_out.close()

pickle_out = open("y.pickle" , "wb")
pickle.dump(y, pickle_out)
pickle_out.close()

pickle_in = open("X.pickle", "rb")
X = pickle.load(pickle_in)

pickle_out = open("X2.pickle" , "wb")
pickle.dump(X2, pickle_out)
pickle_out.close()

pickle_out = open("y2.pickle" , "wb")
pickle.dump(y2, pickle_out)
pickle_out.close()

pickle_in = open("X2.pickle", "rb")
X = pickle.load(pickle_in)

This next bit of code takes in the pickle file saved in previous code and is supposed to use Keras tuners search function to run different variants of the neural net with different amounts of conv layer ,layer sizes etc so I can choose the most efficient version. But when run this error gets thrown:

 ValueError: Data cardinality is ambiguous:
  x sizes: 1312
  y sizes: 12
Please provide data which shares the same first dimension.

The Shapes of all the variables are:

(x_train = (1312, 70, 70, 1)

y_train =(1312,)

x_test = (1312, 70, 70, 1)

y_test =(12,)

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.callbacks import TensorBoard
import numpy as np
import time
import pickle
import matplotlib.pyplot as plt
from tensorflow import keras
from kerastuner.tuners import RandomSearch
from kerastuner.engine.hyperparameters import HyperParameters




pickle_in = open("X.pickle","rb")
x_train = pickle.load(pickle_in)

pickle_in = open("y.pickle","rb")
y_train = pickle.load(pickle_in)

pickle_in = open("X2.pickle","rb")
x_test = pickle.load(pickle_in)

pickle_in = open("y2.pickle","rb")
y_test = pickle.load(pickle_in)



x_train=np.array(x_train/255.0)
y_train=np.array(y_train)

x_test=np.array(x_test/255.0)
y_test=np.array(y_test)


LOG_DIR = f"{int(time.time())}"

def build_model(hp):
    model = keras.models.Sequential()

    model.add(Conv2D(hp.Int("input_units",32, 256, 32 ), (3, 3), input_shape=x_train.shape[1:]))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    for i in range(hp.Int("n_layers", 1, 4)):
        model.add(Conv2D(hp.Int(f"conv-{i}_units",32, 256, 32 ), (3, 3)))
        model.add(Activation('relu'))
    

    model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors

    model.add(Dense(10))
    model.add(Activation("softmax"))

    model.compile(optimizer="adam",
                  loss="binary_crossentropy",
                  metrics=["accuracy"])

    return model

tuner = RandomSearch(
        build_model,
        objective = "val_accuracy",
        max_trials = 1,
        executions_per_trial = 1,
        directory = LOG_DIR)



tuner.search(x=x_train,
            y=y_train,
            epochs=1,
            batch_size=64,
            validation_data=(x_test,y_test))

with open(f"tuner_{int(time.time())}.pkl", "wb") as f:
    pickle.dump(tuner, f)

tuner = pickle.load(open(""))

print(tuner.get_best_hyperparameters()[0].values)

How would I go about resolving this error? It's seems like a matrix formatting issue to me but I have little experience in dealing with a problem like this.

I mention the error in the description above the second piece of code it reads: ValueError: Data cardinality is ambiguous: x sizes: 1312 y sizes: 12 Please provide data which shares the same first dimension. — ykseulb, Oct 30 '20 at 18:50
Ok. I formatted the question for better readability. The changes would be reflected after community members approval. — , Nov 02 '20 at 04:56

score 0 · Answer 1 · answered Nov 02 '20 at 05:02

As the error message and shape of the data (x_test and y_test) clearly suggests, you have 1312 rows in x_test and 12 rows in y_test. You are feeding this data to validation_data=(x_test,y_test).

Kindly pass the same dimension or same rows of data for x_test and y_test in validation_data and this should fix your error.

What format do these lists have to be in to be accepted by Keras Tuners Search function?

1 Answers1