I have a large dataset (30,000 images, ~80 kB each), and I'm trying to load it into my model in batches so that I don't run out of memory on my GPU. Each image is a 200x200 NumPy array with values of either 0 or 1, stored in a CSV file, so I don't need to resize it or use any image reader; it's already an array. I'm using a custom generator to achieve this:
import numpy as np
import keras

class My_Custom_Generator(keras.utils.Sequence):

    def __init__(self, image_filenames, labels, batch_size):
        self.image_filenames = image_filenames
        self.labels = labels
        self.batch_size = batch_size

    def __len__(self):
        # Number of batches per epoch
        return int(np.ceil(len(self.image_filenames) / float(self.batch_size)))

    def __getitem__(self, idx):
        # Slice out one batch of filenames and the corresponding labels
        batch_x = self.image_filenames[idx * self.batch_size : (idx + 1) * self.batch_size]
        batch_y = self.labels[idx * self.batch_size : (idx + 1) * self.batch_size]
        return np.array(batch_x), np.array(batch_y)
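For context, a single file loads cleanly into a 200x200 array of 0s and 1s, roughly like this (the filename here is just a placeholder, and I'm assuming the values are comma-separated):

import numpy as np

# One image: a 200x200 grid of 0/1 values stored as CSV
img = np.loadtxt('image_0001.csv', delimiter=',')  # placeholder filename
print(img.shape)       # (200, 200)
print(np.unique(img))  # [0. 1.]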
The rest of my code (data loading, the model, and training) looks like this:
X_train_filenames = np.load('X_train_filenames.npy')
y_train = np.load('y_train.npy')
X_val_filenames = np.load('X_val_filenames.npy')
y_val = np.load('y_val.npy')
batch_size = 256
my_training_batch_generator = My_Custom_Generator(X_train_filenames, y_train, batch_size)
my_validation_batch_generator = My_Custom_Generator(X_val_filenames, y_val, batch_size)
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Activation, Dropout, Flatten, Dense, BatchNormalization

model = Sequential()
model.add(Conv2D(filters=32, kernel_size=(3, 3), input_shape=(200, 200, 1)))
#model.add(BatchNormalization(axis=3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.2))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))
model.add(Conv2D(128, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.15))
model.add(Conv2D(256, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(32))
model.add(Activation('relu'))
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.compile(optimizer="adam", loss='binary_crossentropy', metrics=['accuracy'])
model.summary()
model.fit_generator(generator=my_training_batch_generator,
                    steps_per_epoch=int(y_train.shape[0] // batch_size),
                    epochs=10,
                    verbose=1,
                    validation_data=my_validation_batch_generator,
                    validation_steps=int(y_val.shape[0] // batch_size))
I get an error:
ValueError: Error when checking input: expected conv2d_1_input to have 4 dimensions, but got array with shape (256, 1)
As I understand it, the input should be a 4-dimensional array of shape (None, 200, 200, 1), but I don't know how to achieve that, since I've only just started learning how to load a dataset in parts instead of all at once.
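My guess is that __getitem__ has to actually read the CSV files and return the images as an array of shape (batch_size, 200, 200, 1), rather than returning the filenames themselves. Something like this untested sketch (assuming each entry in image_filenames is the path to one comma-separated CSV file):

    def __getitem__(self, idx):
        batch_x = self.image_filenames[idx * self.batch_size : (idx + 1) * self.batch_size]
        batch_y = self.labels[idx * self.batch_size : (idx + 1) * self.batch_size]
        # Read each CSV into a 200x200 array; ravel() in case the filename
        # array is 2-D (the (256, 1) in the error message suggests it might be)
        images = np.array([np.loadtxt(f, delimiter=',') for f in np.ravel(batch_x)])
        # Add the channel dimension so the batch has shape (batch_size, 200, 200, 1)
        images = images.reshape(-1, 200, 200, 1).astype(np.float32)
        return images, np.array(batch_y)

Is that the right way to do it, or is there a better way to feed this kind of data to the model batch by batch?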