You should try using data generators.
A DataGenerator is an object that takes as input the X_train and y_train matrices and put the samples into batches following some criterion. It can also be used to handle large volumes of data that cannot be loaded at once on the virtual memory.
Here is an example on how to implement one !
Basically get_item will give you your next batch so that's the place to implement all the conditions you might need.
import numpy as np
import keras
class DataGenerator(keras.utils.Sequence):
'Generates data for Keras'
def __init__(self, X, labels, batch_size=32, dim=(32,32,32), n_channels=1,
n_classes=10, shuffle=True):
'Initialization'
self.dim = dim
self.batch_size = batch_size
self.labels = labels
self.X = X
self.n_channels = n_channels
self.n_classes = n_classes
self.shuffle = shuffle
self.on_epoch_end()
def __len__(self):
'Denotes the number of batches per epoch'
return int(np.floor(len(self.X) / self.batch_size))
def __getitem__(self, index):
'Generate one batch of data'
# Generate indexes of the batch to make sure samples dont repeat
list_IDs_temp = ... your code
# Generate data
X, y = self.__data_generation(list_IDs_temp)
return X, y
def on_epoch_end(self):
'Updates indexes after each epoch'
self.indexes = np.arange(len(self.X))
if self.shuffle == True:
np.random.shuffle(self.indexes)
def __data_generation(self, list_IDs_temp):
'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels)
# Initialization
X = np.empty((self.batch_size, *self.dim, self.n_channels))
y = np.empty((self.batch_size), dtype=int)
# Generate data
for i, ID in enumerate(list_IDs_temp):
# Store sample
X[i,] = self.X[ID,]
# Store class
y[i] = self.labels[ID]
return X, keras.utils.to_categorical(y, num_classes=self.n_classes)
Source: This