I am building a VGG image pipeline, and I am trying to input 2 consecutive frames from a video as follows:
datagen = ImageDataGenerator()
datagen.fit(X_train)
model = Sequential()
model.add(Conv2D(input_shape=(224, 224, 6), filters=64, kernel_size=(3, 3), padding='same', activation='relu', kernel_initializer='he_uniform', bias_initializer='zeros'))
model.add(Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu', kernel_initializer='he_uniform', bias_initializer='zeros'))
model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu', kernel_initializer='he_uniform', bias_initializer='zeros'))
model.add(Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu', kernel_initializer='he_uniform', bias_initializer='zeros'))
model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Conv2D(filters=256, kernel_size=(3, 3), padding='same', activation='relu', kernel_initializer='he_uniform', bias_initializer='zeros'))
model.add(Conv2D(filters=256, kernel_size=(3, 3), padding='same', activation='relu', kernel_initializer='he_uniform', bias_initializer='zeros'))
model.add(Conv2D(filters=256, kernel_size=(3, 3), padding='same', activation='relu', kernel_initializer='he_uniform', bias_initializer='zeros'))
model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Conv2D(filters=512, kernel_size=(3, 3), padding='same', activation='relu', kernel_initializer='he_uniform', bias_initializer='zeros'))
model.add(Conv2D(filters=512, kernel_size=(3, 3), padding='same', activation='relu', kernel_initializer='he_uniform', bias_initializer='zeros'))
model.add(Conv2D(filters=512, kernel_size=(3, 3), padding='same', activation='relu', kernel_initializer='he_uniform', bias_initializer='zeros'))
model.add(MaxPool2D(pool_size=(2, 2), strides=(2, 2)))
model.add(GlobalAveragePooling2D())
model.add(Dense(units=4096, activation='relu', kernel_initializer='he_uniform', bias_initializer='zeros'))
model.add(Dropout(0.2))
model.add(Dense(units=4096, activation='relu', kernel_initializer='he_uniform', bias_initializer='zeros'))
model.add(Dropout(0.2))
model.add(Dense(units=1, activation='sigmoid'))
# opt = Adam(learning_rate=0.001)
opt = SGD(lr=0.01, momentum=0.3)
checkpoint = ModelCheckpoint(config.CLASH_PATH() + '/models/step_01.h5', monitor='binary_accuracy', verbose=1, save_best_only=True,
save_weights_only=False, mode='auto', period=1)
early = EarlyStopping(monitor='binary_accuracy', min_delta=0, patience=40, verbose=1, mode='auto')
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['binary_accuracy'])
model.summary()
model.fit(datagen.flow(X_train, y_train, batch_size=32,
subset='training', ignore_class_split=True), validation_data=datagen.flow(X_train, y_train, batch_size=16,
subset='validation', ignore_class_split=True), steps_per_epoch=len(X_train) / 48,
epochs=1000, verbose=1,
callbacks=[checkpoint, early])
As you'll note in the 4th line of code, I am passing 224x224x6 which represents two stacked image frames of 224x224x3. This is necessary since I am using the ImageDataGenerator to pass my data.
Unfortunately I am getting the following error message:
NumpyArrayIterator is set to use the data format convention "channels_last" (channels on axis 3), i.e. expected either 1, 3, or 4 channels on axis 3. However, it was passed an array with shape (6666, 224, 224, 6) (6 channels).
From other reading on stackoverflow, I have seen that I can stack my frames using layers.concatenate, but how would I then modify my generator to keep the flow of frames in sync?