I'm working on a problem of classifying activities: getting into and getting out of a car, and also classifying loading and unloading activity near the car.
I need advice on how to fix an overfitting problem: the model overfits and performs poorly on the test dataset.
I'm using a CNN + LSTM architecture. In the attachment I've provided samples of the dataset; I have around 15,000 images for each class.
Dataset example
Now let's get to the code.
First, I load my dataset using Keras:
from keras.preprocessing.image import ImageDataGenerator

batch_size = 128
batch_size_train = 148

def bring_data_from_directory():
    datagen = ImageDataGenerator(rescale=1. / 255)
    train_generator = datagen.flow_from_directory(
        'train',
        target_size=(224, 224),
        batch_size=batch_size,
        class_mode='categorical',  # the generator yields batches of images together with one-hot labels
        shuffle=True,
        classes=['get_on', 'get_off', 'load', 'unload'])
    validation_generator = datagen.flow_from_directory(
        'validate',
        target_size=(224, 224),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=True,
        classes=['get_on', 'get_off', 'load', 'unload'])
    return train_generator, validation_generator
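For clarity, here is roughly what these generators yield (a small check, assuming train/ and validate/ each contain one subfolder per class):

train_generator, validation_generator = bring_data_from_directory()
x_batch, y_batch = next(train_generator)
print(x_batch.shape)  # (128, 224, 224, 3) - rescaled RGB frames
print(y_batch.shape)  # (128, 4) - one-hot vectors over the four classes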
I use the VGG16 network to extract features and store them in .npy format:
from keras.applications.vgg16 import VGG16

def load_VGG16_model():
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    print("Model loaded..!")
    print(base_model.summary())
    return base_model
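For reference, with include_top=False and 224x224 inputs the base model ends at the final max-pooling block, so each frame maps to a 7x7x512 feature tensor:

base_model = load_VGG16_model()
print(base_model.output_shape)  # (None, 7, 7, 512)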
import numpy as np
from sklearn.utils import shuffle

def extract_features_and_store(train_generator, validation_generator, base_model):
    # Run every training batch through VGG16 and accumulate the features
    n_train_batches = int(56021 / batch_size)
    print("Total needed:", n_train_batches)
    x_generator = None
    y_label = None
    batch = 0
    for x, y in train_generator:
        if batch == n_train_batches:
            break
        print("predict on batch:", batch)
        batch += 1
        if x_generator is None:  # was `np.any(x_generator) == None`, which misfires once real arrays arrive
            x_generator = base_model.predict_on_batch(x)
            y_label = y
        else:
            x_generator = np.append(x_generator, base_model.predict_on_batch(x), axis=0)
            y_label = np.append(y_label, y, axis=0)
    x_generator, y_label = shuffle(x_generator, y_label)
    np.save('video_x_VGG16.npy', x_generator)
    np.save('video_y_VGG16.npy', y_label)

    # Same extraction pass for the validation set
    n_val_batches = int(3971 / batch_size)
    print("Total needed:", n_val_batches)
    x_generator = None
    y_label = None
    batch = 0
    for x, y in validation_generator:
        if batch == n_val_batches:
            break
        print("predict on batch validate:", batch)
        batch += 1
        if x_generator is None:
            x_generator = base_model.predict_on_batch(x)
            y_label = y
        else:
            x_generator = np.append(x_generator, base_model.predict_on_batch(x), axis=0)
            y_label = np.append(y_label, y, axis=0)
    x_generator, y_label = shuffle(x_generator, y_label)
    np.save('video_x_validate_VGG16.npy', x_generator)
    np.save('video_y_validate_VGG16.npy', y_label)

    train_data = np.load('video_x_VGG16.npy')
    train_labels = np.load('video_y_VGG16.npy')
    train_data, train_labels = shuffle(train_data, train_labels)
    validation_data = np.load('video_x_validate_VGG16.npy')
    validation_labels = np.load('video_y_validate_VGG16.npy')
    validation_data, validation_labels = shuffle(validation_data, validation_labels)

    # VGG16 features come out as (N, 7, 7, 512); flatten the 7x7 spatial grid into
    # a 49-step sequence of 512-dimensional vectors for the LSTM
    train_data = train_data.reshape(train_data.shape[0],
                                    train_data.shape[1] * train_data.shape[2],
                                    train_data.shape[3])
    validation_data = validation_data.reshape(validation_data.shape[0],
                                              validation_data.shape[1] * validation_data.shape[2],
                                              validation_data.shape[3])
    return train_data, train_labels, validation_data, validation_labels
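To tie the steps together, this is roughly how the pipeline is driven (a minimal sketch; the exact sample counts are whatever the extraction loop collects, and the logs below show 55796 train and 3552 validation samples):

train_generator, validation_generator = bring_data_from_directory()
base_model = load_VGG16_model()
train_data, train_labels, validation_data, validation_labels = \
    extract_features_and_store(train_generator, validation_generator, base_model)
print(train_data.shape)       # about (55796, 49, 512) - 49-step sequences of VGG16 features
print(validation_data.shape)  # about (3552, 49, 512)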
Model
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.regularizers import l2
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

def train_model(train_data, train_labels, validation_data, validation_labels):
    print("SHAPE OF DATA : {}".format(train_data.shape))
    model = Sequential()
    model.add(LSTM(2048, stateful=True, activation='relu', return_sequences=True,
                   kernel_regularizer=l2(1e-7), activity_regularizer=l2(1e-7),
                   kernel_initializer='glorot_uniform', bias_initializer='zeros', dropout=0.2,
                   batch_input_shape=(batch_size_train, train_data.shape[1], train_data.shape[2])))
    model.add(LSTM(1024, stateful=True, activation='relu', return_sequences=True,
                   kernel_regularizer=l2(1e-7), activity_regularizer=l2(1e-7),
                   kernel_initializer='glorot_uniform', bias_initializer='zeros', dropout=0.2))
    model.add(LSTM(512, stateful=True, activation='relu', return_sequences=True,
                   kernel_regularizer=l2(1e-7), activity_regularizer=l2(1e-7),
                   kernel_initializer='glorot_uniform', bias_initializer='zeros', dropout=0.2))
    model.add(LSTM(128, stateful=True, activation='relu',
                   kernel_regularizer=l2(1e-7), activity_regularizer=l2(1e-7),
                   kernel_initializer='glorot_uniform', bias_initializer='zeros', dropout=0.2))
    model.add(Dense(1024, activation='relu',
                    kernel_regularizer=l2(0.01), activity_regularizer=l2(0.01),
                    kernel_initializer='random_uniform', bias_initializer='zeros'))
    model.add(Dropout(0.2))
    model.add(Dense(4, activation='softmax',
                    kernel_initializer='random_uniform', bias_initializer='zeros'))
    adam = Adam(lr=0.00005, decay=1e-6, clipnorm=1.0, clipvalue=0.5)
    model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=10, verbose=0),
        ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0),
        ModelCheckpoint('video_1_LSTM_1_1024.h5', monitor='val_loss',
                        save_best_only=True, verbose=0)
    ]
    nb_epoch = 500
    model.fit(train_data, train_labels,
              validation_data=(validation_data, validation_labels),
              batch_size=batch_size_train, epochs=nb_epoch,  # `nb_epoch` was the old Keras 1 argument name
              callbacks=callbacks, shuffle=True, verbose=1)
    return model
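And the call itself (a sketch; note that because the LSTMs are stateful with a fixed batch_input_shape, any later evaluate/predict must use the same batch size):

model = train_model(train_data, train_labels, validation_data, validation_labels)
# stateful layers fix the batch dimension, so evaluation needs batch_size_train as well
loss, acc = model.evaluate(validation_data, validation_labels, batch_size=batch_size_train)
print(loss, acc)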
LOGS
Train on 55796 samples, validate on 3552 samples
Epoch 1/500
55796/55796 [==============================] - 209s 4ms/step - loss: 2.0079 - acc: 0.4518 - val_loss: 1.6785 - val_acc: 0.6166
Epoch 2/500
55796/55796 [==============================] - 205s 4ms/step - loss: 1.3974 - acc: 0.8347 - val_loss: 1.3561 - val_acc: 0.6740
Epoch 3/500
55796/55796 [==============================] - 205s 4ms/step - loss: 1.1181 - acc: 0.8628 - val_loss: 1.1961 - val_acc: 0.7311
Epoch 4/500
55796/55796 [==============================] - 205s 4ms/step - loss: 0.9644 - acc: 0.8689 - val_loss: 1.1276 - val_acc: 0.7218
Epoch 5/500
55796/55796 [==============================] - 204s 4ms/step - loss: 0.8681 - acc: 0.8703 - val_loss: 1.0483 - val_acc: 0.7435
Epoch 6/500
55796/55796 [==============================] - 204s 4ms/step - loss: 0.7944 - acc: 0.8717 - val_loss: 0.9755 - val_acc: 0.7641
Epoch 7/500
55796/55796 [==============================] - 204s 4ms/step - loss: 0.7296 - acc: 0.9245 - val_loss: 0.9444 - val_acc: 0.8260
Epoch 8/500
55796/55796 [==============================] - 204s 4ms/step - loss: 0.6670 - acc: 0.9866 - val_loss: 0.8486 - val_acc: 0.8426
Epoch 9/500
55796/55796 [==============================] - 204s 4ms/step - loss: 0.6121 - acc: 0.9943 - val_loss: 0.8455 - val_acc: 0.8708
Epoch 10/500
55796/55796 [==============================] - 205s 4ms/step - loss: 0.5634 - acc: 0.9964 - val_loss: 0.8335 - val_acc: 0.8553
Epoch 11/500
55796/55796 [==============================] - 205s 4ms/step - loss: 0.5216 - acc: 0.9973 - val_loss: 0.9688 - val_acc: 0.7838
Epoch 12/500
55796/55796 [==============================] - 204s 4ms/step - loss: 0.4841 - acc: 0.9986 - val_loss: 0.8166 - val_acc: 0.8133
Epoch 13/500
55796/55796 [==============================] - 205s 4ms/step - loss: 0.4522 - acc: 0.9984 - val_loss: 0.8399 - val_acc: 0.8184
Epoch 14/500
55796/55796 [==============================] - 205s 4ms/step - loss: 0.4234 - acc: 0.9987 - val_loss: 0.7864 - val_acc: 0.8072
Epoch 15/500
55796/55796 [==============================] - 205s 4ms/step - loss: 0.3977 - acc: 0.9990 - val_loss: 0.7306 - val_acc: 0.8446
Epoch 16/500
55796/55796 [==============================] - 205s 4ms/step - loss: 0.3750 - acc: 0.9990 - val_loss: 0.7644 - val_acc: 0.8514
Epoch 17/500
55796/55796 [==============================] - 205s 4ms/step - loss: 0.3546 - acc: 0.9989 - val_loss: 0.7542 - val_acc: 0.7908
Epoch 18/500
55796/55796 [==============================] - 204s 4ms/step - loss: 0.3345 - acc: 0.9994 - val_loss: 0.7150 - val_acc: 0.8314
Epoch 19/500
55796/55796 [==============================] - 205s 4ms/step - loss: 0.3170 - acc: 0.9993 - val_loss: 0.8910 - val_acc: 0.7798
Epoch 20/500
55796/55796 [==============================] - 204s 4ms/step - loss: 0.3017 - acc: 0.9992 - val_loss: 0.6143 - val_acc: 0.8809
Epoch 21/500
55796/55796 [==============================] - 204s 4ms/step - loss: 0.2861 - acc: 0.9995 - val_loss: 0.7907 - val_acc: 0.8156
Epoch 22/500
55796/55796 [==============================] - 205s 4ms/step - loss: 0.2719 - acc: 0.9996 - val_loss: 0.7077 - val_acc: 0.8401
Epoch 23/500
55796/55796 [==============================] - 206s 4ms/step - loss: 0.2593 - acc: 0.9995 - val_loss: 0.6482 - val_acc: 0.8133
Epoch 24/500
55796/55796 [==============================] - 204s 4ms/step - loss: 0.2474 - acc: 0.9995 - val_loss: 0.7671 - val_acc: 0.7942
The problem is that the model starts to overfit and makes significant detection errors on the test dataset. As far as I can see, the model can't tell the difference between these two actions, or maybe it's a problem with the sequences. As you can see, I've already tried regularization, gradient clipping, and so on, with no result.
Please, any advice on how to fix this problem would be appreciated.