I am training a U-Net image segmentation network on the brain tumor dataset from figshare. Training itself behaves well: the training loss and training Dice score move in step with the validation loss and validation Dice score, so overfitting does not seem to be the issue. But after approximately 40 epochs the performance measures stop improving: the loss oscillates around 0.58 and the Dice score around 0.47. How can I get past this plateau? Any suggestions are welcome. Below is my U-Net network:
from keras.models import Model
from keras.layers import (Input, Convolution2D, MaxPooling2D, UpSampling2D,
                          BatchNormalization, Dropout, concatenate)

def unet(pretrained_weights=None, input_size=(512, 512, 3)):
    inputs = Input(input_size)

    # Encoder
    conv1 = Convolution2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(inputs)
    conv1 = BatchNormalization()(conv1)
    #conv1 = Dropout(0.2)(conv1)
    conv1 = Convolution2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv1)
    conv1 = BatchNormalization()(conv1)
    #conv1 = Dropout(0.2)(conv1)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)

    conv2 = Convolution2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool1)
    conv2 = BatchNormalization()(conv2)
    #conv2 = Dropout(0.1)(conv2)
    conv2 = Convolution2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv2)
    conv2 = BatchNormalization()(conv2)
    #conv2 = Dropout(0.1)(conv2)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)

    conv3 = Convolution2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool2)
    conv3 = BatchNormalization()(conv3)
    #conv3 = Dropout(0.1)(conv3)
    conv3 = Convolution2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv3)
    conv3 = BatchNormalization()(conv3)
    #conv3 = Dropout(0.1)(conv3)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)

    conv4 = Convolution2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool3)
    conv4 = BatchNormalization()(conv4)
    #conv4 = Dropout(0.1)(conv4)
    conv4 = Convolution2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv4)
    conv4 = BatchNormalization()(conv4)
    #conv4 = Dropout(0.5)(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(conv4)

    # Bottleneck
    conv5 = Convolution2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(pool4)
    conv5 = BatchNormalization()(conv5)
    #conv5 = Dropout(0.1)(conv5)
    conv5 = Convolution2D(1024, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv5)
    conv5 = BatchNormalization()(conv5)
    #conv5 = Dropout(0.5)(conv5)

    # Decoder with skip connections
    up6 = Convolution2D(512, 2, activation='relu', padding='same', kernel_initializer='he_normal')(UpSampling2D(size=(2, 2))(conv5))
    merge6 = concatenate([conv4, up6], axis=3)
    conv6 = Convolution2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge6)
    conv6 = BatchNormalization()(conv6)
    #conv6 = Dropout(0.1)(conv6)
    conv6 = Convolution2D(512, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv6)
    conv6 = BatchNormalization()(conv6)
    #conv6 = Dropout(0.1)(conv6)

    up7 = Convolution2D(256, 2, activation='relu', padding='same', kernel_initializer='he_normal')(UpSampling2D(size=(2, 2))(conv6))
    merge7 = concatenate([conv3, up7], axis=3)
    conv7 = Convolution2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge7)
    conv7 = BatchNormalization()(conv7)
    #conv7 = Dropout(0.1)(conv7)
    conv7 = Convolution2D(256, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv7)
    conv7 = BatchNormalization()(conv7)
    #conv7 = Dropout(0.1)(conv7)

    up8 = Convolution2D(128, 2, activation='relu', padding='same', kernel_initializer='he_normal')(UpSampling2D(size=(2, 2))(conv7))
    merge8 = concatenate([conv2, up8], axis=3)
    conv8 = Convolution2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge8)
    conv8 = BatchNormalization()(conv8)
    #conv8 = Dropout(0.1)(conv8)
    conv8 = Convolution2D(128, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv8)
    conv8 = BatchNormalization()(conv8)
    #conv8 = Dropout(0.1)(conv8)

    up9 = Convolution2D(64, 2, activation='relu', padding='same', kernel_initializer='he_normal')(UpSampling2D(size=(2, 2))(conv8))
    merge9 = concatenate([conv1, up9], axis=3)
    conv9 = Convolution2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(merge9)
    conv9 = BatchNormalization()(conv9)
    #conv9 = Dropout(0.2)(conv9)
    conv9 = Convolution2D(64, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv9)
    conv9 = BatchNormalization()(conv9)
    #conv9 = Dropout(0.2)(conv9)
    conv9 = Convolution2D(2, 3, activation='relu', padding='same', kernel_initializer='he_normal')(conv9)
    conv9 = BatchNormalization()(conv9)
    #conv9 = Dropout(0.2)(conv9)

    # Final 1x1 convolution produces a single-channel sigmoid mask.
    conv10 = Convolution2D(1, 1, activation='sigmoid')(conv9)

    model = Model(inputs=inputs, outputs=conv10)  # Keras 2 keyword names (was input=/output=)
    #model.summary()

    if pretrained_weights:
        model.load_weights(pretrained_weights)
    return model
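The compile step is not in the snippet above; here is a minimal sketch of how the pieces fit together (Adam at the 1e-4 starting LR noted below is an assumption for illustration; dice_coef_loss and dice_coef are defined further down):

from keras.optimizers import Adam

# Compile sketch: dice_coef_loss and dice_coef are defined below.
# Adam at the 1e-4 starting LR is illustrative, not confirmed above.
model = unet()
model.compile(optimizer=Adam(lr=1e-4),
              loss=dice_coef_loss,
              metrics=[dice_coef])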
The callbacks are initialized as below. Starting LR = 1e-4.
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, CSVLogger

callbacks = [
    EarlyStopping(monitor='val_loss', mode='min', patience=30, verbose=1, min_delta=1e-4),
    ReduceLROnPlateau(monitor='val_loss', mode='min', factor=0.1, patience=8, verbose=1),
    ModelCheckpoint(monitor='val_loss', mode='min',
                    filepath='weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-{epoch:03d}-{dice_coef:.6f}--{val_loss:.6f}.hdf5',
                    save_weights_only=True, verbose=1),
    CSVLogger('weights/anmol/1/UNET_mixed_loss_monitor_DC_new.csv'),
]
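With factor=0.1 and no min_lr argument, each plateau divides the learning rate by 10 from the 1e-4 start. A quick sketch of the schedule this callback produces (the values match the reductions visible in the training log further below):

# LR after n ReduceLROnPlateau steps (factor=0.1, starting LR 1e-4):
for n in range(5):
    print(n, 1e-4 * 0.1 ** n)   # ~1e-04, 1e-05, 1e-06, 1e-07, 1e-08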
These are my user-defined Dice score and loss functions. I train with dice_coef_loss, which is binary cross-entropy plus the Dice loss.
from keras import backend as K
from keras.losses import binary_crossentropy

def dice_coef(y_true, y_pred, smooth=1):
    # Flattens the whole batch, so this is a batch-level Dice coefficient.
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)

def dice_loss(y_true, y_pred):
    return 1 - dice_coef(y_true, y_pred)

def dice_coef_loss(y_true, y_pred):
    # Combined loss: binary cross-entropy plus (1 - Dice).
    return binary_crossentropy(y_true, y_pred) + dice_loss(y_true, y_pred)
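Since the reported loss is BCE + (1 - Dice), a plateau at loss ≈ 0.59 with dice_coef ≈ 0.47 means the BCE term contributes only about 0.06. A toy check of the Dice functions (a sketch with hand-made tensors, not data from the pipeline):

import numpy as np
from keras import backend as K

# Toy masks: the prediction overlaps one of the two positive pixels.
y_true = K.constant(np.array([[0., 1., 1., 0.]]))
y_pred = K.constant(np.array([[0., 1., 0., 0.]]))

# dice_coef = (2*1 + smooth) / (2 + 1 + smooth) = 3/4 with smooth=1
print(K.eval(dice_coef(y_true, y_pred)))   # ~0.75
print(K.eval(dice_loss(y_true, y_pred)))   # ~0.25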
I used 2605 images for training and 306 images for validation.
img_size = 512

image_args = dict(seed=seed,               # `seed` is shared with mask_args to keep pairs aligned
                  batch_size=2,
                  shuffle=True,
                  class_mode=None,
                  target_size=(img_size, img_size),
                  color_mode='rgb')
mask_args = dict(seed=seed,
                 batch_size=2,
                 class_mode=None,
                 shuffle=True,
                 target_size=(img_size, img_size),
                 color_mode='grayscale')

DIR = 'raw/brain/'
image = 'images'
masks = 'masks'

# image_datagen and mask_datagen are ImageDataGenerator instances (definitions not shown here).
# Combine the two directory iterators into one generator yielding (image, mask) pairs.
train_generator = zip(image_datagen.flow_from_directory(**image_args, directory=DIR + 'train_' + image),
                      mask_datagen.flow_from_directory(**mask_args, directory=DIR + 'train_' + masks))
validation_generator = zip(image_datagen.flow_from_directory(**image_args, directory=DIR + 'validation_' + image),
                           mask_datagen.flow_from_directory(**mask_args, directory=DIR + 'validation_' + masks))
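One sanity check that matters with flow_from_directory (a minimal sketch against the train_generator defined above): unless the ImageDataGenerator instances were created with rescale=1./255, which is not shown here, the grayscale masks arrive as values in [0, 255] rather than {0, 1}, which distorts both the Dice score and the BCE term.

import numpy as np

# Pull one (image, mask) batch and inspect shapes and value ranges.
imgs, msks = next(train_generator)
print(imgs.shape, msks.shape)     # expect (2, 512, 512, 3) and (2, 512, 512, 1)
print(imgs.min(), imgs.max())     # raw pixel range unless rescale was applied
print(np.unique(msks)[:10])       # masks should be binary {0, 1} for dice_coef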
model.fit_generator(train_generator,
                    steps_per_epoch=1302,
                    epochs=100,
                    validation_data=validation_generator,
                    validation_steps=153,
                    callbacks=callbacks)
Part of the training log is shown below:
Epoch 00041: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-041-0.466533--0.511900.hdf5
Epoch 42/100
1302/1302 [==============================] - 1063s 817ms/step - loss: 0.5939 - dice_coef: 0.4658 - val_loss: 0.5076 - val_dice_coef: 0.5430
Epoch 00042: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-042-0.465990--0.507603.hdf5
Epoch 43/100
1302/1302 [==============================] - 1062s 816ms/step - loss: 0.5928 - dice_coef: 0.4678 - val_loss: 0.5191 - val_dice_coef: 0.5270
Epoch 00043: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-043-0.467685--0.519115.hdf5
Epoch 44/100
1302/1302 [==============================] - 1063s 817ms/step - loss: 0.5966 - dice_coef: 0.4632 - val_loss: 0.5158 - val_dice_coef: 0.5364
Epoch 00044: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-044-0.463308--0.515760.hdf5
Epoch 45/100
1302/1302 [==============================] - 1064s 817ms/step - loss: 0.5892 - dice_coef: 0.4702 - val_loss: 0.4993 - val_dice_coef: 0.5507
Epoch 00045: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-045-0.470134--0.499294.hdf5
Epoch 46/100
1302/1302 [==============================] - 1063s 816ms/step - loss: 0.5960 - dice_coef: 0.4636 - val_loss: 0.5166 - val_dice_coef: 0.5329
Epoch 00046: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-046-0.463810--0.516552.hdf5
Epoch 47/100
1302/1302 [==============================] - 1065s 818ms/step - loss: 0.5920 - dice_coef: 0.4672 - val_loss: 0.5062 - val_dice_coef: 0.5427
Epoch 00047: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-047-0.467146--0.506242.hdf5
Epoch 48/100
1302/1302 [==============================] - 1063s 816ms/step - loss: 0.5938 - dice_coef: 0.4657 - val_loss: 0.5239 - val_dice_coef: 0.5277
Epoch 00048: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-048-0.465866--0.523923.hdf5
Epoch 49/100
1302/1302 [==============================] - 1064s 817ms/step - loss: 0.5962 - dice_coef: 0.4639 - val_loss: 0.5035 - val_dice_coef: 0.5434
Epoch 00049: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-049-0.463924--0.503518.hdf5
Epoch 50/100
1302/1302 [==============================] - 1063s 816ms/step - loss: 0.5854 - dice_coef: 0.4743 - val_loss: 0.5463 - val_dice_coef: 0.5066
Epoch 00050: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-050-0.474530--0.546343.hdf5
Epoch 51/100
1302/1302 [==============================] - 1063s 816ms/step - loss: 0.5840 - dice_coef: 0.4749 - val_loss: 0.5146 - val_dice_coef: 0.5360
Epoch 00051: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-051-0.475072--0.514581.hdf5
Epoch 52/100
1302/1302 [==============================] - 1064s 817ms/step - loss: 0.5852 - dice_coef: 0.4742 - val_loss: 0.5257 - val_dice_coef: 0.5256
Epoch 00052: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-052-0.474234--0.525729.hdf5
Epoch 53/100
1302/1302 [==============================] - 1065s 818ms/step - loss: 0.5857 - dice_coef: 0.4736 - val_loss: 0.5157 - val_dice_coef: 0.5315
Epoch 00053: ReduceLROnPlateau reducing learning rate to 9.999999747378752e-07.
Epoch 00053: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-053-0.473557--0.515651.hdf5
Epoch 54/100
1302/1302 [==============================] - 1065s 818ms/step - loss: 0.5852 - dice_coef: 0.4737 - val_loss: 0.5067 - val_dice_coef: 0.5421
Epoch 00054: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-054-0.473682--0.506671.hdf5
Epoch 55/100
1302/1302 [==============================] - 1065s 818ms/step - loss: 0.5903 - dice_coef: 0.4696 - val_loss: 0.4910 - val_dice_coef: 0.5571
Epoch 00055: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-055-0.469478--0.491024.hdf5
Epoch 56/100
1302/1302 [==============================] - 1065s 818ms/step - loss: 0.5876 - dice_coef: 0.4711 - val_loss: 0.5154 - val_dice_coef: 0.5340
Epoch 00056: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-056-0.471110--0.515441.hdf5
Epoch 57/100
1302/1302 [==============================] - 1064s 817ms/step - loss: 0.5897 - dice_coef: 0.4703 - val_loss: 0.5263 - val_dice_coef: 0.5258
Epoch 00057: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-057-0.470255--0.526310.hdf5
Epoch 58/100
1302/1302 [==============================] - 1064s 817ms/step - loss: 0.5849 - dice_coef: 0.4741 - val_loss: 0.5067 - val_dice_coef: 0.5451
Epoch 00058: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-058-0.474262--0.506664.hdf5
Epoch 59/100
1302/1302 [==============================] - 1062s 816ms/step - loss: 0.5816 - dice_coef: 0.4769 - val_loss: 0.5160 - val_dice_coef: 0.5348
Epoch 00059: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-059-0.476830--0.516005.hdf5
Epoch 60/100
1302/1302 [==============================] - 1064s 817ms/step - loss: 0.5891 - dice_coef: 0.4709 - val_loss: 0.5179 - val_dice_coef: 0.5318
Epoch 00060: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-060-0.470746--0.517893.hdf5
Epoch 61/100
1302/1302 [==============================] - 1065s 818ms/step - loss: 0.5873 - dice_coef: 0.4727 - val_loss: 0.5064 - val_dice_coef: 0.5431
Epoch 00061: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-061-0.472722--0.506373.hdf5
Epoch 62/100
1302/1302 [==============================] - 1064s 817ms/step - loss: 0.5803 - dice_coef: 0.4793 - val_loss: 0.5187 - val_dice_coef: 0.5319
Epoch 00062: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-062-0.479199--0.518674.hdf5
Epoch 63/100
1302/1302 [==============================] - 1066s 819ms/step - loss: 0.5843 - dice_coef: 0.4738 - val_loss: 0.5052 - val_dice_coef: 0.5459
Epoch 00063: ReduceLROnPlateau reducing learning rate to 9.999999974752428e-08.
Epoch 00063: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-063-0.473731--0.505171.hdf5
Epoch 64/100
1302/1302 [==============================] - 1065s 818ms/step - loss: 0.5859 - dice_coef: 0.4731 - val_loss: 0.5064 - val_dice_coef: 0.5419
Epoch 00064: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-064-0.473008--0.506380.hdf5
Epoch 65/100
1302/1302 [==============================] - 1064s 817ms/step - loss: 0.5836 - dice_coef: 0.4752 - val_loss: 0.4997 - val_dice_coef: 0.5508
Epoch 00065: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-065-0.475424--0.499673.hdf5
Epoch 66/100
1302/1302 [==============================] - 1063s 817ms/step - loss: 0.5932 - dice_coef: 0.4660 - val_loss: 0.5168 - val_dice_coef: 0.5338
Epoch 00066: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-066-0.465829--0.516758.hdf5
Epoch 67/100
1302/1302 [==============================] - 1063s 816ms/step - loss: 0.5820 - dice_coef: 0.4765 - val_loss: 0.5179 - val_dice_coef: 0.5323
Epoch 00067: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-067-0.476715--0.517926.hdf5
Epoch 68/100
1302/1302 [==============================] - 1062s 816ms/step - loss: 0.5912 - dice_coef: 0.4689 - val_loss: 0.5125 - val_dice_coef: 0.5375
Epoch 00068: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-068-0.468950--0.512456.hdf5
Epoch 69/100
1302/1302 [==============================] - 1062s 816ms/step - loss: 0.5820 - dice_coef: 0.4769 - val_loss: 0.5282 - val_dice_coef: 0.5237
Epoch 00069: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-069-0.476976--0.528154.hdf5
Epoch 70/100
1302/1302 [==============================] - 1062s 816ms/step - loss: 0.5845 - dice_coef: 0.4743 - val_loss: 0.5204 - val_dice_coef: 0.5303
Epoch 00070: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-070-0.474195--0.520356.hdf5
Epoch 71/100
1302/1302 [==============================] - 1063s 816ms/step - loss: 0.5886 - dice_coef: 0.4708 - val_loss: 0.5230 - val_dice_coef: 0.5270
Epoch 00071: ReduceLROnPlateau reducing learning rate to 1.0000000116860975e-08.
Epoch 00071: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-071-0.470715--0.523011.hdf5
Epoch 72/100
1302/1302 [==============================] - 1062s 816ms/step - loss: 0.5837 - dice_coef: 0.4759 - val_loss: 0.5216 - val_dice_coef: 0.5303
Epoch 00072: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-072-0.475787--0.521631.hdf5
Epoch 73/100
1302/1302 [==============================] - 1062s 815ms/step - loss: 0.5804 - dice_coef: 0.4780 - val_loss: 0.5333 - val_dice_coef: 0.5171
Epoch 00073: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-073-0.478063--0.533321.hdf5
Epoch 74/100
1302/1302 [==============================] - 1065s 818ms/step - loss: 0.5842 - dice_coef: 0.4747 - val_loss: 0.5126 - val_dice_coef: 0.5393
Epoch 00074: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-074-0.474628--0.512649.hdf5
Epoch 75/100
1302/1302 [==============================] - 1069s 821ms/step - loss: 0.5836 - dice_coef: 0.4755 - val_loss: 0.5103 - val_dice_coef: 0.5386
Epoch 00075: saving model to weights/anmol/1/UNET_sigmoid_focus_DC_2605_R_B_t-075-0.475690--0.510267.hdf5
Epoch 76/100
160/1302 [==>...........................] - ETA: 15:02 - loss: 0.6069 - dice_coef: 0.4548