I am using a version of the Matterport Mask R-CNN (MRCNN) repository to detect and classify two object classes (let's say dog and cat). The model was trained on ~5k images (1024x1024), using the `mask_rcnn_coco.h5` weights for transfer learning. I am wondering whether I am doing something wrong and need to change a config parameter, adjust my image dataset, or some combination of these. After I train the model and load the weights, every test-set image I try produces a bounding box located somewhere on the top edge of the image, with box coordinates like `[ 0 1801 1 1803]` (i.e. a box only 1–2 pixels in size), which suggests poor prediction.
I have researched this extensively, but I cannot find an existing issue and solution that match my situation. I suspect I may need to try a different approach to loading the COCO weights, or add more object-free images to the training/validation set. However, some papers on MRCNN implementations that I found used even fewer training/validation images than I did, without including any object-free images, and were still able to predict without issue. Does anyone have any suggestions? Thanks.
Random predictions during inference for the same image
Predictions for Mask RCNN are not working correctly
Here's the relevant code I have thus far (`tensorflow == 2.10.0`):
class PetConfig(Config):
    """Training configuration for the cat/dog model.

    Only the attributes listed here override the defaults inherited
    from the base ``Config`` class.
    """

    # Name of this configuration.
    NAME = "pet_cfg"

    # Background plus the two detected classes (cat and dog).
    NUM_CLASSES = 1 + 2

    # Backbone architecture; any configuration used to load the resulting
    # weights presumably needs the same value.
    BACKBONE = "resnet50"

    # Images processed per GPU in each step.
    IMAGES_PER_GPU = 1

    # Training / validation steps run per epoch.
    STEPS_PER_EPOCH = 100
    VALIDATION_STEPS = 200

    # Per-image limits applied during training.
    TRAIN_ROIS_PER_IMAGE = 10
    MAX_GT_INSTANCES = 3

    # Detection limits: at most 3 instances, each with confidence >= 0.95.
    DETECTION_MAX_INSTANCES = 3
    DETECTION_MIN_CONFIDENCE = 0.95
Configuration summary:
Configurations:
BACKBONE resnet50
BACKBONE_STRIDES [4, 8, 16, 32, 64]
BATCH_SIZE 1
BBOX_STD_DEV [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE None
DETECTION_MAX_INSTANCES 3
DETECTION_MIN_CONFIDENCE 0.95
DETECTION_NMS_THRESHOLD 0.3
FPN_CLASSIF_FC_LAYERS_SIZE 1024
GPU_COUNT 1
GRADIENT_CLIP_NORM 5.0
IMAGES_PER_GPU 1
IMAGE_CHANNEL_COUNT 3
IMAGE_MAX_DIM 1024
IMAGE_META_SIZE 15
IMAGE_MIN_DIM 800
IMAGE_MIN_SCALE 0
IMAGE_RESIZE_MODE square
IMAGE_SHAPE [1024 1024 3]
LEARNING_MOMENTUM 0.9
LEARNING_RATE 0.001
LOSS_WEIGHTS {'rpn_class_loss': 1.0, 'rpn_bbox_loss': 1.0, 'mrcnn_class_loss': 1.0, 'mrcnn_bbox_loss': 1.0, 'mrcnn_mask_loss': 1.0}
MASK_POOL_SIZE 14
MASK_SHAPE [28, 28]
MAX_GT_INSTANCES 3
MEAN_PIXEL [123.7 116.8 103.9]
MINI_MASK_SHAPE (56, 56)
NAME pet_cfg
NUM_CLASSES 3
POOL_SIZE 7
POST_NMS_ROIS_INFERENCE 1000
POST_NMS_ROIS_TRAINING 2000
PRE_NMS_LIMIT 6000
ROI_POSITIVE_RATIO 0.33
RPN_ANCHOR_RATIOS [0.5, 1, 2]
RPN_ANCHOR_SCALES (32, 64, 128, 256, 512)
RPN_ANCHOR_STRIDE 1
RPN_BBOX_STD_DEV [0.1 0.1 0.2 0.2]
RPN_NMS_THRESHOLD 0.7
RPN_TRAIN_ANCHORS_PER_IMAGE 256
STEPS_PER_EPOCH 100
TOP_DOWN_PYRAMID_SIZE 256
TRAIN_BN False
TRAIN_ROIS_PER_IMAGE 10
USE_MINI_MASK True
USE_RPN_ROIS True
VALIDATION_STEPS 200
WEIGHT_DECAY 0.0001
# Instantiate the training configuration.
config = PetConfig()

from keras import backend as K

# Reset the Keras session (intended to free GPU memory held by earlier models).
K.clear_session()

# Build the Mask R-CNN model in training mode, using "logs" as model_dir.
model = MaskRCNN(mode='training', model_dir="logs", config=config)

# Start from the MS COCO weights, matched by layer name, but skip the
# class-specific output layers, whose shapes depend on NUM_CLASSES.
model.load_weights(
    "mask_rcnn_coco/mask_rcnn_coco.h5",
    by_name=True,
    exclude=[
        "mrcnn_class_logits",
        "mrcnn_bbox_fc",
        "mrcnn_bbox",
        "mrcnn_mask",
    ],
)

# Stop early once the training loss has failed to improve for 3 epochs.
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

# Train only the 'heads' layers for up to 30 epochs.
model.train(
    train_set,
    val_set,
    learning_rate=0.001,
    epochs=30,
    custom_callbacks=[callback],
    layers='heads',
)
Post-training saving and inference:
# Save the weights of the underlying Keras model after training, so that
# they can be reloaded later from 'weights/pet_mask_rcnn.h5'.
model.keras_model.save_weights('weights/pet_mask_rcnn.h5')
from matplotlib.patches import Rectangle
class PredictionConfig(Config):
    """Inference configuration for the cat/dog model.

    The architecture-defining attributes (``NUM_CLASSES``, ``BACKBONE``)
    must match the configuration the weights were trained with
    (``PetConfig``); otherwise the inference graph contains layers that
    have no counterpart in the saved weight file.
    """

    # Name of this configuration.
    NAME = "pet_cfg"

    # Background plus the two detected classes (dog and cat).
    NUM_CLASSES = 1 + 2

    # Same backbone as used for training. Without this line the base
    # Config default ("resnet101" in the Matterport implementation) is
    # used; loading ResNet-50 weights with by_name=True then leaves the
    # unmatched backbone layers at their random initial values, which
    # yields meaningless detections.
    BACKBONE = "resnet50"

    # Run inference on one image at a time (batch size = GPU_COUNT *
    # IMAGES_PER_GPU = 1).
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

    # Keep at most one instance per image.
    MAX_GT_INSTANCES = 1
    DETECTION_MAX_INSTANCES = 1
# Build the configuration object used at prediction time.
predcfg = PredictionConfig()

# Reset the Keras session before constructing the inference model
# (intended to free GPU memory still held by the training model).
K.clear_session()

# Create the model in inference mode, then restore the trained weights,
# matching layers by name.
inf_mod = MaskRCNN(
    mode="inference",
    model_dir='weights/',
    config=predcfg,
)
inf_mod.load_weights('weights/pet_mask_rcnn.h5', by_name=True)
# Test on a single, randomly chosen image from the test set.
# random.randrange(n) returns a value in [0, n); the previous
# random.randint(0, n) also returned n itself, which is one past the last
# valid index.
num = random.randrange(len(test_set.image_ids))
image_id = num

# Load the image and run detection. detect() takes a list of images; the
# result for our single image is the first element.
test_img = test_set.load_image(image_id)
detected = inf_mod.detect([test_img])[0]

# Show the image and draw each detection on top of it.
pyplot.imshow(test_img)
ax = pyplot.gca()

# Display names indexed by (class id - 1); presumably class id 0 is the
# background and the order matches how the classes were registered in the
# dataset -- verify against the dataset definition.
class_names = ['dog', 'cat']

# 'rois' and 'class_ids' are iterated in lockstep, one entry per detection.
for box, detected_class_id in zip(detected['rois'], detected['class_ids']):
    print(box)
    # Box corners are given as (y1, x1, y2, x2).
    y1, x1, y2, x2 = box
    width, height = x2 - x1, y2 - y1
    # Label the box with its class name at the (x1, y1) corner.
    ax.annotate(class_names[detected_class_id - 1], (x1, y1), color='black',
                weight='bold', fontsize=10, ha='center', va='center')
    # Outline the detection in red.
    rect = Rectangle((x1, y1), width, height, fill=False, color='red')
    ax.add_patch(rect)

# Show the figure.
pyplot.show()