I am working on training the object detector with a custom dataset designed to detect the head of a plant. I am using the "Faster R-CNN with Resnet-101 (v1)" that was originally designed for the pet dataset.
I modified the config file to match my dataset (1875 training / 375 eval images), whose images are 275x550 in size. I converted the data to record files, and the pipeline config file is shown below.
I trained on a GPU overnight for 100k steps, and the actual evaluation results look really good: the model detects all the plant heads, and the data is really useful.
The issue is with the metrics themselves. When I check the TensorBoard logs for the eval, all the metrics increase until 30k steps and then drop again, making a hump in the middle. This is true of the loss, mAP, and precision results.
Why is this happening? I assumed that if you keep training, the metrics would simply flatten out into a plateau rather than decrease again.
mAP Evaluation: https://i.stack.imgur.com/SJgBz.jpg
Loss Evaluation: https://i.stack.imgur.com/JfMd5.jpg
# Faster R-CNN with Resnet-101 (v1) originally for Oxford-IIIT Pets Dataset. Modified for wheat head detection
# Users should configure the fine_tune_checkpoint field in the train config as
# well as the label_map_path and input_path fields in the train_input_reader and
# eval_input_reader. Search for "PATH_TO_BE_CONFIGURED" to find the fields that
# should be configured.
# Model definition: a Faster R-CNN detector with a ResNet-101 feature extractor.
model {
faster_rcnn {
# Single foreground class (the wheat head, per the header comment).
num_classes: 1
# Resizer bounds equal the 275x550 image size of this dataset.
# NOTE(review): presumably this leaves the images at native resolution - confirm.
image_resizer {
keep_aspect_ratio_resizer {
min_dimension: 275
max_dimension: 550
}
}
feature_extractor {
type: 'faster_rcnn_resnet101'
# Same value (16) as the anchor generator's height/width strides below.
first_stage_features_stride: 16
}
# First-stage anchors: 4 scales x 3 aspect ratios per grid position.
first_stage_anchor_generator {
grid_anchor_generator {
scales: [0.25, 0.5, 1.0, 2.0]
aspect_ratios: [0.5, 1.0, 2.0]
height_stride: 16
width_stride: 16
}
}
first_stage_box_predictor_conv_hyperparams {
op: CONV
regularizer {
l2_regularizer {
# A weight of 0.0 means no L2 penalty is applied to these layers.
weight: 0.0
}
}
initializer {
truncated_normal_initializer {
stddev: 0.01
}
}
}
# First-stage proposal filtering: a score threshold of 0.0 discards nothing by
# score; proposals are limited by the NMS IoU threshold and the 300-proposal cap.
first_stage_nms_score_threshold: 0.0
first_stage_nms_iou_threshold: 0.7
first_stage_max_proposals: 300
# First-stage loss weights: localization is weighted 2x relative to objectness.
first_stage_localization_loss_weight: 2.0
first_stage_objectness_loss_weight: 1.0
# NOTE(review): presumably proposals are cropped to 14x14 and then max-pooled
# 2x2 with stride 2 before the second stage - confirm against faster_rcnn.proto.
initial_crop_size: 14
maxpool_kernel_size: 2
maxpool_stride: 2
second_stage_box_predictor {
mask_rcnn_box_predictor {
# Dropout is switched off (use_dropout false, keep probability 1.0).
use_dropout: false
dropout_keep_probability: 1.0
fc_hyperparams {
op: FC
regularizer {
l2_regularizer {
# A weight of 0.0 means no L2 penalty here either; with dropout off as well,
# this config applies no explicit regularization to the box predictor.
weight: 0.0
}
}
initializer {
variance_scaling_initializer {
factor: 1.0
uniform: true
mode: FAN_AVG
}
}
}
}
}
# Final detection post-processing: per-class NMS followed by a softmax over scores.
second_stage_post_processing {
batch_non_max_suppression {
score_threshold: 0.0
iou_threshold: 0.6
max_detections_per_class: 100
max_total_detections: 300
}
score_converter: SOFTMAX
}
# Second-stage loss weights: localization is weighted 2x relative to classification.
second_stage_localization_loss_weight: 2.0
second_stage_classification_loss_weight: 1.0
}
}
# Training settings: optimizer, learning-rate schedule, fine-tuning checkpoint,
# step budget and data augmentation.
train_config: {
batch_size: 1
optimizer {
momentum_optimizer: {
learning_rate: {
manual_step_learning_rate {
initial_learning_rate: 0.0003
# Both decay steps below (900000 and 1200000) lie beyond num_steps (200000),
# so the rate stays at initial_learning_rate for the whole run.
schedule {
step: 900000
learning_rate: .00003
}
schedule {
step: 1200000
learning_rate: .000003
}
}
}
momentum_optimizer_value: 0.9
}
use_moving_average: false
}
gradient_clipping_by_norm: 10.0
# Fine-tune from a pre-trained detection checkpoint (a COCO-trained Faster R-CNN
# ResNet-101 model, judging by the directory name).
fine_tune_checkpoint: "object_detection/faster_rcnn_resnet101_coco_11_06_2017/model.ckpt"
from_detection_checkpoint: true
load_all_detection_checkpoint_vars: true
# Note: The below line limits the training process to 200K steps, which we
# empirically found to be sufficient enough to train the pets dataset. This
# effectively bypasses the learning rate schedule (the learning rate will
# never decay). Remove the below line to train indefinitely.
num_steps: 200000
# Random horizontal flipping is the only augmentation configured.
data_augmentation_options {
random_horizontal_flip {
}
}
}
# Training data source: sharded record files plus the label map.
train_input_reader: {
tf_record_input_reader {
# "?????" is a wildcard over the shard index; the "-of-00010" suffix suggests
# the training set is split into 10 shards.
input_path: "object_detection/data_wheat/train.record-?????-of-00010"
}
label_map_path: "object_detection/data_wheat/wheat_label_map.pbtxt"
}
# Evaluation settings: report COCO-style detection metrics.
eval_config: {
metrics_set: "coco_detection_metrics"
# Equals the 375-image evaluation split of this dataset.
num_examples: 375
}
# Evaluation data source: sharded validation record files and the same label map
# as training, read without shuffling by a single reader.
eval_input_reader: {
tf_record_input_reader {
input_path: "object_detection/data_wheat/val.record-?????-of-00010"
}
label_map_path: "object_detection/data_wheat/wheat_label_map.pbtxt"
shuffle: false
num_readers: 1
}