I'm training a Faster R-CNN model with the detectron2 framework. When I train the model on a dataset of only 15 images, it works on my RTX 3060; however, when I train on a dataset of 3000 images, CUDA runs out of memory, despite my using a batch size of 1 for both datasets. I do not understand why this is happening, as the model should only be processing one image at a time for both dataset sizes. It seems as if the model is trying to load all 3000 images at once, which I would imagine is not necessary when I set the batch size to 1.
I was thinking it might be because some image in the larger dataset has far more bounding boxes, or something similar. However, it runs out of memory immediately, which makes me think that is not the reason.
The code I'm using is simply from the detectron2 documentation:
import torch
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()
import numpy as np
import glob
import os, json, cv2, random
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data.datasets import register_coco_instances
from detectron2.engine import DefaultTrainer
def train():
    """Fine-tune a COCO-pretrained Faster R-CNN (R50-FPN) with detectron2.

    Builds a config from the model-zoo ``faster_rcnn_R_50_FPN_1x`` file,
    overrides the dataset, solver and RoI-head settings below, creates
    ``cfg.OUTPUT_DIR`` if it does not exist, and runs ``DefaultTrainer``.

    The dataset named in ``cfg.DATASETS.TRAIN`` is looked up by name, so it
    is expected to have been registered before this function is called.
    """
    cfg = get_cfg()
    cfg.merge_from_file(
        model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml")
    )

    # NOTE(review): this trains on the "visdrone-test" split even though a
    # "visdrone-train" split is registered by the script -- confirm intended.
    cfg.DATASETS.TRAIN = ("visdrone-test",)
    cfg.DATASETS.TEST = ()  # no evaluation dataset configured
    cfg.DATALOADER.NUM_WORKERS = 1

    # Let training initialize from model zoo
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
        "COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml"
    )

    cfg.SOLVER.IMS_PER_BATCH = 1  # images per training iteration
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.MAX_ITER = 300
    cfg.SOLVER.STEPS = []  # empty schedule: no learning-rate decay steps

    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 10

    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

    trainer = DefaultTrainer(cfg)
    # resume=False: load cfg.MODEL.WEIGHTS instead of resuming from a
    # checkpoint left in the output directory.
    trainer.resume_or_load(resume=False)
    trainer.train()
if __name__ == '__main__':
    # Script entry point: register the three VisDrone COCO-format splits by
    # name, then start training.
    torch.cuda.empty_cache()

    register_coco_instances(
        "visdrone-train",
        {},
        "D:/data/VisDrone2019-DET-train/labels.json",
        "D:/data/VisDrone2019-DET-train/images/",
    )
    register_coco_instances(
        "visdrone-val",
        {},
        "D:/data/VisDrone2019-DET-val/labels.json",
        "D:/data/VisDrone2019-DET-val/images/",
    )
    register_coco_instances(
        "visdrone-test",
        {},
        "D:/data/VisDrone2019-DET-test/labels.json",
        "D:/data/VisDrone2019-DET-test/images/",
    )

    # NOTE(review): these two values are assigned but not used anywhere in
    # the visible script; kept so the lookups still happen as before.
    sample_metadata = MetadataCatalog.get("visdrone-val")
    dataset_dicts = DatasetCatalog.get("visdrone-val")

    train()