In theory, I'd like to emulate what is done at the beginning of this blog: https://www.sicara.fr/blog-technique/dataset-generation-fine-tune-stable-diffusion-inpainting
I need to do it using detectron2 so as to use the capability of it panoptic segmentation.
However, I'm struggling to correctly extract information (bounding coordinates, class-labels, etc.) after the image has been processed through the panoptic checkpoint.
I need these bits of information if I'm to target a particular object to be masked as in the blog.
import cv2
import numpy as np
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog
width, height = 512, 512
# Load the Panoptic Segmentation model
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml")
predictor = DefaultPredictor(cfg)
# Load the input image
image = cv2.imread("dog.jpg")
# Preprocess the image (resize, convert to RGB)
image = cv2.resize(image, (width, height))
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Perform panoptic segmentation
panoptic_output = predictor(image)
# Get the panoptic segmentation instances
instances = panoptic_output["instances"]
# Visualize the predictions
v = Visualizer(image[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
v = v.draw_panoptic_seg_predictions(panoptic_output["panoptic_seg"], instances)
output_image = v.get_image()[:, :, ::-1]
# Create a mask image of the same size as the input image
mask_image = np.zeros(image.shape[:2], dtype=np.uint8)
# Get the panoptic segmentation mask
panoptic_mask = instances.pred_masks.numpy()
# Iterate over the panoptic segmentation mask and extract the mask of dogs
for idx, mask in enumerate(panoptic_mask):
if instances.pred_classes[idx] == 16: # Assuming class ID 16 represents dogs
mask_image += mask.astype(np.uint8) * 255
# Perform translation on the mask image
mask_image += np.roll(mask_image, 10, axis=0) # Translate the mask 10 pixels to the left
mask_image += np.roll(mask_image, -10, axis=0) # Translate the mask 10 pixels to the right
mask_image += np.roll(mask_image, 10, axis=1) # Translate the mask 10 pixels to the bottom
mask_image += np.roll(mask_image, -10, axis=1) # Translate the mask 10 pixels to the top
# Set non-black pixels to white pixels
mask_image = np.clip(mask_image, 0, 255)
# Save the mask image
cv2.imwrite("mask_image.jpg", mask_image)
I keep getting errors everywhere. Here's an example:
return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
Traceback (most recent call last):
File "C:\Users\Daniel Kolb\Moreland-Connect\SD-Housing-Proj\Diffusers\test.py", line 32, in
v = v.draw_panoptic_seg_predictions(panoptic_output["panoptic_seg"], instances)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\users\daniel kolb\moreland-connect\sd-housing-proj\diffusers\detectron2\detectron2\utils\visualizer.py", line 488, in draw_panoptic_seg
pred = _PanopticPrediction(panoptic_seg, segments_info, self.metadata)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\users\daniel kolb\moreland-connect\sd-housing-proj\diffusers\detectron2\detectron2\utils\visualizer.py", line 186, in init
self._sinfo = {s["id"]: s for s in segments_info} # seg id -> seg info
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\users\daniel kolb\moreland-connect\sd-housing-proj\diffusers\detectron2\detectron2\structures\instances.py", line 151, in iter
raise NotImplementedError("Instances
object is not iterable!")
NotImplementedError: Instances
object is not iterable!