官方文档 https://mmdetection.readthedocs.io/zh-cn/latest/api.html#mmdet-structures
``gt_instances``(InstanceData): Ground truth of instance annotations. 标注的数据
``pred_instances``(InstanceData): Instances of detection predictions. 预测的数据
``ignored_instances``(InstanceData): Instances to be ignored during training/testing. 训练或测试中忽略的数据
``gt_panoptic_seg``(PixelData): Ground truth of panoptic segmentation. 全景分割的标注数据
``pred_panoptic_seg``(PixelData): Prediction of panoptic segmentation. 全景分割的预测
``gt_sem_seg``(PixelData): Ground truth of semantic segmentation. 语义分割的标注数据
``pred_sem_seg``(PixelData): Prediction of semantic segmentation. 语义分割预测数据
我这里面主要使用的是 pred_instances
from mmengine.structures import InstanceData, PixelData
问题: 获取mask后, 如何做计算?
根据mask将图片抠取出来
如下程序获取mask, 当然 epoch_24.pth 是预先训练好的
from mmdet.apis import init_detector,inference_detector
import mmcv
import matplotlib.pyplot as plt
from mmdet.registry import VISUALIZERS
# Config and checkpoint of the retrained radish model. The checkpoint
# (epoch_24.pth) must already exist from an earlier training run.
config_file_retrain = 'configs/radish/mask_rcnn_r50_fpn_2x_coco_radish.py'
checkpoint_file_retrain = 'work_dirs/mask_rcnn_r50_fpn_2x_coco_radish/epoch_24.pth'
# Build the detector on the CPU.
model = init_detector(config_file_retrain, checkpoint_file_retrain, device='cpu')
# Alternative sample images. Only one assignment should be active at a time;
# previously several were left active and all but the last were dead stores.
# image_path = './data/radish/20231121145620_1-1.jpg'
# image_path = './data/radish/20231220/20231202115005_1-1.jpg'
# image_path = './data/radish/20231220/20231129161723_1-1.jpg'
# image_path = './data/radish/test2012/2012120076_20201212094819.png'
# image_path = './data/radish/test2012/2012120261_20201212151200.png'
# image_path = './data/radish/test2012/2012140043_20201214085421.png'
# image_path = './data/radish/test2012/2012120158_20201212111739.png'
# image_path = './data/radish/test2012/2012120305_20201212154238.png'
# image_path = './data/radish/20231220/20231129161409_1-1.jpg'
image_path = './data/radish/test2012/2012120179_20201212133724.png'
# Read the image in RGB channel order so matplotlib shows true colours.
img = mmcv.imread(image_path, channel_order='rgb')
plt.imshow(img)
# Run inference; the result's pred_instances are used further below.
result_retrain = inference_detector(model, img)
# Build the visualizer described in the model config (run this only once).
visualizer = VISUALIZERS.build(model.cfg.visualizer)
# init_detector loads dataset_meta from the checkpoint and stores it on the
# model; hand it to the visualizer so it matches the custom dataset.
visualizer.dataset_meta = model.dataset_meta
# Render the predictions (ground truth disabled) and display the result.
visualizer.add_datasample(
    'result', img, data_sample=result_retrain, draw_gt=False, wait_time=0)
visualizer.show()
处理mask
import cv2
import numpy as np
from mmdet.structures.mask import encode_mask_results, mask2bbox

# Predicted instance masks of the inference result.
pred_masks = result_retrain.pred_instances.masks
if len(pred_masks) == 0:
    # Everything below indexes the first instance; fail with a clear message
    # instead of a bare IndexError when the model detected nothing.
    raise RuntimeError(f'No instances were predicted for {image_path}')
masks = pred_masks.detach().cpu().numpy()
# Mask size, taken from the first instance mask (rows, columns).
height, width = masks[0].shape[:2]
# Encoded form of the masks as produced by mmdet's encode_mask_results.
encode_masks = encode_mask_results(pred_masks)
# One box per mask. The entries are corner coordinates
# (x_min, y_min, x_far, y_far), NOT (x, y, w, h); the drawing code below
# subtracts 1 from the far corner to get the last pixel inside the mask.
bboxes = mask2bbox(pred_masks.cpu()).numpy().tolist()
# Keep only the box of the first instance.
bboxes = bboxes[0]
print(bboxes)
image_path = './data/radish/test2012/2012120179_20201212133724.png'
# Reload a clean copy of the image in RGB channel order.
img = mmcv.imread(image_path, channel_order='rgb')
# The image is RGB and is shown with matplotlib, so red is (255, 0, 0).
# The previous value (0, 0, 255) is red only for BGR images and drew blue.
color = (255, 0, 0)
thickness = 2  # outline thickness in pixels
# Corner coordinates of the first mask's box; the far corner is reduced by 1
# so that it points at the last pixel inside the box.
x0, y0 = int(bboxes[0]), int(bboxes[1])
x1 = int(bboxes[2] - 1)
y1 = int(bboxes[3] - 1)
print(f'x0:{x0}, y0:{y0}, x1:{x1}, y1:{y1}')
start_point, end_point = (x0, y0), (x1, y1)
print(start_point)
print(end_point)
# cv2.rectangle paints into the array it receives. Draw on a copy so the
# outline does not leak into the crop below or into later mask processing.
mask_bboxs = cv2.rectangle(img.copy(), start_point, end_point, color, thickness)
plt.imshow(mask_bboxs)
# Crop the box region out of the untouched image.
# NOTE(review): bboxes still carries the un-decremented far corner; if
# mmcv.imcrop treats the far corner as inclusive the patch is one pixel
# wider/taller than the tight box - confirm against the mmcv version in use.
patch_bboxes = np.array(bboxes)
patch = mmcv.imcrop(img, patch_bboxes)
plt.imshow(patch)
# Mask of the first predicted instance.
mask = masks[0]
# Foreground: keep the pixels inside the mask and set everything else to 0.
# The extra trailing axis lets the 2-D mask broadcast over all colour
# channels, replacing the per-channel multiply-and-concatenate.
masked = img * mask[:, :, None]
# Background: the complement of the mask, i.e. the image with the instance
# blanked out.
un_mask = 1 - mask
img = (img * un_mask[:, :, None]).astype(np.uint8)
# Convert the background to gray but keep three channels. The image was read
# in RGB order, so rgb2gray (not bgr2gray) applies the correct channel weights.
img = mmcv.rgb2gray(img, keepdim=True)
img = np.concatenate([img, img, img], axis=2)
# Compositing the coloured instance onto the gray background is left disabled:
# img += masked
plt.imshow(masked)
# 旋转
# show the results
# visualizer.add_datasample(
# 'result',
# img,
# data_sample=result_retrain,
# draw_gt=False,
# wait_time=0,
# )
# visualizer.show()
打印出来处理结果
然后针对隔离出来的图片进行后续处理