在之前的文章中,我们认识了COCO数据集的基本格式(https://blog.csdn.net/qq_44776065/article/details/128695821),
并制作了分割数据集(《制作COCO格式目标检测和分割数据集》:https://blog.csdn.net/qq_44776065/article/details/128697177)。
那么接下来如何读取数据集,并展示结果呢?本文来解决这个问题。
pycocotools是官方给出的解析COCO格式数据集的API,帮助我们对COCO格式数据集进行操作,官方API:https://github.com/cocodataset/cocoapi,在PythonAPI
中有Demo,可以下载后运行
安装pycocotools
(本人安装时Linux
和windows
都可使用)
pip install pycocotools
重要属性:
coco.imgs:字典,图片ID -> 图片信息
coco.anns:字典,标注ID -> 标注信息
coco.cats:字典,类别ID -> 类别信息
重要API,get
与load
:
基本思想:先获取ID,再加载信息
获取ID:
获取图片ID:getImgIds(),指定imgIds和catIds时会返回满足条件的图片ID
获取标注ID:getAnnIds(imgIds=[], catIds=[])
获取类别ID:getCatIds()
加载信息:
loadImgs(img_id),获取的是字典信息(以列表形式返回),获取路径信息为: loadImgs(img_id)[0]["file_name"]
loadAnns(ann_ids),ann_ids来自筛选的标注ID
loadCats(cat_id)
例子:初始化COCO对象,并获取图片ID
from pycocotools.coco import COCO
import os
# Root directory of the COCO-format dataset. The drive letter must be followed
# by a separator: "D:MyDataset" would be resolved relative to the current
# directory of drive D instead of pointing at D:/MyDataset.
dataset_root = "D:/MyDataset/my_coco"
anno_file = "my_annotations.json"
anno_path = os.path.join(dataset_root, anno_file)

# Load the annotation file into a COCO object.
anno = COCO(anno_path)
# IDs of all images listed in the annotation file.
image_ids = anno.getImgIds()
基本流程:
1. 初始化COCO数据集,并获取所有图片的ID
2. 根据index获取图片ID,再根据图片ID(或者类别ID)获取标注ID
3. 根据标注ID,加载标注信息
4. 将标注信息转化为tensor
初始化:
import os

import numpy as np
import torch
from PIL import Image
from pycocotools.coco import COCO
from torch.utils.data.dataloader import DataLoader
from torch.utils.data.dataset import Dataset

import utils.coco_transform as coco_transform
from utils import convert_coco_poly_mask, draw_gt
class SegDatasetCOCO(Dataset):
    """Segmentation dataset backed by a single COCO-format annotation file.

    Args:
        dataset_root: Existing directory that contains the annotation file.
        p_anno_filename: Name of the annotation JSON file inside
            ``dataset_root``.
        category: Category ID stored on the dataset for annotation filtering.
        transforms: Transform object stored on the dataset; may be ``None``.

    Raises:
        AssertionError: If ``dataset_root`` does not exist.
    """

    def __init__(self, dataset_root, p_anno_filename, category, transforms) -> None:
        # ``super(SegDatasetCOCO).__init__()`` only created an unbound super
        # object and never ran the parent initializer; the zero-argument form
        # dispatches to ``Dataset.__init__`` as intended.
        super().__init__()

        # Locate the annotation file inside the dataset directory.
        assert os.path.exists(dataset_root), "{0} does not exists".format(dataset_root)
        anno_root = os.path.join(dataset_root, p_anno_filename)

        # Drop the last 17 characters of the file name, which is the length of
        # "_annotations.json" (presumably the suffix of every annotation file).
        self.patient_dir = p_anno_filename[0: -17]
        self.transforms = transforms
        self.category = category

        # Load the COCO annotations.
        self.anno = COCO(annotation_file=anno_root)

        # IDs of all images in the annotation file
        # (alternative: list(self.anno.imgs.keys())).
        self.ids = self.anno.getImgIds()
        self.dataset_root = dataset_root

        # Report which annotation set was loaded and how many images it has.
        print(f"Dataset Info Name: {self.patient_dir}")
        print(f"Dataset Info dataset len: {len(self.ids)}")
获取单个batch:
def __getitem__(self, index):
    """Return the ``(image, target)`` pair for the sample at ``index``."""
    image_id = self.ids[index]

    # Resolve the file recorded for this image ID and load it as grayscale.
    image_info = self.anno.loadImgs(image_id)[0]
    image_path = os.path.join(self.dataset_root, image_info["file_name"])
    image = Image.open(image_path).convert("L")
    width, height = image.size

    # Annotation IDs are selected by image ID and category ID.
    annotation_ids = self.anno.getAnnIds(imgIds=image_id, catIds=self.category)
    annotations = self.anno.loadAnns(annotation_ids)
    target = self.parse_targets(
        img_id=image_id,
        coco_targets=annotations,
        w=width,
        h=height,
    )

    # Apply the joint image/target transforms when any are configured.
    if self.transforms is not None:
        image, target = self.transforms(image, target)
    return image, target
对标注信息处理:
def parse_targets(self,
                  img_id: int,
                  coco_targets: list,
                  w: int = None,
                  h: int = None):
    """Convert the COCO annotations of one image into a dict of tensors.

    Args:
        img_id: ID of the image the annotations belong to.
        coco_targets: Annotation dicts; each is read for the keys
            ``iscrowd``, ``bbox``, ``category_id``, ``area`` and
            ``segmentation``.
        w: Image width; must be a positive number.
        h: Image height; must be a positive number.

    Returns:
        Dict with the keys ``boxes`` (rows of ``[xmin, ymin, xmax, ymax]``),
        ``labels``, ``masks``, ``image_id``, ``area`` and ``iscrowd``.

    Raises:
        AssertionError: If ``w`` or ``h`` is missing or not positive.
    """
    # Both sizes default to None; test for that explicitly, because comparing
    # None with 0 raises TypeError instead of reporting the invalid size.
    assert w is not None and w > 0, "w 不合法"
    assert h is not None and h > 0, "h 不合法"

    # Keep only annotations that describe a single object.
    anno = [obj for obj in coco_targets if obj['iscrowd'] == 0]

    # Boxes: [xmin, ymin, w, h] -> [xmin, ymin, xmax, ymax], clamped to the
    # image. The reshape keeps a (0, 4) shape when there are no annotations.
    boxes = [obj["bbox"] for obj in anno]
    boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
    boxes[:, 2:] += boxes[:, :2]
    boxes[:, 0::2].clamp_(min=0, max=w)
    boxes[:, 1::2].clamp_(min=0, max=h)

    # Category labels.
    classes = [obj["category_id"] for obj in anno]
    classes = torch.tensor(classes, dtype=torch.int64)

    # Areas and crowd flags. The explicit dtype keeps ``iscrowd`` an integer
    # tensor even when the list is empty (torch.tensor([]) would be float).
    area = torch.tensor([obj["area"] for obj in anno])
    iscrowd = torch.tensor([obj["iscrowd"] for obj in anno], dtype=torch.int64)

    # Turn the segmentation annotations into one mask per object.
    segmentations = [obj["segmentation"] for obj in anno]
    masks = convert_coco_poly_mask(segmentations, h, w)

    # Keep only valid boxes, i.e. x_max > x_min and y_max > y_min.
    keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])

    target = {
        "boxes": boxes[keep],
        "labels": classes[keep],
        "masks": masks[keep],
        "image_id": torch.tensor([img_id]),
        "area": area[keep],
        "iscrowd": iscrowd[keep],
    }
    return target
创建Dataset
与DataLoader
# Dataset location and the annotation file to load.
dataset_root = r"D:\Learning\OCT\oct-dataset-master\dataset\dataset_stent_coco"
p_anno_filename = "P9_1_IMG002_annotations.json"
# Category ID handed to the dataset.
category = 2
# Joint image/target transforms; here only the tensor conversion.
transforms = coco_transform.Compose([coco_transform.ToTensor()])

dataset = SegDatasetCOCO(dataset_root=dataset_root, p_anno_filename=p_anno_filename,
                         category=category, transforms=transforms)

# The dataset's own collate_fn is used instead of the default batch collation.
dataset_loader = DataLoader(dataset=dataset, batch_size=1, shuffle=False,
                            collate_fn=dataset.collate_fn)
数据集读取需要特殊处理:默认的batch组装无法将结果进行打包,因为每一张图片的mask的维度不一致,
mask的个数由图片中目标的个数决定,所以需要自定义collate_fn
@staticmethod
def collate_fn(batch):
return tuple(zip(*batch))
batch数据格式,数据均为tensor:
image, {"boxes": [[1, 2, 3, 4], ...], "labels": [1, ...], "masks": [[[1, 0, 0], [0, 0, 0], [1, 1, 1]], ...], "image_id": [0], "area": [100.0, ...], "iscrowd": [0, ...]}
原理分析:
if __name__ == "__main__":
    # Two samples whose second field differs in length.
    a1 = ["a", [1, 2, 3]]
    a2 = ["b", [3, 4]]
    b = [a1, a2]

    # Unpack the batch and pair up the fields found at the same position.
    c = zip(*b)
    for grouped in c:
        print(grouped)
    # ('a', 'b')
    # ([1, 2, 3], [3, 4])
使用*解开列表b, 将维度不一致的部分当作一个元素, 使用zip将各个样本对应位置的元素进行组合, 完成batch的合并
如果有不同类的元素
if __name__ == "__main__":
    # Three samples; the second field is a list in two of them and a dict in one.
    a1 = ["a", [1, 2, 3]]
    a2 = ["b", [3, 4]]
    a3 = ["c", {"array": [5, 6]}]
    b = [a1, a2, a3]

    # Same regrouping as before, materialised as a tuple this time.
    c = tuple(zip(*b))
    for grouped in c:
        print(grouped)
    # ('a', 'b', 'c')
    # ([1, 2, 3], [3, 4], {'array': [5, 6]})
即使batch中包含不同类型的元素,也可以这样组合;不过这样的情况一般不会出现,常常出现的问题是batch中某个数据维度不一致
预测结果和GT文件的数据类型基本一致,不同的是模型会输出一定的置信度
在实际的项目中,预测结果会按照置信度进行筛选,之后再进行绘制,置信度在 0 - 1 之间
完整代码:
需要将数据类型转换为ndarray
之后进行绘制
if __name__ == "__main__":
    dataset_root = r"D:\Learning\OCT\oct-dataset-master\dataset\dataset_stent_coco"
    p_anno_filename = "P9_1_IMG002_annotations.json"
    category = 2
    transforms = coco_transform.Compose([coco_transform.ToTensor()])
    dataset = SegDatasetCOCO(
        dataset_root=dataset_root,
        p_anno_filename=p_anno_filename,
        category=category,
        transforms=transforms
    )
    dataset_loader = DataLoader(
        dataset=dataset,
        batch_size=1,
        shuffle=False,
        collate_fn=dataset.collate_fn
    )

    # Category ID (as a string) -> display name. It does not change between
    # batches, so it is built once instead of on every iteration.
    category_index = {
        "1": "cerebral",
        "2": "stent"
    }

    # Image.save does not create missing directories; make sure the output
    # folder exists before the first result is written.
    os.makedirs("./test", exist_ok=True)

    for i, (image, predictions) in enumerate(dataset_loader):
        # Only the first batch is visualised.
        if i == 1:
            break

        # Everything below uses the first sample of the batch.
        # For an RGB image use instead:
        #   np.uint8(image[0].permute(1, 2, 0).cpu().numpy() * 255)
        # Grayscale: squeeze the leading size-1 dimensions and scale to 0-255
        # (assumes the tensor values lie in [0, 1] - TODO confirm).
        image = np.uint8(torch.squeeze(torch.squeeze(image[0], dim=0), dim=0).cpu().numpy() * 255)
        # Convert to RGB so that coloured boxes and masks can be drawn on it.
        image = Image.fromarray(image).convert("RGB")

        # Convert all annotation tensors to numpy arrays.
        predict_boxes = predictions[0]["boxes"].to("cpu").numpy()  # [[x_min, y_min, x_max, y_max], ...]
        predict_classes = predictions[0]["labels"].to("cpu").numpy()  # [1, ...] category IDs
        predict_scores = np.ones(len(predict_boxes))  # ground truth: every score is 1
        predict_mask = predictions[0]["masks"].to("cpu").numpy()  # [[[1, 1, 1], [1, 1, 1], [0, 0, 0]], ...]

        # Draw the ground truth.
        plot_img = draw_gt(image=image,
                           boxes=predict_boxes,
                           classes=predict_classes,
                           scores=predict_scores,
                           masks=predict_mask,
                           category_index=category_index,
                           line_thickness=2,
                           font='arial.ttf',
                           font_size=13)
        # To display instead of saving: plt.imshow(plot_img); plt.show()

        # Save the rendered image.
        plot_img.save(f"./test/test_gt_cats{category}_{p_anno_filename[0: -17]}_{predictions[0]['image_id'].item()}.jpg")
使用coco_mask将polygon信息转化为RLE格式,再解码为mask(关于RLE格式,可参考pycocotools的mask模块)
返回的mask维度为3,即[N, H, W],N为目标个数,为打包成batch准备,batch中图片格式:B, C, H, W
from pycocotools import mask as coco_mask
def convert_coco_poly_mask(segmentations, height, width):
    """Turn COCO polygon segmentations into a stack of per-object masks.

    Args:
        segmentations: One polygon description per object.
        height: Mask height passed to the RLE conversion.
        width: Mask width passed to the RLE conversion.

    Returns:
        The per-object masks stacked along dimension 0, or a
        ``(0, height, width)`` uint8 tensor of zeros when there are no objects.
    """
    decoded = []
    for polygons in segmentations:
        # Polygons -> RLE -> array mask.
        rles = coco_mask.frPyObjects(polygons, height, width)
        mask = coco_mask.decode(rles)
        # Make sure there is a trailing dimension to reduce over.
        if mask.ndim < 3:
            mask = mask[..., None]
        # Collapse the last dimension: a pixel is set if any part sets it.
        mask = torch.as_tensor(mask, dtype=torch.uint8).any(dim=2)
        decoded.append(mask)

    # No objects: return an empty stack with the expected spatial size.
    if not decoded:
        return torch.zeros((0, height, width), dtype=torch.uint8)
    return torch.stack(decoded, dim=0)
再次封装torchvision.transforms.ToTensor
等函数,从而对image
和target
同时处理
import random
from torchvision.transforms import functional as F
class Compose(object):
    """Chain several ``(image, target)`` transforms into a single callable."""

    def __init__(self, transforms):
        # Transforms are applied in the order given.
        self.transforms = transforms

    def __call__(self, image, target):
        """Run every transform in turn and return the final pair."""
        for transform in self.transforms:
            image, target = transform(image, target)
        return image, target
class ToTensor(object):
    """Convert the image to a tensor; the target is passed through unchanged."""

    def __call__(self, image, target):
        tensor_image = F.to_tensor(image)
        return tensor_image, target
class RandomHorizontalFlip(object):
    """Randomly flip the image, its boxes and its masks horizontally."""

    def __init__(self, prob=0.5):
        # Probability with which a sample is flipped.
        self.prob = prob

    def __call__(self, image, target):
        # Leave the sample untouched when the random draw does not hit.
        if not random.random() < self.prob:
            return image, target

        _, width = image.shape[-2:]
        flipped_image = image.flip(-1)

        # Boxes are [xmin, ymin, xmax, ymax]: mirror the x coordinates and swap
        # them so that xmin stays the smaller value. Updated in place.
        boxes = target["boxes"]
        mirrored_x = width - boxes[:, [2, 0]]
        boxes[:, [0, 2]] = mirrored_x
        target["boxes"] = boxes

        if "masks" in target:
            target["masks"] = target["masks"].flip(-1)
        return flipped_image, target
根据draw_boxes_on_image
或者 draw_masks_on_image
来选择绘制类型
目标检测框bbox
采用line进行绘制
def draw_gt(image: "Image",
            boxes: np.ndarray = None,
            classes: np.ndarray = None,
            scores: np.ndarray = None,
            masks: np.ndarray = None,
            category_index: dict = None,
            box_thresh: float = 0.1,
            mask_thresh: float = 0.5,
            line_thickness: int = 8,
            font: str = 'arial.ttf',
            font_size: int = 24,
            draw_boxes_on_image: bool = True,
            draw_masks_on_image: bool = True) -> "Image":
    """Draw bounding boxes, class labels and masks onto an image.

    Args:
        image: Image to draw on.
        boxes: Bounding boxes, one ``[left, top, right, bottom]`` row each.
            When ``None`` or empty, the image is returned unchanged.
        classes: Class ID of each box; required when there are boxes.
        scores: Score of each box. ``None`` means ground truth, in which case
            every box gets a score of 1.
        masks: One mask per box, or ``None`` when there are no masks.
        category_index: Mapping from class ID (as a string) to display name.
        box_thresh: Objects whose score is not greater than this are dropped.
        mask_thresh: Threshold passed to ``draw_masks`` for the masks.
        line_thickness: Line width of the box outline.
        font: Font file used for the label text.
        font_size: Font size used for the label text.
        draw_boxes_on_image: Whether to draw the boxes and their labels.
        draw_masks_on_image: Whether to draw the masks.

    Returns:
        The image with the requested annotations drawn on it; the input image
        itself when nothing passes the score threshold.
    """
    # Nothing to draw without boxes (previously ``boxes=None`` crashed).
    if boxes is None or len(boxes) == 0:
        return image

    # ``scores`` defaults to None but was used unconditionally; ground truth
    # has no scores, so treat every object as fully confident.
    if scores is None:
        scores = np.ones(len(boxes))

    # Drop low-score objects.
    idxs = np.greater(scores, box_thresh)
    boxes = boxes[idxs]
    classes = classes[idxs]
    scores = scores[idxs]
    if masks is not None:
        masks = masks[idxs]
    if len(boxes) == 0:
        return image

    # One colour per object, chosen by class ID from the colour-name table.
    colors = [ImageColor.getrgb(STANDARD_COLORS[cls % len(STANDARD_COLORS)]) for cls in classes]

    if draw_boxes_on_image:
        draw = ImageDraw.Draw(image)
        for box, cls, score, color in zip(boxes, classes, scores, colors):
            left, top, right, bottom = box
            # Outline the box with a closed line (draw.rectangle with ``fill``
            # would fill the whole box).
            draw.line([(left, top), (left, bottom), (right, bottom),
                       (right, top), (left, top)], width=line_thickness, fill=color)
            # Class name and score.
            draw_text(draw, box.tolist(), int(cls), float(score), category_index, color, font, font_size)

    # Masks are drawn by per-pixel colour assignment, only when requested and
    # when masks are available.
    if draw_masks_on_image and (masks is not None):
        image = draw_masks(image, masks, colors, mask_thresh)

    return image
def draw_text(draw,
              box: list,
              cls: int,
              score: float,
              category_index: dict,
              color: str,
              font: str = 'arial.ttf',
              font_size: int = 24):
    """Draw the class name and score of one object next to its box.

    Args:
        draw: Drawing object whose ``text`` method renders the label.
        box: ``[left, top, right, bottom]`` of the object.
        cls: Class ID; looked up in ``category_index`` as a string.
        score: Score of the object, shown as a whole-number percentage.
        category_index: Mapping from class ID (as a string) to display name.
        color: Fill colour of the text.
        font: Font file to load; the default font is used if loading fails.
        font_size: Size of the loaded font.
    """
    try:
        label_font = ImageFont.truetype(font, font_size)
    except IOError:
        label_font = ImageFont.load_default()

    left, top, right, bottom = box
    display_str = f"{category_index[str(cls)]}: {int(100 * score)}%"

    # Text height with a 10% margin. ``getsize`` was removed in Pillow 10; the
    # bottom edge reported by ``getbbox`` is the same value. Older Pillow
    # versions without ``getbbox`` still go through ``getsize``.
    if hasattr(label_font, "getbbox"):
        text_height = label_font.getbbox(display_str)[3]
    else:
        text_height = label_font.getsize(display_str)[1]
    display_str_height = text_height * 1.1

    # Put the label above the box when there is room; otherwise (box touching
    # the top border) start it at the bottom edge so it stays visible.
    if top > display_str_height:
        text_top = top - display_str_height
    else:
        text_top = bottom

    draw.text(xy=(left, text_top), text=display_str, fill=color, font=label_font)
展示mask采用像素赋值的思想进行,其中单个mask的维度为[h, w]
def draw_masks(image, masks, colors, thresh: float = 0.7, alpha: float = 0.5):
    """Blend coloured object masks into the image.

    Args:
        image: Image to draw on; must be RGB so that a colour can be assigned
            to the masked pixels.
        masks: One mask per object, each of shape ``[h, w]``.
        colors: One colour per mask.
        thresh: Mask values greater than this count as part of the object.
        alpha: Blend weight of the coloured copy.

    Returns:
        A new image built from the blended array.
    """
    original = np.array(image)

    # Binarise the masks so they can be used as boolean pixel indexes.
    binary_masks = np.where(masks > thresh, True, False)

    # Paint every object's pixels with its colour on a copy of the image.
    painted = original.copy()
    for object_mask, object_color in zip(binary_masks, colors):
        painted[object_mask] = object_color

    # Outside the masks both terms are the same pixel, so
    # x * (1 - alpha) + x * alpha = x leaves the original untouched; inside,
    # the colour shows through semi-transparently.
    blended = original * (1 - alpha) + painted * alpha
    return fromarray(blended.astype(np.uint8))
COCO数据集介绍:https://blog.csdn.net/qq_37541097/article/details/113247318