含掩膜mask的单通道灰度图转化为COCO数据集格式标签的json文件(python)

输入:单通道的灰度图,灰度图内含掩膜mask
目标:把灰度图中的语义mask转换为COCO数据集格式的json文件
输出:COCO数据集格式的json文件

期间遇到的问题:
发现有的掩膜内部存在其他类别的掩膜,即mask内部还套了mask,这种情况的mask怎么只用一个数组来表示?

以下是查找的可用代码:

from PIL import Image
import numpy as np
from skimage import measure
from shapely.geometry import Polygon, MultiPolygon
import json
import os
from tqdm import tqdm

def create_sub_masks(mask_image):
    """Split a colour-coded mask image into one binary mask per colour.

    Args:
        mask_image: image object exposing ``size`` and ``getpixel``; the first
            three channel values of every pixel are used as its colour.

    Returns:
        dict mapping ``str((r, g, b))`` to a mode ``'1'`` image that is two
        pixels wider and taller than the input.  A colour found at ``(x, y)``
        sets the pixel ``(x + 1, y + 1)`` of its sub-mask.  Pure black pixels
        are treated as background and never create an entry.
    """
    width, height = mask_image.size
    # One pixel of padding on every side, because the contour step that
    # follows does not cope with shapes touching the image border.
    padded_size = (width + 2, height + 2)
    background = (0, 0, 0)

    sub_masks = {}
    for x in range(width):
        for y in range(height):
            colour = mask_image.getpixel((x, y))[:3]
            if colour == background:
                continue

            key = str(colour)
            if key not in sub_masks:
                # First time this colour shows up: start an all-zero mask.
                sub_masks[key] = Image.new('1', padded_size)

            # Shift by the padding when marking the pixel.
            sub_masks[key].putpixel((x + 1, y + 1), 1)

    return sub_masks


def create_sub_mask_annotation(sub_mask, image_id, category_id, annotation_id, is_crowd):
    """Build a single COCO annotation covering every contour of one sub-mask.

    Args:
        sub_mask: binary mask convertible with ``np.array``; it is expected to
            carry one pixel of padding on each side, which is subtracted here.
        image_id: value stored under ``'image_id'``.
        category_id: value stored under ``'category_id'``.
        annotation_id: value stored under ``'id'``.
        is_crowd: value stored under ``'iscrowd'``.

    Returns:
        The annotation dict, or the string ``"skip"`` when no usable polygon
        is left after simplification (e.g. the mask is too small).
    """
    # There can be several contours for one mask, e.g. when the object is
    # partially occluded or consists of separate pieces.
    contours = measure.find_contours(np.array(sub_mask), 0.5, positive_orientation='low')

    segmentations = []
    polygons = []
    for contour in contours:
        # Flip (row, col) -> (x, y) and remove the one-pixel padding.
        xy = contour[:, ::-1] - 1
        if len(xy) < 3:
            # Too few points to form a polygon ring.
            continue

        simplified = Polygon(xy).simplify(1.0, preserve_topology=False)
        if simplified.is_empty:
            # Tiny shapes can vanish completely when simplified.
            continue

        # Without topology preservation the result may be split into several
        # polygons; a MultiPolygon has no ``exterior``, so handle each part.
        if simplified.geom_type == 'MultiPolygon':
            parts = list(simplified.geoms)
        else:
            parts = [simplified]

        for part in parts:
            if part.is_empty or part.geom_type != 'Polygon':
                continue
            # Clamp to >= 0: removing the padding can push border points
            # slightly outside the image.
            segmentation = np.maximum(np.array(part.exterior.coords), 0).ravel().tolist()
            if not segmentation:
                continue
            polygons.append(part)
            segmentations.append(segmentation)

    if not polygons:
        # Nothing usable: avoids empty segmentations and NaN bbox/area.
        return "skip"

    # Combine the polygons to calculate the bounding box and area.
    # NOTE: these come from the unclamped polygons, so x / y may be slightly
    # negative for shapes that touch the image border.
    multi_poly = MultiPolygon(polygons)
    x, y, max_x, max_y = multi_poly.bounds
    bbox = (x, y, max_x - x, max_y - y)

    return {
        'segmentation': segmentations,
        'iscrowd': is_crowd,
        'image_id': image_id,
        'category_id': category_id,
        'id': annotation_id,
        'bbox': bbox,
        'area': multi_poly.area,
    }


def get_name(root, mode_folder=True):
    """Return the sorted names directly inside *root*.

    Args:
        root: directory to inspect (only its top level is read).
        mode_folder: when true, return sub-directory names; otherwise return
            file names.

    Returns:
        A sorted list of names.  The list is empty when *root* does not exist
        or is not a directory (previously the function returned ``None``).
    """
    # The first tuple produced by os.walk describes *root* itself; os.walk
    # yields nothing at all for a path it cannot list.
    _, dirs, files = next(os.walk(root), (root, [], []))
    return sorted(dirs if mode_folder else files)


def get_annotation(mask_image_root, mask_dir=None, output_path="trainmask.json"):
    """Convert a list of mask images into one COCO-style JSON file.

    Args:
        mask_image_root: iterable of mask file names, relative to *mask_dir*.
        mask_dir: directory containing the masks.  When omitted, the
            module-level ``maskdir`` variable is used, as before.
        output_path: where the JSON file is written.

    Every non-black colour of every mask becomes one annotation.
    """
    if mask_dir is None:
        # Backward compatible: the directory used to be read from a global
        # that only exists when the file is run as a script.
        mask_dir = maskdir

    dataset = {"info": {"year": 2023, "version": "2023", "description": "", "url": "",
                        },
               "license": {},
               "images": [],
               "annotations": [],
               "categories": []}
    class_index = {0: "background", 1: 'cate1', 2: 'cate2'}
    for class_id, class_name in class_index.items():
        dataset["categories"].append({"id": class_id, "name": class_name, "supercategory": "xxx"})

    is_crowd = 0

    # Incremented once per stored annotation.
    annotation_id = 0

    # tqdm wraps the list itself (not enumerate) so it knows the total.
    for image_id, file_name in enumerate(tqdm(mask_image_root)):
        print(image_id)
        # Close the file handle as soon as the pixels are converted.
        with Image.open(os.path.join(mask_dir, file_name)) as opened:
            mask_image = opened.convert('RGB')
        print(file_name)
        width, height = mask_image.size
        dataset["images"].append({
                                  "file_name": file_name,
                                  "id": image_id,
                                  "width": width,
                                  "height": height})

        sub_masks = create_sub_masks(mask_image)
        for sub_mask in sub_masks.values():
            # NOTE: every colour is labelled as category 1 in this version.
            category_id = 1
            annotation = create_sub_mask_annotation(sub_mask, image_id, category_id, annotation_id, is_crowd)
            if annotation == "skip":
                continue
            dataset["annotations"].append(annotation)
            annotation_id += 1

    with open(output_path, "w") as f:
        json.dump(dataset, f)



# rrr = "./InstanceSegmentation/"
# all_root = get_name(rrr, mode_folder=False)
# get_annotation(all_root)
if __name__ == '__main__':
    # ``maskdir`` must stay a module-level name: get_annotation() reads it.
    maskdir = './mask/trainmask/'
    # Every entry of the directory is treated as a mask image.
    get_annotation(os.listdir(maskdir))

问题:
上述代码仍然存在不足:有的mask太小,输出的segmentation是空列表 []。可以在生成segmentation的位置加一个判断,为空就不保存,从而避免这个问题;但是bbox等信息有时仍会出现NaN的情况,需要自己判断处理。

整体上来说,这个代码还是挺好用的。

还有一点:对于内部嵌套mask比较极端的图像,代码执行时容易出错,建议把有问题的图像删除,或者自己定位问题后修改代码。

更新:实际测试发现上述代码生成的部分标注格式有问题,以下是修改后的代码:

from PIL import Image
import numpy as np
from skimage import measure
from shapely.geometry import Polygon, MultiPolygon
import json
import os
from tqdm import tqdm

def create_sub_masks(mask_image):
    """Build one padded binary mask for each non-black colour in *mask_image*.

    Args:
        mask_image: image object providing ``size`` and ``getpixel``; only the
            first three channel values of a pixel are considered.

    Returns:
        dict keyed by ``str((r, g, b))``.  Each value is a mode ``'1'`` image
        of size ``(width + 2, height + 2)`` in which the pixel
        ``(x + 1, y + 1)`` is set for every input pixel ``(x, y)`` of that
        colour.  Black ``(0, 0, 0)`` pixels are skipped.
    """
    width, height = mask_image.size

    sub_masks = {}
    for x in range(width):
        for y in range(height):
            rgb = mask_image.getpixel((x, y))[:3]
            if rgb != (0, 0, 0):
                key = str(rgb)
                try:
                    layer = sub_masks[key]
                except KeyError:
                    # New colour: allocate an empty mask with a one-pixel
                    # border, since contour finding does not handle shapes
                    # that reach the image edge.
                    layer = sub_masks[key] = Image.new('1', (width + 2, height + 2))

                # Offset by one to account for the border.
                layer.putpixel((x + 1, y + 1), 1)

    return sub_masks


def create_sub_mask_annotation(sub_mask, image_id, category_id, annotation_id, is_crowd):
    """Create one COCO annotation per contour found in a binary sub-mask.

    Args:
        sub_mask: binary mask convertible with ``np.array``; it is expected to
            carry one pixel of padding on each side, which is subtracted here.
        image_id: value stored under ``'image_id'``.
        category_id: value stored under ``'category_id'``.
        annotation_id: id given to the first annotation created; it is
            incremented for every further one.
        is_crowd: value stored under ``'iscrowd'``.

    Returns:
        ``(annotations, next_annotation_id)`` where *annotations* is a list of
        annotation dicts (possibly empty) and *next_annotation_id* is the
        first id not used by this call.
    """
    # There can be several contours for one mask, e.g. when the object is
    # partially occluded or consists of separate pieces.
    contours = measure.find_contours(np.array(sub_mask), 0.5, positive_orientation='low')

    annotations = []
    for contour in contours:
        # Flip (row, col) -> (x, y) and remove the one-pixel padding.
        xy = contour[:, ::-1] - 1
        if len(xy) < 3:
            # Too few points to form a polygon ring.
            continue

        simplified = Polygon(xy).simplify(1.0, preserve_topology=False)
        if simplified.is_empty:
            # Tiny shapes can vanish completely when simplified.
            continue

        # Without topology preservation the result may be split into several
        # polygons; a MultiPolygon has no ``exterior``, so each part gets its
        # own annotation.
        if simplified.geom_type == 'MultiPolygon':
            parts = list(simplified.geoms)
        else:
            parts = [simplified]

        for part in parts:
            if part.is_empty or part.geom_type != 'Polygon':
                continue
            # Clamp to >= 0: removing the padding can push border points
            # slightly outside the image.
            segmentation = np.maximum(np.array(part.exterior.coords), 0).ravel().tolist()
            if not segmentation:
                continue

            x, y, max_x, max_y = part.bounds
            annotations.append({
                'segmentation': [segmentation],
                'iscrowd': is_crowd,
                'image_id': image_id,
                'category_id': category_id,
                'id': annotation_id,
                'bbox': (x, y, max_x - x, max_y - y),
                'area': part.area,
            })
            annotation_id += 1

    return annotations, annotation_id


def get_name(root, mode_folder=True):
    """List the immediate children of *root* in sorted order.

    Args:
        root: directory whose top level is listed.
        mode_folder: ``True`` selects sub-directory names, ``False`` selects
            file names.

    Returns:
        Sorted list of names; ``[]`` when *root* is missing or is not a
        directory (the function used to return ``None`` in that case).
    """
    top_level = next(os.walk(root), None)
    if top_level is None:
        # os.walk produced nothing: the path could not be listed.
        return []

    _, dirs, files = top_level
    if mode_folder:
        return sorted(dirs)
    return sorted(files)


def get_annotation(mask_image_root, mask_dir=None, output_path="post_val.json"):
    """Convert a list of grayscale class masks into one COCO-style JSON file.

    Args:
        mask_image_root: iterable of mask file names, relative to *mask_dir*.
        mask_dir: directory containing the masks.  When omitted, the
            module-level ``maskdir`` variable is used, as before.
        output_path: where the JSON file is written.

    A pixel value ``k`` (seen as colour ``(k, k, k)`` after RGB conversion)
    is mapped to category ``k`` of ``class_index``; colours without a
    category are reported and skipped.
    """
    if mask_dir is None:
        # Backward compatible: the directory used to be read from a global
        # that only exists when the file is run as a script.
        mask_dir = maskdir

    dataset = {"info": {"year": 2023, "version": "2023", "description": "", "url": "",
                        },
               "license": {},
               "images": [],
               "annotations": [],
               "categories": []}
    class_index = {0: "background", 1: 'junban', 2: 'yachi'}
    for class_id, class_name in class_index.items():
        dataset["categories"].append({"id": class_id, "name": class_name, "supercategory": "yachi"})

    # Sub-mask keys look like '(1, 1, 1)'; background (0) never gets a
    # sub-mask, so it needs no entry.
    color_to_category = {str((k, k, k)): k for k in class_index if k != 0}

    is_crowd = 0

    # Advanced by create_sub_mask_annotation for every stored annotation.
    annotation_id = 0

    # tqdm wraps the list itself (not enumerate) so it knows the total.
    for image_id, file_name in enumerate(tqdm(mask_image_root)):
        print(image_id)
        # Close the file handle as soon as the pixels are converted.
        with Image.open(os.path.join(mask_dir, file_name)) as opened:
            mask_image = opened.convert('RGB')
        print(file_name)
        width, height = mask_image.size
        dataset["images"].append({
                                  "file_name": file_name,
                                  "id": image_id,
                                  "width": width,
                                  "height": height})

        sub_masks = create_sub_masks(mask_image)
        for color, sub_mask in sub_masks.items():
            category_id = color_to_category.get(color)
            if category_id is None:
                # Previously an unexpected colour either crashed (unbound
                # variable) or silently reused the preceding category.
                print("skip unknown mask colour %s in %s" % (color, file_name))
                continue

            annotations, annotation_id = create_sub_mask_annotation(
                sub_mask, image_id, category_id, annotation_id, is_crowd)
            dataset["annotations"].extend(annotations)

    with open(output_path, "w") as f:
        json.dump(dataset, f)



# rrr = "./InstanceSegmentation/"
# all_root = get_name(rrr, mode_folder=False)
# get_annotation(all_root)
if __name__ == '__main__':
    # ``maskdir`` must stay a module-level name: get_annotation() reads it.
    # Alternative input directory: './mask/posttest/mask/'
    maskdir = './mask/valmask/'
    # Every entry of the directory is treated as a mask image.
    get_annotation(os.listdir(maskdir))

注意:根据mask像素值确定类别(category_id)的那部分代码,需要按照自己数据集的实际像素值和类别手动调整。

你可能感兴趣的:(常用高效技巧,软件方法和命令等,json,python,图像处理,COCO数据集格式)