COCO-stuff API[1]是 COCO API[2]的扩展,安装见 [3],这里研究一下 COCO-stuff 的用法。
下载链接见 [4]。
训练集图片 train2017.zip、验证集图片 val2017.zip,分别解压出 train2017/ 和 val2017/,然后统一放去 images/(可以放软链接,见 [5])。
thing 和 stuff 合并的 mask stuffthingmaps_trainval2017.zip,也是解压出 train2017/ 和 val2017/,不过装的是 .png 形式的 mask,文件名、shape 与 image 对应,uint8 型,同时包含 thing 和 stuff 的 mask,数值与 label 的对应见后文分析。
thing 和 stuff 的 mask 也有拆开的两份:原 COCO 单纯 thing 的 mask annotations_trainval2017.zip、COCO-stuff 新加的 stuff mask stuff_trainval2017.zip。解压出 .json 形式的 mask,用 COCO-stuff 的 API 读。此时 stuff 的 mask 会多一个 183 号类,叫 other,[4] 有讲,详见后文。
这些 mask 的文件全部解压去 annotations/ 下(原 COCO 的 mask 解压出的就是 annotations/ 目录,其它解压出的也移进去)。
import os
import sys
import json
import numpy as np
import cv2
import matplotlib.pyplot as plt
# 参考 [3],先下好 COCO-stuff 的 API
sys.path.append("G:/dataset/COCO-stuff/cocostuffapi/PythonAPI")
from pycocotools.coco import COCO
# Root directory of the local COCO-stuff download.
P = "G:/dataset/COCO-stuff"
ANNO_P = os.path.join(P, "annotations")  # all masks are stored here
MASK_P_TRAIN = os.path.join(ANNO_P, "train2017")  # png masks of the train set
MASK_P_VAL = os.path.join(ANNO_P, "val2017")  # png masks of the val set
IMAGE_P = os.path.join(P, "images")  # all images (or symlinks to them) are stored here
# labels.txt: one "<id>: <name>" entry per line, parsed further down.
LABEL_F = os.path.join(P, "cocostuff", "labels.txt")
label 有两个出处:
labels.txt 中 0 是 unlabeled,没有 183 的 other;
cls_id = {}
# Manually add `183: other`, which labels.txt does not list; see
# https://github.com/nightrome/cocostuff#downloads
cls_id["other"] = 183
# Every labels.txt entry has the form "<id>: <name>".
with open(LABEL_F, "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line:
            # Skip blank lines (e.g. a trailing newline at the end of the
            # file) instead of failing on the tuple unpacking below.
            continue
        # Split on the first separator only so that a name which itself
        # contains ": " is kept intact.
        cid, cname = line.split(": ", 1)
        cls_id[cname] = int(cid)
# Reverse mapping: class ID -> class name.
id_cls = {cid: cname for cname, cid in cls_id.items()}
# thing classes: load the val-split instance annotations.
# Swap in the commented-out path below to inspect the train split instead.
# thing_anno = os.path.join(ANNO_P, "instances_train2017.json")
thing_anno = os.path.join(ANNO_P, "instances_val2017.json")
coco_thing = COCO(thing_anno)
# stuff classes: load the val-split stuff annotations.
# Swap in the commented-out path below to inspect the train split instead.
# stuff_anno = os.path.join(ANNO_P, "stuff_train2017.json")
stuff_anno = os.path.join(ANNO_P, "stuff_val2017.json")
coco_stuff = COCO(stuff_anno)
80 和 92
80 thing、91 stuff、加一个 unlabeled
0 号 unlabeled 和 183 号 other 是对应的?
thing_cat 和 stuff_cat 的定义在下一节 comparison 里…
cls_id_thing = {m["name"]: m["id"] for m in thing_cat}
# Invert the thing name -> ID table so an ID can be resolved back to its name.
id_cls_thing = {cat_id: cat_name for cat_name, cat_id in cls_id_thing.items()}
print(len(cls_id_thing))  # 80
# Build the same pair of lookup tables for the stuff categories.
cls_id_stuff = dict((cat["name"], cat["id"]) for cat in stuff_cat)
id_cls_stuff = {cat_id: cat_name for cat_name, cat_id in cls_id_stuff.items()}
print(len(cls_id_stuff))  # 92
labels.txt 比 (thing + stuff) anno 多 12 个类,stuff anno 多了个 183 的 other
# Dump every (ID, name) pair parsed from labels.txt.
for cid, cname in id_cls.items():
    print(cid, cname)

# thing categories as recorded in the .json annotations.
thing_cat_id = coco_thing.getCatIds()
print(thing_cat_id)
thing_cat = coco_thing.loadCats(thing_cat_id)
for cat in thing_cat:
    print(cat)

# stuff categories as recorded in the .json annotations.
stuff_cat_id = coco_stuff.getCatIds()
print(stuff_cat_id)
stuff_cat = coco_stuff.loadCats(stuff_cat_id)
for cat in stuff_cat:
    print(cat)
# Cross-check labels.txt against the thing annotation categories.
for cid, thing_name in id_cls_thing.items():
    if cid not in id_cls:
        # Category known to the thing json but absent from labels.txt.
        print("more:", cid, thing_name)  # no output
        continue
    label_name = id_cls[cid]
    if label_name != thing_name:
        # Same ID, but the two sources disagree on the name.
        print("diff:", cid, label_name, thing_name)  # no output
print("DONE")
# Cross-check labels.txt against the stuff annotation categories.
for cid, stuff_name in id_cls_stuff.items():
    if cid not in id_cls:
        # Category known to the stuff json but absent from labels.txt.
        print("more:", cid, stuff_name)  # only `more: 183 other`
    elif id_cls[cid] != stuff_name:
        # Same ID, different name. Report the stuff name here: indexing
        # id_cls_thing with a stuff ID would raise KeyError (or print an
        # unrelated thing name).
        print("diff:", cid, id_cls[cid], stuff_name)  # no output
print("DONE")
# Cross-check labels.txt against the union of thing and stuff categories.
for cid, label_name in id_cls.items():
    in_thing = cid in id_cls_thing
    in_stuff = cid in id_cls_stuff
    if in_thing and in_stuff:
        # The same ID is claimed by both annotation files.
        print("duplicate:", cid, label_name, id_cls_thing[cid], id_cls_stuff[cid])
    elif in_thing:
        if label_name != id_cls_thing[cid]:
            print("diff:", cid, label_name, id_cls_thing[cid])
    elif in_stuff:
        if label_name != id_cls_stuff[cid]:
            print("diff:", cid, label_name, id_cls_stuff[cid])
    else:
        # Listed in labels.txt but used by neither thing nor stuff.
        print("less:", cid, label_name)  # 12 lines of output
print("DONE")
测试 thing 和 stuff 的 .json 中,image 是否对得上。
# Image IDs known to each annotation file.
img_id_thing = coco_thing.getImgIds()
img_id_stuff = coco_stuff.getImgIds()
print(len(img_id_thing), len(img_id_stuff))  # 5000 5000

# Walk both ID lists in lockstep and report every position where they differ.
for idx, id_pair in enumerate(zip(img_id_thing, img_id_stuff)):
    if id_pair[0] != id_pair[1]:
        print(idx, *id_pair)  # no output
print("DONE")

# Load the full image records and print one from each file to inspect the fields.
img_th = coco_thing.loadImgs(img_id_thing)
img_st = coco_stuff.loadImgs(img_id_stuff)
print(img_th[10])
print(img_st[10])
{'license': 3, 'file_name': '000000296649.jpg', 'coco_url': 'http://images.cocodataset.org/val2017/000000296649.jpg', 'height': 427, 'width': 640, 'date_captured': '2013-11-15 15:26:19', 'flickr_url': 'http://farm4.staticflickr.com/3577/3491669985_d81e1050c6_z.jpg', 'id': 296649}
{'license': 3, 'file_name': '000000296649.jpg', 'coco_url': 'http://images.cocodataset.org/val2017/000000296649.jpg', 'height': 427, 'width': 640, 'date_captured': '2013-11-15 15:26:19', 'flickr_url': 'http://farm4.staticflickr.com/3577/3491669985_d81e1050c6_z.jpg', 'id': 296649}
file_name 字段
img_th[0] / im_th / img_st[0] / im_st,后面的测试全是用它
# 由 thing 的 json 信息读图
# Read the image named by the first thing-json record. The pixels are
# converted from BGR to RGB before being handed to matplotlib.
path_th = os.path.join(IMAGE_P, img_th[0]["file_name"])
im_th = cv2.cvtColor(cv2.imread(path_th), cv2.COLOR_BGR2RGB)
plt.imshow(im_th)
plt.show()

# Read the image named by the first stuff-json record; it is the same picture,
# so it is not displayed a second time.
path_st = os.path.join(IMAGE_P, img_st[0]["file_name"])
im_st = cv2.cvtColor(cv2.imread(path_st), cv2.COLOR_BGR2RGB)
mask 有两种来源:
这里用同一张图测试。
# Look up and load every thing annotation attached to the test image.
annIds_th = coco_thing.getAnnIds(imgIds=img_th[0]["id"])
anns_th = coco_thing.loadAnns(annIds_th)
print("#anno:", len(anns_th))

# Draw the annotations on top of the image.
plt.imshow(im_th)
coco_thing.showAnns(anns_th)
plt.show()

# Class name of each annotation, resolved through the labels.txt table.
print([id_cls[ann["category_id"]] for ann in anns_th])
# Look up and load every stuff annotation attached to the test image.
# The `other` annotation (category 183) is kept; an earlier experiment that
# dropped it from anns_st is left disabled.
annIds_st = coco_stuff.getAnnIds(imgIds=img_st[0]["id"])
anns_st = coco_stuff.loadAnns(annIds_st)
print("#anno:", len(anns_st))

# Draw the annotations on top of the image.
plt.imshow(im_st)
coco_stuff.showAnns(anns_st)
plt.show()

# Class name of each annotation, resolved through the labels.txt table.
print([id_cls[ann["category_id"]] for ann in anns_st])
stuff 的 mask 包含 other,[4] 说它包含所有非 stuff 的 pixel,应该指 thing 的那些?
image_id 是对应的 image ID;area 是对应 region 的面积(pixel 数?);category_id 是 label 的 class ID,见前文;id 是这个 mask 的 ID,验证过,每个 mask 都有一个惟一的 ID;bbox 没用过,应该就是那个 object 的框
print("--- thing anno ---")
# Dump the raw thing annotations, then a separator line and the raw stuff
# annotations, one item per output line.
print(anns_th, "--- stuff anno ---", anns_st, sep="\n")
other 和 thing 的 mask 基本重复
# 原图尺寸
print("image shape:", im_th.shape, im_st.shape)  # (427, 640, 3) (427, 640, 3)
img_h, img_w = im_th.shape[:2]
print("image area:", img_h * img_w)  # 273280

# Total area of the stuff masks. The areas are accumulated one by one, left to
# right, rather than with the built-in sum(), which may round floats
# differently on newer Python versions.
sum_area = 0
for m in anns_st:
    sum_area = sum_area + m["area"]
print("stuff area sum:", sum_area)  # 269801.0

# Keep accumulating: total area of thing + stuff masks.
for m in anns_th:
    sum_area = sum_area + m["area"]
print("(thing + stuff) area sum:", sum_area)  # 369899.52395000006
要对比 png 形式的 mask 和前面 json 形式的 mask,看对不对得上。
[4] 说 png mask 的像素值就对应相应的 class,而 255 对应 unlabeled 或 void class
# The png mask file name is the image file name with ".jpg" replaced by ".png".
mask_f = img_st[0]["file_name"].replace(".jpg", ".png")
mask_path = os.path.join(MASK_P_VAL, mask_f)
png_mask = cv2.imread(mask_path)
print(type(png_mask), png_mask.dtype, png_mask.shape, im_st.shape)
# Output: uint8 (427, 640, 3) (427, 640, 3)
plt.imshow(png_mask)
person,然而由 labels.txt,0 号类是 unlabeled,对不上!
mask_0 = (0 == png_mask)
# Show the pixels whose value is 0 (drawn as 0 on a 255 background), or report
# that no pixel of the mask carries that value.
if mask_0.any():
    plt.imshow(np.where(mask_0, 0, 255))
    plt.show()
else:
    print("this image has NOT this class")
0 对应 person,而 labels.txt 中 1 号类才是 person,考虑将 png 中的像素值 +1 后再与 labels.txt 对拍。还真对上了
+1 后作为 class ID,与 labels.txt 对得上
# json 导出的 class set
# Class names seen in the json annotations: union of thing and stuff.
cls_set_json = {id_cls[ann["category_id"]] for ann in anns_th} | {
    id_cls[ann["category_id"]] for ann in anns_st
}

# Class names seen in the png mask. Iterate over the pixel values that really
# occur in png_mask, so the largest value is not dropped by an exclusive
# range() bound.
cls_set_png = set()
for pixel_value in np.unique(png_mask):
    # Convert to a plain int first: adding 1 to a uint8 scalar could wrap.
    pixel_value = int(pixel_value)
    if pixel_value == 255:
        # 255 marks unlabeled / void pixels, not a class.
        continue
    cls_set_png.add(id_cls[pixel_value + 1])  # png pixel value + 1 is the class ID

# Differences between the two class sets.
print(len(cls_set_json) == len(cls_set_png))
print(cls_set_json - cls_set_png)
print(cls_set_png - cls_set_json)
other
False
{'other'}
set()