在 Windows 上安装 COCO API 比较麻烦:顺利的话很快就能装好,不顺利的话会遇到一堆坑。我不想再折腾了,所以直接读取 COCO 的 JSON 标注文件,按图片文件名找到对应的标注并画出边界框。以下是 Python 脚本:
"""
Created on Sat May 15 14:17:42 2021
@author: Ryan
"""
import os
import json
import numpy as np
import cv2 # 注意用 cv2 不能有中文路径, 有的话建议用下边 cv_imread 那个函数
def cv_imread(file_name):
    """Read an image whose path may contain non-ASCII (e.g. Chinese) characters.

    The file is first loaded into a uint8 byte buffer with NumPy and the
    buffer is then decoded by OpenCV, so the path string itself is never
    handed to OpenCV.

    Approach taken from:
    https://www.zhihu.com/question/67157462/answer/251754530

    :param file_name: path of the image file to read.
    :return: the result of ``cv2.imdecode`` on the file's bytes with flag ``-1``.
    """
    raw_bytes = np.fromfile(file_name, dtype=np.uint8)
    return cv2.imdecode(raw_bytes, -1)
# NOTE: change the paths below to match your own machine.
json_path = r"F:\COCO\annotations_trainval2017\annotations\instances_train2017.json" # switch between the train and val annotation files as needed
img_path = r"F:\COCO\train2017" # switch between the train and val image folders as needed
img_name = "000000001757.jpg"
# Load the whole annotation file into one dict. The encoding is given
# explicitly as UTF-8 because relying on the default may raise an error.
with open(json_path, encoding="utf-8") as f:
    all_coco_ann = json.load(f)
# -----------------------------------------------------------------------------
# --------------------- COCO 标注的大字典里找那张图片的信息 ---------------------
# -----------------------------------------------------------------------------
def get_COCO_img_info(img_name, all_coco_ann):
    """Look up the image record for ``img_name`` in the loaded COCO annotations.

    :param img_name: file name compared against each record's ``'file_name'``.
    :param all_coco_ann: the loaded annotation dict; its ``"images"`` list is scanned.
    :return: the first record whose ``'file_name'`` equals ``img_name``,
        or ``False`` when no record matches.
    """
    matches = (
        record
        for record in all_coco_ann["images"]
        if record['file_name'] == img_name
    )
    # next() stops at the first hit; False is the "not found" sentinel.
    return next(matches, False)
img_info = get_COCO_img_info(img_name, all_coco_ann)
# get_COCO_img_info reports "not found" by returning False. Stop here with a
# clear message instead of failing on the subscript below with
# "TypeError: 'bool' object is not subscriptable".
if img_info is False:
    raise ValueError(
        "image {!r} is not listed in the annotation file {!r}".format(img_name, json_path)
    )
img_id = img_info['id']  # per the original author, the image id is part of the image file name
# -----------------------------------------------------------------------------
# --------------------- COCO 标注的大字典里找那张图片的标注 ---------------------
# -----------------------------------------------------------------------------
def get_COCO_img_anno(img_id, all_coco_ann):
    """Collect all annotation records that belong to one image.

    :param img_id: image id compared against each record's ``'image_id'``.
    :param all_coco_ann: the loaded annotation dict; its ``"annotations"`` list is scanned.
    :return: list of matching records in their original order; empty when none match.
    """
    return [
        record
        for record in all_coco_ann["annotations"]
        if record['image_id'] == img_id
    ]
# Gather every annotation record whose image_id matches the selected image.
ann_list = get_COCO_img_anno(img_id, all_coco_ann)
# -----------------------------------------------------------------------------
# ------------------------- 获取你想要的类别的类别 id -------------------------
# -----------------------------------------------------------------------------
def get_categories_needed(category, all_coco_ann):
    """Build id<->name lookup tables for the requested category names.

    :param category: one category name (a string) or a list of category names.
    :param all_coco_ann: the loaded annotation dict; its ``"categories"`` list is scanned.
    :return: ``(cls_id2name, cls_name2id)`` containing only the categories
        whose ``'name'`` was requested.
    """
    # A bare string is wrapped so that the membership test below compares
    # whole names instead of matching substrings.
    wanted = [category] if isinstance(category, str) else category
    selected = [
        (entry['id'], entry['name'])
        for entry in all_coco_ann["categories"]
        if entry['name'] in wanted
    ]
    cls_id2name = dict(selected)
    cls_name2id = {cls_name: cls_id for cls_id, cls_name in selected}
    return cls_id2name, cls_name2id
# Categories to keep; get_categories_needed accepts a single name or a list of names.
category = ['person']
# Build the id -> name and name -> id lookups restricted to those categories.
cls_id2name, cls_name2id = get_categories_needed(category, all_coco_ann)
# -----------------------------------------------------------------------------
# ---------------------- 根据已选择的类别挑选已获得的标注 ----------------------
# -----------------------------------------------------------------------------
def get_ann_needed(ann_list, cls_id2name):
    """Filter annotations down to the wanted categories.

    :param ann_list: annotation records, each with ``'category_id'`` and ``'bbox'``.
    :param cls_id2name: dict mapping the wanted category ids to their names.
    :return: list of ``(category name, bbox)`` tuples, in input order, for the
        records whose ``'category_id'`` is a key of ``cls_id2name``.
    """
    return [
        (cls_id2name[record['category_id']], record['bbox'])
        for record in ann_list
        if record['category_id'] in cls_id2name
    ]
# Keep only the annotations of the chosen categories, as (category name, bbox) pairs.
ann_needed = get_ann_needed(ann_list, cls_id2name)
# -----------------------------------------------------------------------------
# -------------------------------- 读图绘制bbox -------------------------------
# -----------------------------------------------------------------------------
def drawBbox(img_array, ann_needed):
    """Draw bounding boxes on a copy of the image, display it, and return it.

    :param img_array: image array; it is copied before any drawing happens.
    :param ann_needed: iterable of ``(name, (x_lt, y_lt, w, h))`` pairs, i.e. a
        label plus a box given by its top-left corner, width and height.
        The label is not drawn.
    :return: the copy with the rectangles drawn on it.
    """
    # Work on a copy: the array is passed by reference, so drawing without
    # this copy would modify the caller's image.
    canvas = img_array.copy()
    box_color = (0, 255, 0)  # swap in a per-category colour here if desired
    for _name, box in ann_needed:
        x_lt, y_lt, w, h = box
        top_left = (int(x_lt), int(y_lt))
        bottom_right = (int(x_lt + w), int(y_lt + h))
        canvas = cv2.rectangle(canvas, top_left, bottom_right, box_color, 3)
    import matplotlib.pyplot as plt
    # plt.figure(dpi=1000)
    # Reverse the channel axis before display (presumably OpenCV's BGR
    # order to the RGB order matplotlib expects).
    plt.imshow(canvas[:, :, ::-1])
    plt.show()
    return canvas
# Load the image through cv_imread (decodes from a byte buffer rather than from the path).
img_array = cv_imread(os.path.join(img_path, img_name))
# Display the image with the selected boxes; drawBbox draws on its own copy.
drawBbox(img_array, ann_needed)
这是那个图片:
再补一个画网格和画Anchor的函数
# -----------------------------------------------------------------------------
# --------------------------------- 图片画网格 --------------------------------
# -----------------------------------------------------------------------------
def drawGrid(img_array, zuo=15, xia=15):
    """Overlay an evenly spaced grid on the image, display it, and return it.

    NOTE: no copy is taken here, so the lines are presumably drawn onto the
    caller's array as well (the array is handed straight to ``cv2.line``).

    :param img_array: image array of shape ``(height, width, channels)``.
    :param zuo: number of rows to split the image into.
    :param xia: number of columns to split the image into.
    :return: the array returned by the last ``cv2.line`` call (or the input
        array itself when no line is drawn).
    """
    height, width, _ = img_array.shape
    row_step = height / zuo
    col_step = width / xia
    # Horizontal separators: zuo - 1 lines spanning the full width.
    segments = [
        [(0, int(row_step * k)), (width, int(row_step * k))]
        for k in range(1, zuo)
    ]
    # Vertical separators: xia - 1 lines spanning the full height.
    segments += [
        [(int(col_step * k), 0), (int(col_step * k), height)]
        for k in range(1, xia)
    ]
    for start, end in segments:
        # Colour and thickness are fixed here; adjust to taste.
        img_array = cv2.line(img_array, start, end, (65, 183, 105), 1)
    import matplotlib.pyplot as plt
    plt.figure(dpi=1000)
    # Reverse the channel axis before display (presumably BGR -> RGB).
    plt.imshow(img_array[:, :, ::-1])
    plt.show()
    return img_array
# Reload the image from disk, draw the default 15 x 15 grid on it and save the result.
img_array = cv_imread(os.path.join(img_path, img_name))
img_array_grid = drawGrid(img_array)
# The relative file name means the PNG is written to the current working directory.
cv2.imwrite("img_array_grid.png", img_array_grid)
# -----------------------------------------------------------------------------
# -------------------------------- 绘制单个锚框 -------------------------------
# -----------------------------------------------------------------------------
def drawAnchorBox(img_array, anchor_center, w=100, h=100):
    """Draw a single anchor box centred on a point, display it, and return it.

    No copy of ``img_array`` is taken before drawing.

    :param img_array: image array to draw on.
    :param anchor_center: ``(x, y)`` centre of the anchor box.
    :param w: box width.
    :param h: box height.
    :return: the array returned by ``cv2.rectangle``.
    """
    center_x, center_y = anchor_center
    half_w = w / 2
    half_h = h / 2
    top_left = (int(center_x - half_w), int(center_y - half_h))
    bottom_right = (int(center_x + half_w), int(center_y + half_h))
    img_array = cv2.rectangle(img_array, top_left, bottom_right, (255, 0, 25), 2)
    import matplotlib.pyplot as plt
    plt.figure(dpi=1000)
    # Reverse the channel axis before display (presumably BGR -> RGB).
    plt.imshow(img_array[:, :, ::-1])
    plt.show()
    return img_array
# img_array_grid = drawAnchorBox(img_array, (250, 250))
# -----------------------------------------------------------------------------
# -------------------------------- 绘制多个锚框 -------------------------------
# -----------------------------------------------------------------------------
def drawAnchorBoxes(img_array, anchor_center, anchor_boxes):
    """Draw several anchor boxes, display the result once, and return it.

    Centres and sizes are paired positionally with ``zip``, so surplus
    entries in the longer sequence are ignored. No copy of ``img_array``
    is taken before drawing.

    :param img_array: image array to draw on.
    :param anchor_center: sequence of ``(x, y)`` box centres.
    :param anchor_boxes: sequence of ``(width, height)`` box sizes.
    :return: the array returned by the last ``cv2.rectangle`` call (or the
        input array itself when nothing is drawn).
    """
    for center, size in zip(anchor_center, anchor_boxes):
        center_x, center_y = center
        box_w, box_h = size
        top_left = (int(center_x - box_w / 2), int(center_y - box_h / 2))
        bottom_right = (int(center_x + box_w / 2), int(center_y + box_h / 2))
        img_array = cv2.rectangle(img_array, top_left, bottom_right, (255, 0, 25), 2)
    import matplotlib.pyplot as plt
    plt.figure(dpi=1000)
    # Reverse the channel axis before display (presumably BGR -> RGB).
    plt.imshow(img_array[:, :, ::-1])
    plt.show()
    return img_array
# anchor_center = [(50, 30), (300, 50), (120, 300)]
# anchor_boxes = [(150, 30), (60, 50), (120, 30)]
# Three anchors sharing the same centre point but with different (width, height) shapes.
anchor_center = [(191, 234), (191, 234), (191, 234)]
anchor_boxes = [(200, 200), (150, 260), (260, 150)]
# Drawn on img_array, the same array that was passed to drawGrid earlier.
img_array_grid = drawAnchorBoxes(img_array, anchor_center, anchor_boxes)
这是绘制了Anchor的,Anchor是随便编的
这个是正经Anchor:
再补充一个计算IoU的:
这个是摘自博客:
https://blog.csdn.net/futangxiang4793/article/details/104482365
写的挺好的
def IoU(bbox, gt):
    """Compute the pairwise IoU between two sets of corner-format boxes.

    Every box is ``(x1, y1, x2, y2)``: top-left and bottom-right corners.
    The ``+ 1`` terms count both end coordinates as part of the box
    (inclusive pixel-index convention). COCO ``'bbox'`` entries are
    ``(x, y, w, h)`` and must be converted to corners before calling this.

    Adapted from:
    https://blog.csdn.net/futangxiang4793/article/details/104482365

    NumPy broadcasting aligns shapes from the trailing axis backwards, and
    an axis of length 1 is repeated to match the other operand, e.g.
    ``(4, 1, 2)`` with ``(3, 2)`` -> both act as ``(4, 3, 2)``. Indexing with
    ``None`` inserts such a length-1 axis (``a[:, None, :]`` turns ``(3, 2)``
    into ``(3, 1, 2)``), which is how every box in ``bbox`` is paired with
    every box in ``gt`` below.

    :param bbox: ``(n, 4)`` array-like of boxes; a single 4-element box is
        treated as ``(1, 4)``.
    :param gt: ``(m, 4)`` array-like of boxes; a single 4-element box is
        treated as ``(1, 4)``.
    :return: ``(n, m)`` float ``np.ndarray`` where entry ``[i, j]`` is the IoU
        of ``bbox[i]`` and ``gt[j]``.
    """
    # Accept lists/tuples and single boxes; 2-D ndarrays pass through unchanged.
    bbox = np.atleast_2d(np.asarray(bbox))
    gt = np.atleast_2d(np.asarray(gt))

    lt = np.maximum(bbox[:, None, :2], gt[:, :2])  # intersection top-left (x, y), shape (n, m, 2)
    rb = np.minimum(bbox[:, None, 2:], gt[:, 2:])  # intersection bottom-right (x, y), shape (n, m, 2)
    wh = np.maximum(rb - lt + 1, 0)  # intersection (w, h); clamped to 0 when boxes do not overlap
    inter_areas = wh[:, :, 0] * wh[:, :, 1]  # shape (n, m)

    box_areas = (bbox[:, 2] - bbox[:, 0] + 1) * (bbox[:, 3] - bbox[:, 1] + 1)  # shape (n,)
    gt_areas = (gt[:, 2] - gt[:, 0] + 1) * (gt[:, 3] - gt[:, 1] + 1)  # shape (m,)

    # union = area(a) + area(b) - intersection; true division yields floats.
    union_areas = box_areas[:, None] + gt_areas - inter_areas
    return inter_areas / union_areas