如果代码是用mmdetection框架写的,转化部分的代码不需要自己来写,mmdetection自带的脚本可以把预测结果转为json文件。
只需运行python tools/test.py configs/involution/faster_rcnn_red50_neck_fpn_1x_coco.py work_dirs/faster_rcnn_red50_neck_fpn_1x_coco/latest.pth --format-only --options "jsonfile_prefix=./results"
即可得到预测结果文件,具体使用方法可以查阅mmdetection的官方文档。
如果没有用到mmdetection框架,也可以自己简单修改一下预测部分的代码,将预测结果保存为json文件。
参考yolo.py文件中的detect_image()函数,定义save_json函数,返回值不再是带框的图片,而是返回存放框信息的content_json列表,所以在没有检测到物体的时候,返回值不再是原图而是一个空列表。
这里有两种生成json文件的方法。第一种是先读取测试集文件夹下所有的图片名称,再读取官方提供的测试集对应的json文件;如果在其中找到该图片的file_name,就读取对应的image_id和category_id,并与预测框bbox、得分score一起保存到content_json列表中。最后在predict.py中调用yolo.py里写好的save_json函数,将结果保存到result.json文件中。
def save_json(self, image, file_name, path_json):
    """Run detection on one image and return its boxes as COCO-style results.

    Args:
        image: PIL image to run the detector on.
        file_name: File name used to find the image's entry in the
            ``images`` list of the annotation file.
        path_json: Path to an annotation JSON file that provides the
            ``images`` and ``categories`` lists.

    Returns:
        A list of dicts with the keys ``image_id``, ``category_id``,
        ``bbox`` (``[x, y, width, height]``, clipped to the image) and
        ``score``. The list is empty when nothing is detected or when
        ``file_name`` is not listed in the annotation file.
    """
    image_shape = np.array(np.shape(image)[0:2])  # (height, width)
    net_h, net_w = self.model_image_size[0], self.model_image_size[1]

    # Bring the image to the network input size, with or without padding.
    if self.letterbox_image:
        crop_img = np.array(letterbox_image(image, (net_w, net_h)))
    else:
        crop_img = image.convert('RGB')
        crop_img = crop_img.resize((net_w, net_h), Image.BICUBIC)
    photo = np.array(crop_img, dtype=np.float32) / 255.0
    photo = np.transpose(photo, (2, 0, 1))  # HWC -> CHW
    images = [photo]

    with torch.no_grad():
        images = torch.from_numpy(np.asarray(images))
        if self.cuda:
            images = images.cuda()

        outputs = self.net(images)
        output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(output, len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=self.iou)
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except (AttributeError, IndexError, TypeError):
            # Presumably the NMS helper yields None (or nothing) for an
            # image without detections -- TODO confirm against its code.
            return []

    # Keep detections whose objectness * class confidence passes the threshold.
    det_scores = batch_detections[:, 4] * batch_detections[:, 5]
    top_index = det_scores > self.confidence
    top_conf = det_scores[top_index]
    top_label = np.array(batch_detections[top_index, -1], np.int32)
    top_bboxes = np.array(batch_detections[top_index, :4])
    top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
    top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
    top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
    top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

    # Map boxes from network-input coordinates back to the original image.
    if self.letterbox_image:
        boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                   np.array([net_h, net_w]), image_shape)
    else:
        top_xmin = top_xmin / net_w * image_shape[1]
        top_ymin = top_ymin / net_h * image_shape[0]
        top_xmax = top_xmax / net_w * image_shape[1]
        top_ymax = top_ymax / net_h * image_shape[0]
        boxes = np.concatenate([top_ymin, top_xmin, top_ymax, top_xmax], axis=-1)

    if len(top_label) == 0:
        return []

    # Parse the annotation file once per call instead of once per detection.
    with open(path_json, 'r', encoding='utf-8') as f:
        rest = json.load(f)

    for image_info in rest['images']:
        if image_info['file_name'] == file_name:
            image_id = image_info['id']
            break
    else:
        # The image is not listed in the annotations: nothing to report.
        return []

    categories = rest['categories']
    img_h, img_w = float(image_shape[0]), float(image_shape[1])

    content_json = []
    for i, c in enumerate(top_label):
        top, left, bottom, right = boxes[i]
        # Clip the corners to the image first, then derive width/height, so
        # a box starting outside the image does not keep its off-image part.
        x = max(0.0, float(left))
        y = max(0.0, float(top))
        w = max(0.0, min(img_w, float(right)) - x)
        h = max(0.0, min(img_h, float(bottom)) - y)
        content_json.append({
            "image_id": image_id,
            # Assumes the order of ``categories`` matches the model's class
            # indices -- verify against the class-names file.
            "category_id": categories[int(c)]['id'],
            "bbox": [x, y, w, h],
            "score": float(top_conf[i]),
        })
    return content_json
首先用for i in range(3)输入三张图片测试一下代码有没有问题。
from PIL import Image
import cv2
from yolo import YOLO
import os
import json
yolo = YOLO()

# These paths do not change between iterations, so define them once.
path = "./data/coco/test2017/"
path_json = "./data/coco/annotations/instances_test2017.json"
path_save = "./result.json"

# Smoke test: ask for three image file names and collect their detections.
content = []
for _ in range(3):
    file_name = input('Input image filename:')
    try:
        image = Image.open(os.path.join(path, file_name))
    except OSError:
        # Missing or unreadable image files surface as OSError subclasses.
        print('Open Error! Try again!')
        continue
    content.extend(yolo.save_json(image, file_name, path_json))

print(content)

# Write mode: appending a second JSON array on a rerun would leave
# result.json unparseable.
with open(path_save, 'w') as f:
    json.dump(content, f)
确认没有问题后,用下面的代码将测试集中全部图片的检测结果保存到result.json文件中。
path = "./data/coco/test2017/"
path_json = "./data/coco/annotations/instances_test2017.json"
path_save = "./result.json"

# Run the detector on every file in the test folder. Sorting makes the
# order of the results reproducible (os.listdir order is arbitrary).
content = []
for file_name in sorted(os.listdir(path)):
    # The context manager closes each image file once it has been processed.
    with Image.open(os.path.join(path, file_name)) as image:
        content.extend(yolo.save_json(image, file_name, path_json))

# A single write-mode dump replaces the earlier "truncate, then append"
# sequence, which left the truncating file handle unclosed.
with open(path_save, 'w') as f:
    json.dump(content, f)
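或者用下面的方法:先读取官方json文件中给出的测试集图片信息,依次取出图片名和image_id,再打开对应的图片进行检测,这样就不用像上面那样在save_json中逐个比对file_name了。相应地,save_json的参数改为(image, file_name, image_id, categories),定义见后面的代码。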
from PIL import Image
import cv2
from yolo import YOLO
import os
import json
yolo = YOLO()
path = "./data/coco/test2017/"
path_json = "./data/coco/annotations/instances_test2017.json"
path_save = "./result.json"

# Load the annotation file once; it provides the image list and categories.
with open(path_json, 'r', encoding='utf-8') as f:
    rest = json.load(f)

# Walk the images in annotation order so each image_id is known up front.
content = []
for image_info in rest['images']:
    file_name = image_info["file_name"]
    image_id = image_info["id"]
    # The context manager closes each image file once it has been processed.
    with Image.open(os.path.join(path, file_name)) as image:
        content.extend(yolo.save_json(image, file_name, image_id, rest['categories']))

# Write mode: appending a second JSON array on a rerun would leave
# result.json unparseable.
with open(path_save, 'w') as f:
    json.dump(content, f)
def save_json(self, image, file_name, image_id, categories):
    """Run detection on one image and return its boxes as COCO-style results.

    Args:
        image: PIL image to run the detector on.
        file_name: Name of the image file; not used by this variant and
            kept so existing callers do not need to change.
        image_id: Id written into the ``image_id`` field of every result.
        categories: Sequence of category dicts; the ``id`` of the entry at
            a detection's class index becomes its ``category_id``.

    Returns:
        A list of dicts with the keys ``image_id``, ``category_id``,
        ``bbox`` (``[x, y, width, height]``, clipped to the image) and
        ``score``. The list is empty when nothing is detected.
    """
    image_shape = np.array(np.shape(image)[0:2])  # (height, width)
    net_h, net_w = self.model_image_size[0], self.model_image_size[1]

    # Bring the image to the network input size, with or without padding.
    if self.letterbox_image:
        crop_img = np.array(letterbox_image(image, (net_w, net_h)))
    else:
        crop_img = image.convert('RGB')
        crop_img = crop_img.resize((net_w, net_h), Image.BICUBIC)
    photo = np.array(crop_img, dtype=np.float32) / 255.0
    photo = np.transpose(photo, (2, 0, 1))  # HWC -> CHW
    images = [photo]

    with torch.no_grad():
        images = torch.from_numpy(np.asarray(images))
        if self.cuda:
            images = images.cuda()

        outputs = self.net(images)
        output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(output, len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=self.iou)
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except (AttributeError, IndexError, TypeError):
            # Presumably the NMS helper yields None (or nothing) for an
            # image without detections -- TODO confirm against its code.
            return []

    # Keep detections whose objectness * class confidence passes the threshold.
    det_scores = batch_detections[:, 4] * batch_detections[:, 5]
    top_index = det_scores > self.confidence
    top_conf = det_scores[top_index]
    top_label = np.array(batch_detections[top_index, -1], np.int32)
    top_bboxes = np.array(batch_detections[top_index, :4])
    top_xmin = np.expand_dims(top_bboxes[:, 0], -1)
    top_ymin = np.expand_dims(top_bboxes[:, 1], -1)
    top_xmax = np.expand_dims(top_bboxes[:, 2], -1)
    top_ymax = np.expand_dims(top_bboxes[:, 3], -1)

    # Map boxes from network-input coordinates back to the original image.
    if self.letterbox_image:
        boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax,
                                   np.array([net_h, net_w]), image_shape)
    else:
        top_xmin = top_xmin / net_w * image_shape[1]
        top_ymin = top_ymin / net_h * image_shape[0]
        top_xmax = top_xmax / net_w * image_shape[1]
        top_ymax = top_ymax / net_h * image_shape[0]
        boxes = np.concatenate([top_ymin, top_xmin, top_ymax, top_xmax], axis=-1)

    img_h, img_w = float(image_shape[0]), float(image_shape[1])

    content_json = []
    for i, c in enumerate(top_label):
        top, left, bottom, right = boxes[i]
        # Clip the corners to the image first, then derive width/height, so
        # a box starting outside the image does not keep its off-image part.
        x = max(0.0, float(left))
        y = max(0.0, float(top))
        w = max(0.0, min(img_w, float(right)) - x)
        h = max(0.0, min(img_h, float(bottom)) - y)
        content_json.append({
            "image_id": image_id,
            # Assumes the order of ``categories`` matches the model's class
            # indices -- verify against the class-names file.
            "category_id": categories[int(c)]['id'],
            "bbox": [x, y, w, h],
            "score": float(top_conf[i]),
        })
    return content_json