Saving YOLOv4 prediction results as a JSON file

If your code is built on the mmdetection framework, you do not need to write the conversion yourself: mmdetection ships with a script that exports prediction results to a JSON file. Simply run

python tools/test.py configs/involution/faster_rcnn_red50_neck_fpn_1x_coco.py work_dirs/faster_rcnn_red50_neck_fpn_1x_coco/latest.pth --format-only --options "jsonfile_prefix=./results"

to obtain the result file; see the official mmdetection documentation for details.
If you are not using mmdetection, a small modification to the prediction code is enough to save the predictions as a JSON file.
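For reference, the target format is the standard COCO detection-result format: each entry records an image_id taken from the annotation file, a category_id, a bbox given as [x, y, w, h] in pixels, and a confidence score. A minimal example of what result.json should end up looking like (the values here are made up for illustration):

[
    {"image_id": 1, "category_id": 1, "bbox": [100.0, 50.0, 40.0, 80.0], "score": 0.92},
    {"image_id": 1, "category_id": 3, "bbox": [10.0, 20.0, 30.0, 60.0], "score": 0.55}
]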

Modifying the detect_image() function

Following the detect_image() function in yolo.py, define a save_json function. Instead of returning the image with boxes drawn on it, it returns a content_json list holding the box information, so when no object is detected the return value is an empty list rather than the original image.
There are two ways to generate the JSON file. The first is to read all image names in the test set folder and then read the official test set JSON file: if an image's file_name is found there, look up its image_id and category_id, take the predicted bbox and score, and store them in the content_json list. In predict.py, call the save_json function defined in yolo.py and save the results to result.json.
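This lookup works because a COCO-style annotation file such as instances_test2017.json contains an images list (mapping file_name to id) and a categories list (mapping the class index to a category id). Roughly, its structure looks like this (field values here are illustrative):

{
    "images": [{"id": 1, "file_name": "000001.jpg", "width": 640, "height": 480}, ...],
    "categories": [{"id": 1, "name": "person"}, {"id": 2, "name": "car"}, ...]
}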

def save_json(self, image, file_name, path_json):
    image_shape = np.array(np.shape(image)[0:2])

    # Preprocess exactly as in detect_image(): letterbox (keep aspect ratio
    # with padding) or a plain resize to the model input size.
    if self.letterbox_image:
        crop_img = np.array(letterbox_image(image, (self.model_image_size[1], self.model_image_size[0])))
    else:
        crop_img = image.convert('RGB')
        crop_img = crop_img.resize((self.model_image_size[1], self.model_image_size[0]), Image.BICUBIC)
    photo = np.array(crop_img, dtype=np.float32) / 255.0
    photo = np.transpose(photo, (2, 0, 1))

    images = [photo]

    with torch.no_grad():
        images = torch.from_numpy(np.asarray(images))
        if self.cuda:
            images = images.cuda()

        # Forward pass and decoding of the three YOLO heads.
        outputs = self.net(images)
        output_list = []
        for i in range(3):
            output_list.append(self.yolo_decodes[i](outputs[i]))

        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(output, len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=self.iou)

        # If nothing is detected, return an empty list instead of the original image.
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except Exception:
            return []

        top_index = batch_detections[:, 4] * batch_detections[:, 5] > self.confidence
        top_conf = batch_detections[top_index, 4] * batch_detections[top_index, 5]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:, 0], -1), np.expand_dims(top_bboxes[:, 1], -1), np.expand_dims(top_bboxes[:, 2], -1), np.expand_dims(top_bboxes[:, 3], -1)

        # Map the boxes back to the original image size.
        if self.letterbox_image:
            boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax, np.array([self.model_image_size[0], self.model_image_size[1]]), image_shape)
        else:
            top_xmin = top_xmin / self.model_image_size[1] * image_shape[1]
            top_ymin = top_ymin / self.model_image_size[0] * image_shape[0]
            top_xmax = top_xmax / self.model_image_size[1] * image_shape[1]
            top_ymax = top_ymax / self.model_image_size[0] * image_shape[0]
            boxes = np.concatenate([top_ymin, top_xmin, top_ymax, top_xmax], axis=-1)

    content_json = []
    for i, c in enumerate(top_label):
        score = float(top_conf[i])

        # Convert (top, left, bottom, right) to COCO-style [x, y, w, h].
        top, left, bottom, right = boxes[i]
        w = right - left
        h = bottom - top

        y = float(max(0, top))
        x = float(max(0, left))
        w = float(max(0, w))
        h = float(max(0, h))
        bbox = [x, y, w, h]

        # Look up the image_id and category_id in the official annotation file.
        f = open(path_json, 'r', encoding='utf-8')
        rest = json.loads(f.read())
        for img_info in rest['images']:
            if img_info['file_name'] == file_name:
                image_id = img_info['id']
                cat_id = rest['categories'][c]['id']
                content_dic = {
                    "image_id": image_id,
                    "category_id": cat_id,
                    "bbox": bbox,
                    "score": score
                }
                content_json.append(content_dic)
                break
        f.close()
    return content_json
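One thing to note about this version: the annotation file is re-opened and re-parsed for every detected box. On a large test set it may be worth building a file_name-to-image_id table once and passing it in instead, which is essentially what the second method further down does. A minimal sketch of that idea (the helper name build_image_id_map is my own, not part of the original code):

import json

def build_image_id_map(path_json):
    # Parse the annotation file once and map file_name -> image_id.
    with open(path_json, 'r', encoding='utf-8') as f:
        rest = json.load(f)
    id_map = {img['file_name']: img['id'] for img in rest['images']}
    return id_map, rest['categories']

# Usage sketch: call this once in predict.py, then pass image_id and the
# categories list into save_json so the per-box file reads disappear.
# id_map, categories = build_image_id_map(path_json)
# image_id = id_map[file_name]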

Next, modify predict.py

First, test the code on three images using for i in range(3) to make sure it works.

from PIL import Image
from yolo import YOLO
import os
import json

yolo = YOLO()

path = "./data/coco/test2017/"
path_json = "./data/coco/annotations/instances_test2017.json"
path_save = "./result.json"

content = []
for i in range(3):
    file_name = input('Input image filename:')
    try:
        image = Image.open(path + file_name)
    except Exception:
        print('Open Error! Try again!')
        continue
    else:
        content.extend(yolo.save_json(image, file_name, path_json))
        print(content)

# Write the collected detections out as a single JSON array.
with open(path_save, 'w') as f:
    json.dump(content, f)

Once that works, use the code below to run detection on every image in the test set and save all results to result.json.

content = []
path = "./data/coco/test2017/"
path_json = "./data/coco/annotations/instances_test2017.json"
path_save = "./result.json"

for file_name in os.listdir(path):
    image = Image.open(path + file_name)
    info = yolo.save_json(image, file_name, path_json)
    content.extend(info)

# Overwrite any previous result file with the full set of detections.
with open(path_save, 'w') as f:
    json.dump(content, f)
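If the goal is COCO-style evaluation or submission, you can sanity-check the generated result.json by loading it with pycocotools, which rejects malformed entries and unknown image ids. A minimal sketch, assuming pycocotools is installed and every image_id in result.json appears in the annotation file:

from pycocotools.coco import COCO

coco_gt = COCO("./data/coco/annotations/instances_test2017.json")
# loadRes raises an error if the result file is malformed or references unknown image ids.
coco_dt = coco_gt.loadRes("./result.json")
print("loaded {} detections".format(len(coco_dt.anns)))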

Alternatively, use the second method: iterate over the image entries listed in the official test set JSON, read each file_name in turn, and run detection on the corresponding image. This way the file_name matching inside save_json is no longer needed.

predict.py

from PIL import Image
from yolo import YOLO
import os
import json

yolo = YOLO()

path = "./data/coco/test2017/"
path_json = "./data/coco/annotations/instances_test2017.json"
path_save = "./result.json"

content = []
# Read the annotation file once and iterate over the images it lists,
# passing image_id and the categories list straight into save_json.
with open(path_json, 'r', encoding='utf-8') as f:
    rest = json.loads(f.read())

for i in rest['images']:
    file_name = i["file_name"]
    image_id = i["id"]
    image = Image.open(path + file_name)
    content.extend(yolo.save_json(image, file_name, image_id, rest['categories']))

with open(path_save, 'w') as f:
    json.dump(content, f)

yolo.py

def save_json(self, image, file_name, image_id, categories):
    image_shape = np.array(np.shape(image)[0:2])

    # Preprocess exactly as in detect_image(): letterbox or plain resize.
    if self.letterbox_image:
        crop_img = np.array(letterbox_image(image, (self.model_image_size[1], self.model_image_size[0])))
    else:
        crop_img = image.convert('RGB')
        crop_img = crop_img.resize((self.model_image_size[1], self.model_image_size[0]), Image.BICUBIC)
    photo = np.array(crop_img, dtype=np.float32) / 255.0
    photo = np.transpose(photo, (2, 0, 1))

    images = [photo]

    with torch.no_grad():
        images = torch.from_numpy(np.asarray(images))
        if self.cuda:
            images = images.cuda()

        # Forward pass and decoding of the three YOLO heads.
        outputs = self.net(images)
        output_list = []
        for i in range(3):
            output_list.append(self.yolo_decodes[i](outputs[i]))

        output = torch.cat(output_list, 1)
        batch_detections = non_max_suppression(output, len(self.class_names),
                                               conf_thres=self.confidence,
                                               nms_thres=self.iou)

        # If nothing is detected, return an empty list.
        try:
            batch_detections = batch_detections[0].cpu().numpy()
        except Exception:
            return []

        top_index = batch_detections[:, 4] * batch_detections[:, 5] > self.confidence
        top_conf = batch_detections[top_index, 4] * batch_detections[top_index, 5]
        top_label = np.array(batch_detections[top_index, -1], np.int32)
        top_bboxes = np.array(batch_detections[top_index, :4])
        top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:, 0], -1), np.expand_dims(top_bboxes[:, 1], -1), np.expand_dims(top_bboxes[:, 2], -1), np.expand_dims(top_bboxes[:, 3], -1)

        # Map the boxes back to the original image size.
        if self.letterbox_image:
            boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax, np.array([self.model_image_size[0], self.model_image_size[1]]), image_shape)
        else:
            top_xmin = top_xmin / self.model_image_size[1] * image_shape[1]
            top_ymin = top_ymin / self.model_image_size[0] * image_shape[0]
            top_xmax = top_xmax / self.model_image_size[1] * image_shape[1]
            top_ymax = top_ymax / self.model_image_size[0] * image_shape[0]
            boxes = np.concatenate([top_ymin, top_xmin, top_ymax, top_xmax], axis=-1)

    content_json = []
    for i, c in enumerate(top_label):
        score = float(top_conf[i])

        # Convert (top, left, bottom, right) to COCO-style [x, y, w, h].
        top, left, bottom, right = boxes[i]
        w = right - left
        h = bottom - top

        y = float(max(0, top))
        x = float(max(0, left))
        w = float(max(0, w))
        h = float(max(0, h))
        bbox = [x, y, w, h]

        # image_id comes from the caller; the category_id comes from the categories list.
        cat_id = categories[c]['id']
        content_dic = {
            "image_id": image_id,
            "category_id": cat_id,
            "bbox": bbox,
            "score": score
        }
        content_json.append(content_dic)
    return content_json
