在道路上，利用AI算法定位机动车的位置并识别车辆的属性，这些信息对交警来说非常重要。下面我们将对道路上的车辆进行检测和属性识别，整个过程包含两个算法：基于YOLOv5的目标检测算法，以及基于多任务学习的属性识别算法。
个人感觉yolov5算法还是非常nice的,在实际项目和比赛中经常用到。由于一年前我就写过几篇博客对yolov5进行了说明,在此就不再赘述。如果读者感兴趣,可以参考我的博客:YOLOV5实战
下面是我的测试代码,对yolo v5进行了再次封装:
import os
import cv2
import numpy as np
import torch
import torch.backends.cudnn as cudnn
from PIL import Image,ImageDraw,ImageFont
from models.experimental import attempt_load
from utils.general import letterbox, non_max_suppression, scale_coords
class Yolov5Detect(object):
    """Single-image detector wrapping a YOLOv5 checkpoint, run in FP16 on CUDA.

    The usual entry point is ``post_process``, which chains
    ``pre_process`` -> ``predict`` -> NMS -> rescaling of the boxes back to
    the coordinates of the source image.
    """

    def __init__(self, weights='./weights/yolov5m.pt', device=0, img_size=(352,352), conf=0.5, iou=0.5):
        """Load the checkpoint and run one warm-up forward pass.

        Args:
            weights: Checkpoint path handed to ``attempt_load``.
            device: CUDA device index, formatted into ``"cuda:<device>"``.
            img_size: Two-element size used as the last two dimensions of the
                network input; should preferably be a multiple of 32.
            conf: Confidence threshold passed to ``non_max_suppression``.
            iou: IoU threshold passed to ``non_max_suppression``.
        """
        self.device = "cuda:%s" % device
        self.imgsz = img_size
        self.conf = conf
        self.iou = iou
        with torch.no_grad():
            self.model = attempt_load(weights, map_location=self.device)  # load FP32 model
            self.model.half()  # to FP16
            # Warm-up pass with a dummy input of the configured size.
            temp_img = torch.zeros((1, 3, self.imgsz[0], self.imgsz[1]), device=self.device)
            _ = self.model(temp_img.half())

    def pre_process(self, img_path):
        """Read an image from disk and convert it to the network layout.

        Args:
            img_path: Path of the image file.

        Returns:
            ``(img, img0)``: ``img`` is the resized image as a contiguous
            channel-first RGB array, ``img0`` the untouched BGR image
            returned by ``cv2.imread``.

        Raises:
            AssertionError: If ``cv2.imread`` cannot read ``img_path``.
        """
        img0 = cv2.imread(img_path)
        assert img0 is not None, "Image Not Found " + img_path
        img = letterbox(img0, new_shape=self.imgsz, auto=False)[0]
        # BGR -> RGB, then HWC -> CHW.
        img = img[:, :, ::-1].transpose(2, 0, 1)
        img = np.ascontiguousarray(img)
        return img, img0

    def predict(self, img_path):
        """Run the network on one image.

        Args:
            img_path: Path of the image file.

        Returns:
            ``(pred, img, img0)``: the raw model output (before NMS), the
            batched FP16 input tensor, and the original BGR image.
        """
        img, img0 = self.pre_process(img_path)
        img = torch.from_numpy(img).to(self.device)
        img = img.half()  # uint8 to fp16
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)
        # Pure inference: no autograd graph is needed, so do not build one.
        with torch.no_grad():
            pred = self.model(img, augment=False)[0]
        return pred, img, img0

    def post_process(self, img_path):
        """Detect objects in one image and return them in image coordinates.

        Args:
            img_path: Path of the image file.

        Returns:
            ``(pred, img0)``: ``pred`` is a list with one entry per detection
            (callers in this file unpack each entry as
            ``x1, y1, x2, y2, conf, label``); it is an empty list when
            nothing was detected. ``img0`` is the original BGR image.
        """
        pred, img, img0 = self.predict(img_path)
        # Apply NMS
        pred = non_max_suppression(pred, self.conf, self.iou, classes=None, agnostic=False)
        det = pred[0]
        # Depending on the YOLOv5 release, "no detections" is either None or
        # an empty tensor; normalise both to an empty list so that callers can
        # always iterate over the result.
        if det is None or not len(det):
            return [], img0
        # Map the boxes from the network input size back to the source image.
        det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0.shape).round()
        return det.cpu().detach().numpy().tolist(), img0  # from tensor to list
def draw_box_string(img, box, string):
    """Draw a red box on a BGR image and write a green label next to it.

    Args:
        img: BGR image array; the rectangle is drawn onto it in place.
        box: ``(x, y, w, h)`` of the box, top-left corner plus size.
        string: Text to write, anchored at the top-right corner of the box.

    Returns:
        A new BGR array containing both the rectangle and the text.
    """
    left, top, width, height = box
    cv2.rectangle(img, (left, top), (left + width, top + height), (0, 0, 255), 2)

    # The text is rendered through PIL with a TrueType font (presumably so
    # that non-ASCII labels can be drawn); PIL works in RGB, hence the
    # round trip through cvtColor.
    canvas = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    painter = ImageDraw.Draw(canvas)
    label_font = ImageFont.truetype("simhei.ttf", 24, encoding="utf-8")
    painter.text((left + width, top), string, (0, 255, 0), font=label_font)

    return cv2.cvtColor(np.array(canvas), cv2.COLOR_RGB2BGR)
def get_image_list(image_dir, suffix=('jpg', 'jpeg', 'JPG', 'JPEG', 'png')):
    """Recursively collect the paths of all image files below a directory.

    Args:
        image_dir: Root directory to walk.
        suffix: Accepted file extensions without the leading dot. Matching is
            case-insensitive, so ``'png'`` also accepts ``.PNG``. A single
            string is treated as one extension.

    Returns:
        List of file paths (``image_dir`` joined with the relative location),
        in a deterministic, name-sorted walk order. Empty if ``image_dir``
        does not exist.
    """
    if not os.path.exists(image_dir):
        print("PATH:%s not exists" % image_dir)
        return []

    if isinstance(suffix, str):
        suffix = (suffix,)
    wanted = {ext.lower() for ext in suffix}

    imglist = []
    for root, sdirs, files in os.walk(image_dir):
        sdirs.sort()  # in-place sort makes os.walk descend in a stable order
        for filename in sorted(files):
            # Require an actual dot: a file literally named "jpg" has no
            # extension and must not be treated as an image.
            _, dot, ext = filename.rpartition('.')
            if dot and ext.lower() in wanted:
                imglist.append(os.path.join(root, filename))
    return imglist
# Demo: detect cars in every image under "test_imgs" and write the annotated
# images to "output".
if __name__ == '__main__':
    from tqdm import tqdm

    # cv2.imwrite does not create missing directories, so make sure the
    # destination exists before the loop.
    os.makedirs("output", exist_ok=True)

    detector = Yolov5Detect()
    img_list = get_image_list("test_imgs")
    for img_path in tqdm(img_list):
        img_name = os.path.basename(img_path)
        pred, img0 = detector.post_process(img_path)
        # Guard against a detector that reports "nothing found" as None;
        # the filter below needs something iterable.
        if pred is None:
            pred = []
        cars = [det for det in pred if det[-1] == 2.0]  # 2.0 is the label of "car"
        for x1, y1, x2, y2, conf, label in cars:
            box = [int(x1), int(y1), int(x2 - x1), int(y2 - y1)]
            string = "%s:%.3f" % ("car", conf)
            img0 = draw_box_string(img0, box, string)
        # Images without any car are written out unchanged.
        cv2.imwrite(os.path.join("output", img_name), img0)
基于多任务的属性识别算法：网络有多个head（分支），每个head识别车辆的一种属性。
博主之前也写过类似的文章,详细解读可参考:行人属性识别
车辆属性识别的训练过程如图所示,中间结果各个属性识别的准确率基本超过了90%:
考虑到识别的实时性,这里的识别模型用了非常小的模型,也便于算法部署到边缘设备。
下面是车辆属性识别的测试代码:
import os
import cv2
import glob
import torch
from torchvision import transforms as T
from torch.nn import DataParallel
from tqdm import tqdm
from PIL import Image,ImageDraw,ImageFont
import numpy as np
from MobileNetV2 import mobilenet_v2
class Car_recog(object):
    """Multi-head vehicle attribute classifier built on ``mobilenet_v2``."""

    def __init__(self, model_path="./checkpoint/mobilenet-v2_30.pth"):
        """Build the network on CUDA and load its weights.

        Args:
            model_path: Path of the state dict to load.
        """
        self.weights = model_path
        self.device = torch.device("cuda")
        # The network is wrapped in DataParallel before the state dict is
        # loaded — presumably the checkpoint was saved from a wrapped model;
        # TODO confirm.
        self.net = DataParallel(mobilenet_v2().to(self.device))
        self.net.load_state_dict(torch.load(self.weights))
        self.transforms = T.Compose([
            T.ToTensor(),
            T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
        ])

    def recog(self, img):
        """Classify the attributes of one vehicle image.

        Args:
            img: BGR image array (e.g. as returned by ``cv2.imread``).

        Returns:
            ``(img_RGB, label_color, label_type, label_sub_type)``: a PIL RGB
            copy of the input at its original size, followed by the argmax
            index of each of the three network outputs, in output order.
            NOTE(review): callers in this file read the second and third
            index as orientation and vehicle type — confirm the head order
            against the model definition.
        """
        pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        img_RGB = pil_img.copy()  # kept at original size for drawing results
        batch = self.transforms(pil_img.resize((256, 256))).unsqueeze(0)

        with torch.no_grad():
            self.net.eval()
            outputs_color, outputs_type, outputs_sub_type = self.net(batch.to(self.device))
            label_color, label_type, label_sub_type = [
                torch.softmax(logits, 1).argmax()
                for logits in (outputs_color, outputs_type, outputs_sub_type)
            ]
        return img_RGB, label_color, label_type, label_sub_type
# Demo: classify every image under "test_imgs" and write a copy with the
# recognised attributes drawn in the top-left corner to "result_test".
if __name__ == "__main__":
    color_name = ['白色', '黑色', '红色', '黄色', '灰色', '蓝色','绿色', '棕色']
    orientation_name = ['车头', '车尾']
    type_name = ["car", 'SUV', "MPV", "中型客车", "大型客车"]

    car_recog = Car_recog()
    # cv2.imwrite does not create missing directories.
    os.makedirs("result_test", exist_ok=True)
    # The font does not change between images, so load it once.
    font = ImageFont.truetype("./simhei.ttf", 24, encoding="utf-8")

    img_list = [os.path.join("test_imgs", i) for i in os.listdir("test_imgs")]
    for img_path in img_list:
        img = cv2.imread(img_path)
        if img is None:
            # os.listdir returns every entry, including sub-directories and
            # non-image files; cv2.imread yields None for those.
            print("skip unreadable file: %s" % img_path)
            continue
        img_RGB, label_color, label_ori, label_type = car_recog.recog(img)
        result = "颜色:%s, 朝向:%s, 类型:%s" % (color_name[label_color], orientation_name[label_ori], type_name[label_type])
        print("车辆属性识别结果:%s" % result)
        # Draw the recognised attributes onto the image.
        draw = ImageDraw.Draw(img_RGB)
        draw.text((0, 0), result, (255, 0, 0), font=font)
        img_BGR = cv2.cvtColor(np.array(img_RGB), cv2.COLOR_RGB2BGR)
        cv2.imwrite(os.path.join("result_test", os.path.basename(img_path)), img_BGR)
整个算法的流程是:先用yolo v5检测算法检测到车辆,然后用车辆属性识别算法识别车辆的属性,最后将结果画到图上。
整个流程的测试代码如下:
import os
import cv2
import glob
import torch
from torchvision import transforms as T
from torch.nn import DataParallel
from tqdm import tqdm
from PIL import Image,ImageDraw,ImageFont
import numpy as np
import sys
sys.path.insert(1, "car_attributes")
sys.path.insert(1, "yolov5.2")
from test_img import Car_recog
from demo import Yolov5Detect, draw_box_string
# Human-readable names for the class indices returned by Car_recog.recog.
color_name = ['白色', '黑色', '红色', '黄色', '灰色', '蓝色','绿色', '棕色']
orientation_name = ['车头', '车尾']
type_name = ["car", 'SUV', "MPV", "中型客车", "大型客车"]

# Full pipeline: detect cars with YOLOv5, classify the attributes of each
# crop, and write one annotated image to "result.jpg".
if __name__ == "__main__":
    car_recog = Car_recog("car_attributes/checkpoint/mobilenet-v2_30.pth")
    detector = Yolov5Detect("yolov5.2/weights/yolov5m.pt")
    print("load model successfuly")

    img_path = "yolov5.2/test_imgs/test.png"  # path of the test image
    pred, img0 = detector.post_process(img_path)
    # Guard against a detector that reports "nothing found" as None.
    if pred is None:
        pred = []
    cars = [det for det in pred if det[-1] == 2.0]  # 2.0 is the label of "car"

    # Annotate a separate copy: draw_box_string paints onto the image it is
    # given, and crops taken afterwards would otherwise contain the boxes and
    # text of previously processed cars, polluting the classifier input.
    annotated = img0.copy()
    for x1, y1, x2, y2, conf, label in cars:
        # Clamp to the image: a negative start index would wrap around when
        # slicing the array.
        x1, y1 = max(int(x1), 0), max(int(y1), 0)
        w, h = int(x2) - x1, int(y2) - y1
        if w <= 0 or h <= 0:
            # An empty crop cannot be converted or resized; skip it.
            continue
        box = [x1, y1, w, h]
        img_car = img0[y1:y1 + h, x1:x1 + w]
        img_RGB, label_color, label_ori, label_type = car_recog.recog(img_car)
        result = "颜色:%s\n朝向:%s\n类型:%s" % (color_name[label_color], orientation_name[label_ori], type_name[label_type])
        print("车辆属性识别结果:%s" % result)
        annotated = draw_box_string(annotated, box, result)
    cv2.imwrite("result.jpg", annotated)
注:本文所用的训练数据都是公开数据集,如果你需要训练好的模型或者训练数据或者技术交流,请留言或者微信,近期会将代码开源。