【吴恩达深度学习编程作业】4.3目标检测——车辆识别

参考文章:车辆识别
这一章编程作业中的 bug 花了很长时间才解决,我把遇到的问题汇总在了这里:【吴恩达深度学习编程作业问题汇总】4.3目标检测——车辆识别

自动驾驶——车辆识别

main.py

"""
    自动驾驶--使用YOLO算法进行汽车对象识别
"""

import os
import matplotlib.pyplot as plt
import imageio
import numpy as np
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
from keras import backend as K  # 导入Keras的后台命名为K
from keras.models import load_model
from Deep_Learning.test4_3.yad2k.models.keras_yolo import yolo_head, yolo_boxes_to_corners
from Deep_Learning.test4_3.yolo_utils import read_classes, read_anchors, generate_colors, scale_boxes, preprocess_image, draw_boxes


# Score-threshold filtering: discard anchor boxes whose best class score is below a preset value
def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold=0.6):
    """
    Keep only the anchor boxes whose highest class score reaches ``threshold``.

    Shapes below are the ones used in the accompanying notes (19x19 grid,
    5 anchors per cell, 80 classes).

    :param box_confidence:  tensor, e.g. (19, 19, 5, 1) -- object confidence pc of every anchor box
    :param boxes:           tensor, e.g. (19, 19, 5, 4) -- coordinates of every anchor box
    :param box_class_probs: tensor, e.g. (19, 19, 5, 80) -- per-class probabilities of every anchor box
    :param threshold:       float; a box is kept when its best class score is >= this value
    :return: scores     tensor, (None,)   -- best class score of each kept box
             boxes      tensor, (None, 4) -- coordinates of each kept box
             classes    tensor, (None,)   -- index of the best class of each kept box

        "None" stands for the number of surviving boxes, which is only known
        at run time because it depends on the threshold.
    """

    # Score of every class for every box: confidence times class probability
    class_scores = box_confidence * box_class_probs

    # Winning class of each box and the score it obtained
    best_class = K.argmax(class_scores, axis=-1)
    best_score = K.max(class_scores, axis=-1)

    # Boolean mask of the boxes that pass the threshold
    keep = best_score >= threshold

    # tf.boolean_mask(a, mask) keeps the entries of `a` where `mask` is True
    return (
        tf.boolean_mask(best_score, keep),
        tf.boolean_mask(boxes, keep),
        tf.boolean_mask(best_class, keep),
    )

# Smoke test for yolo_filter_boxes() on random tensors; runs whenever this script is executed.
print("==========================测试yolo_filter_boxes======================")
with tf.compat.v1.Session() as test_a:
    # tf.compat.v1.random_normal() draws the requested number of values from a normal
    # distribution with the given mean and standard deviation.
    box_confidence = tf.compat.v1.random_normal([19, 19, 5, 1], mean=1, stddev=4, seed=1)
    boxes = tf.compat.v1.random_normal([19, 19, 5, 4], mean=1, stddev=4, seed=1)
    box_class_probs = tf.compat.v1.random_normal([19, 19, 5, 80], mean=1, stddev=4, seed=1)
    scores, boxes, classes = yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold=0.5)

    # Trailing comments record the output the author observed.
    print("scores[2] = " + str(scores[2].eval()))       # scores[2] = 10.750582
    print("boxes[2] = " + str(boxes[2].eval()))         # boxes[2] = [ 8.426533   3.2713668 -0.5313436 -4.9413733]
    print("classes[2] = " + str(classes[2].eval()))     # classes[2] = 7
    print("scores.shape = " + str(scores.shape))        # scores.shape = (None,)
    print("boxes.shape = " + str(boxes.shape))          # boxes.shape = (None, 4)
    print("classes.shape = " + str(classes.shape))      # classes.shape = (None,)

    # NOTE(review): redundant -- the enclosing `with` block also closes the session on exit.
    test_a.close()


# Non-max suppression: thresholding still leaves many overlapping anchor boxes,
# so intersection over union (IoU) is used to filter them further.
def iou(box1, box2):
    """
    Compute the intersection over union (IoU) of two axis-aligned boxes.

    :param box1:    first box as (x1, y1, x2, y2), with x1 <= x2 and y1 <= y2
    :param box2:    second box as (x1, y1, x2, y2), with x1 <= x2 and y1 <= y2
    :return: iou    intersection area divided by union area; 0.0 when the boxes
                    do not overlap or when the union has no area
    """

    # Corners of the candidate intersection rectangle
    xi1 = np.maximum(box1[0], box2[0])
    yi1 = np.maximum(box1[1], box2[1])
    xi2 = np.minimum(box1[2], box2[2])
    yi2 = np.minimum(box1[3], box2[3])

    # Clamp each side at zero: for disjoint boxes (xi2 - xi1) or (yi2 - yi1) is
    # negative, and multiplying the raw differences would produce a negative
    # area, or a bogus positive one when both are negative.
    inter_width = np.maximum(xi2 - xi1, 0)
    inter_height = np.maximum(yi2 - yi1, 0)
    inter_area = inter_width * inter_height

    # Union(A, B) = A + B - Inter(A, B)
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union_area = box1_area + box2_area - inter_area

    # Degenerate (zero-area) boxes: avoid dividing by zero
    if union_area <= 0:
        return 0.0

    return inter_area / union_area

# Smoke test for iou(): two 2x2 boxes that share a 1x1 square.
print("====================测试iou================")
box1, box2 = (2, 1, 4, 3), (1, 2, 3, 4)
print(
    "iou = " + str(iou(box1, box2))
)  # iou = 0.14285714285714285


def yolo_non_max_suppression(scores, boxes, classes, max_boxes=10, iou_threshold=0.5):
    """
    Apply non-max suppression (NMS) to a set of anchor boxes.

    :param scores:      tensor, (None,)   -- output of yolo_filter_boxes()
    :param boxes:       tensor, (None, 4) -- output of yolo_filter_boxes(), already scaled to the image size
    :param classes:     tensor, (None,)   -- output of yolo_filter_boxes()
    :param max_boxes:   int, maximum number of boxes to keep
    :param iou_threshold:   float, IoU threshold passed to the NMS op
    :return: scores     tensor, (None,)   -- score of each kept box
             boxes      tensor, (None, 4) -- coordinates of each kept box
             classes    tensor, (None,)   -- class of each kept box

        "None" here is at most max_boxes: the three outputs are gathered with
        the indices selected by NMS, so they all share that leading dimension.
    """

    # max_boxes is handed to the op directly as a Python int, so no graph
    # variable (and no session run to initialise one) is needed.
    nms_indices = tf.image.non_max_suppression(boxes, scores, max_boxes, iou_threshold)

    # Select the surviving boxes with K.gather()
    scores = K.gather(scores, nms_indices)
    boxes = K.gather(boxes, nms_indices)
    classes = K.gather(classes, nms_indices)

    return scores, boxes, classes

# Smoke test for yolo_non_max_suppression() on 54 random boxes; runs whenever this script is executed.
print("======================测试yolo_non_max_suppression================")
with tf.compat.v1.Session() as test_b:
    scores = tf.compat.v1.random_normal([54, ], mean=1, stddev=4, seed=1)
    boxes = tf.compat.v1.random_normal([54, 4], mean=1, stddev=4, seed=1)
    # Random floats stand in for class indices here; they are only gathered, never interpreted.
    classes = tf.compat.v1.random_normal([54, ], mean=1, stddev=4, seed=1)
    scores, boxes, classes = yolo_non_max_suppression(scores, boxes, classes)

    # Trailing comments record the output the author observed.
    print("scores[2] = " + str(scores[2].eval()))           # scores[2] = 6.938395
    print("boxes[2] = " + str(boxes[2].eval()))             # boxes[2] = [-5.299932    3.1379814   4.450367    0.95942086]
    print("classes[2] = " + str(classes[2].eval()))         # classes[2] = -2.2452729
    print("score.shape = " + str(scores.eval().shape))      # score.shape = (10,)
    print("boxes.shape = " + str(boxes.eval().shape))       # boxes.shape = (10, 4)
    print("classes.shape = " + str(classes.eval().shape))   # classes.shape = (10,)

    # NOTE(review): redundant -- the enclosing `with` block also closes the session on exit.
    test_b.close()


# Run the complete filtering pipeline over all boxes
def yolo_eval(yolo_outputs, image_shape=(720., 1280.),
              max_boxes=10, score_threshold=0.6, iou_threshold=0.5):
    """
    Turn the encoded YOLO output (many anchor boxes) into final predictions:
    scores, box coordinates and classes.

    :param yolo_outputs:    4-tuple unpacked, in this order, as
                             (box_confidence, box_xy, box_wh, box_class_probs).
                             Shapes given in the accompanying notes for a (608, 608, 3) image:
                             box_confidence: (None, 19, 19, 5, 1)
                             box_xy: (None, 19, 19, 5, 2)
                             box_wh: (None, 19, 19, 5, 2)
                             box_class_probs: (None, 19, 19, 5, 80)
    :param image_shape:     (height, width) handed to scale_boxes(); defaults to (720., 1280.)
    :param max_boxes:       int, maximum number of predicted boxes
    :param score_threshold: float, score threshold handed to yolo_filter_boxes()
    :param iou_threshold:   float, IoU threshold handed to yolo_non_max_suppression()
    :return: scores     tensor -- score of each predicted box
             boxes      tensor -- coordinates of each predicted box
             classes    tensor -- class of each predicted box
    """

    confidence, centers, sizes, class_probs = yolo_outputs

    # (centre, size) representation -> corner representation
    corner_boxes = yolo_boxes_to_corners(centers, sizes)

    # Step 1: drop boxes whose best class score is too low
    kept_scores, kept_boxes, kept_classes = yolo_filter_boxes(
        confidence, corner_boxes, class_probs, score_threshold)

    # Step 2: rescale the surviving boxes to the original image
    scaled_boxes = scale_boxes(kept_boxes, image_shape)

    # Step 3: non-max suppression yields the final (scores, boxes, classes)
    return yolo_non_max_suppression(
        kept_scores, scaled_boxes, kept_classes, max_boxes, iou_threshold)

# Smoke test for yolo_eval() on random tensors; runs whenever this script is executed.
print("======================测试yolo_eval================")
with tf.compat.v1.Session() as test_c:
    # Tuple order matches what yolo_eval() unpacks: confidence, xy, wh, class probabilities.
    yolo_outputs = (tf.compat.v1.random_normal([19, 19, 5, 1], mean=1, stddev=4, seed=1),
                    tf.compat.v1.random_normal([19, 19, 5, 2], mean=1, stddev=4, seed=1),
                    tf.compat.v1.random_normal([19, 19, 5, 2], mean=1, stddev=4, seed=1),
                    tf.compat.v1.random_normal([19, 19, 5, 80], mean=1, stddev=4, seed=1))
    print(yolo_outputs) # prints the tuple of four tensors

    scores, boxes, classes = yolo_eval(yolo_outputs)

    # Trailing comments record the output the author observed.
    print("scores[2] = " + str(scores[2].eval()))           # scores[2] = 138.79124
    print("boxes[2] = " + str(boxes[2].eval()))             # boxes[2] = [1292.3297  -278.52167 3876.9893  -835.56494]
    print("classes[2] = " + str(classes[2].eval()))         # classes[2] = 54
    print("score.shape = " + str(scores.eval().shape))      # score.shape = (10,)
    print("boxes.shape = " + str(boxes.eval().shape))       # boxes.shape = (10, 4)
    print("classes.shape = " + str(classes.eval().shape))   # classes.shape = (10,)

    # NOTE(review): redundant -- the enclosing `with` block also closes the session on exit.
    test_c.close()


"""
    测试已经训练好了的YOLO模型
"""
# Section: run the pre-trained YOLO model.
# Fetch the Keras backend session that will execute the graph.
# graph = tf.compat.v1.get_default_graph()
sess = tf.compat.v1.keras.backend.get_session()

# 1. Class names, anchor boxes and the shape of the images to annotate
class_names = read_classes("model_data/coco_classes.txt")   # class names (80 classes per the author's note)
anchors = read_anchors("model_data/yolo_anchors.txt")       # anchor boxes (5 anchors per the author's note)
image_shape = (720., 1280.)

# 2. Load the trained model together with its weights. Per the author's note it maps a
#    batch of preprocessed (608, 608, 3) images to a tensor of shape (m, 19, 19, 5, 85).
yolo_model = load_model("model_data/yolo.h5")


# Print a summary of the model's layers
yolo_model.summary()

# 3. Convert the model output into bounding-box tensors
print(yolo_model.output)    # Tensor("conv2d_22/BiasAdd:0", shape=(None, 19, 19, 425), dtype=float32)
yolo_outputs = yolo_head(yolo_model.output, anchors, len(class_names))
print(str(yolo_outputs))    # prints the tuple returned by yolo_head

# The string literal below is the author's troubleshooting note: if a dimension-mismatch
# error is raised, unpack yolo_head's result as (box_xy, box_wh, box_confidence,
# box_class_probs) and re-pack it as (box_confidence, box_xy, box_wh, box_class_probs)
# before passing it to yolo_eval().
"""
    若报错维度不匹配,需要改变输出的参数顺序:
        yolo_outputs = yolo_head(yolo_model.output, anchors, len(class_names))
        print(str(yolo_outputs))
        box_xy, box_wh, box_confidence, box_class_probs = yolo_outputs
        yolo_outputs = (box_confidence, box_xy, box_wh, box_class_probs)
        print(str(yolo_outputs))
"""

# 4. Filter the anchor boxes; these module-level tensors are later fetched by predict()
scores, boxes, classes = yolo_eval(yolo_outputs, image_shape)

# 5. Run the graph on a real image to obtain predictions
def predict(sess, image_file, is_show_info=True, is_plot=True):
    """
    Run the graph held by ``sess`` on one image, draw the predicted boxes on
    it and save the annotated image under ``out/``.

    Relies on the module-level ``scores``, ``boxes``, ``classes``,
    ``yolo_model`` and ``class_names`` defined earlier in this script.

    :param sess:        TensorFlow/Keras session that holds the YOLO graph
    :param image_file:  file name of an image inside the ``images`` folder
    :param is_show_info: when True, print how many boxes were found
    :param is_plot:     when True, display the annotated image with matplotlib
    :return:out_scores  scores of the predicted boxes, as fetched by sess.run
            out_boxes   coordinates of the predicted boxes, as fetched by sess.run
            out_classes class indices of the predicted boxes, as fetched by sess.run
    """

    # Load the image and build the model input
    image, image_data = preprocess_image("images/" + image_file, model_image_size=(608, 608))

    # Fetch the filtered predictions; learning phase 0 selects inference mode
    out_scores, out_boxes, out_classes = sess.run(
        [scores, boxes, classes],
        feed_dict={yolo_model.input: image_data, K.learning_phase(): 0})

    # Report the number of detections
    if is_show_info:
        print("在" + str(image_file) + "中找到了" + str(len(out_boxes)) + "个锚框。")

    # One colour per class for the box outlines
    colors = generate_colors(class_names)

    # Draw the boxes onto the image (modifies ``image`` in place)
    draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors)

    # Save the annotated image; create the output folder first so that saving
    # does not fail when it is missing
    os.makedirs("out", exist_ok=True)
    image.save(os.path.join("out", image_file), quality=100)

    # Show the annotated image
    if is_plot:
        output_image = imageio.imread(os.path.join("out", image_file))
        plt.imshow(output_image)
        plt.show()

    return out_scores, out_boxes, out_classes

# Try a real prediction on a single image:
# with sess.as_default():
# with graph.as_default():
out_scores, out_boxes, out_classes = predict(sess, "test.jpg")
# The string literal below records the output the author observed for test.jpg.
"""
    运行结果:
        在test.jpg中找到了7个锚框。
        car 0.60 (925, 285) (1045, 374)
        bus 0.67 (5, 267) (220, 407)
        car 0.68 (705, 279) (786, 351)
        car 0.70 (947, 324) (1280, 704)
        car 0.75 (159, 303) (346, 440)
        car 0.80 (762, 282) (942, 412)
        car 0.89 (366, 299) (745, 648)
"""

# 6. Annotate the images 0001.jpg ... 0120.jpg in bulk
# File names are the index zero-padded to 4 digits. The width is a constant,
# so it is set once here instead of being recomputed on every iteration.
num_fill = 4
for i in range(1, 121):
    # Zero-pad the index to build the file name
    filename = str(i).zfill(num_fill) + ".jpg"
    print("当前文件:" + str(filename))
    # Annotate without printing detection info or showing the plot
    out_scores, out_boxes, out_classes = predict(sess, filename, is_show_info=False, is_plot=False)

print("绘制完成")

yolo_utils.py

import colorsys
import imghdr
import os
import random
from keras import backend as K

import numpy as np
from PIL import Image, ImageDraw, ImageFont

def read_classes(classes_path):
    """Return the lines of ``classes_path`` as a list, each stripped of surrounding whitespace."""
    with open(classes_path) as handle:
        return [line.strip() for line in handle]

def read_anchors(anchors_path):
    """
    Read the comma-separated floats on the first line of ``anchors_path``
    and return them as a NumPy array reshaped to (-1, 2).
    """
    with open(anchors_path) as handle:
        first_line = handle.readline()
    values = list(map(float, first_line.split(',')))
    return np.array(values).reshape(-1, 2)

def generate_colors(class_names):
    """
    Build one RGB colour (0-255 integer triple) per class by spacing hues
    evenly at full saturation and value, then shuffle them with a fixed seed.
    """
    total = len(class_names)
    palette = []
    for index in range(total):
        red, green, blue = colorsys.hsv_to_rgb(index / total, 1., 1.)
        palette.append((int(red * 255), int(green * 255), int(blue * 255)))
    random.seed(10101)  # Fixed seed so the colours are the same on every run.
    random.shuffle(palette)  # Shuffle so adjacent classes do not get adjacent hues.
    random.seed(None)  # Put the global generator back to its default seeding.
    return palette

def scale_boxes(boxes, image_shape):
    """Multiply the predicted boxes by (height, width, height, width) so they can be drawn on the image."""
    height, width = image_shape[0], image_shape[1]
    # A [1, 4] row, so the product applies the same factors to every box
    scale = K.reshape(K.stack([height, width, height, width]), [1, 4])
    return boxes * scale

def preprocess_image(img_path, model_image_size):
    """
    Load an image and prepare the array that is fed to the model.

    :param img_path:            path of the image file to open
    :param model_image_size:    2-sequence that is reversed before being passed to
                                PIL's resize() -- presumably (height, width), since
                                PIL expects (width, height); confirm against callers
    :return: image      the PIL image as opened (not resized)
             image_data float32 array of the resized image, divided by 255,
                        with a leading batch dimension of size 1
    """
    image = Image.open(img_path)
    resized_image = image.resize(tuple(reversed(model_image_size)), Image.BICUBIC)
    image_data = np.array(resized_image, dtype='float32')
    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)  # Add batch dimension.
    return image, image_data

def draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors):
    """
    Draw every predicted box and its "<class> <score>" label onto ``image``
    in place, printing each label with its clipped corner coordinates.

    :param image:       PIL image to draw on (modified in place)
    :param out_scores:  sequence of scores, indexed like ``out_boxes``
    :param out_boxes:   sequence of boxes, each unpacked as (top, left, bottom, right)
    :param out_classes: sequence of class indices, indexed like ``out_boxes``
    :param class_names: list mapping a class index to its name
    :param colors:      list mapping a class index to the colour used for its boxes
    """

    # Font size and outline thickness scale with the image dimensions
    font = ImageFont.truetype(font='font/FiraMono-Medium.otf',size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
    thickness = (image.size[0] + image.size[1]) // 300

    for i, c in reversed(list(enumerate(out_classes))):
        predicted_class = class_names[c]
        box = out_boxes[i]
        score = out_scores[i]

        label = '{} {:.2f}'.format(predicted_class, score)

        draw = ImageDraw.Draw(image)
        if hasattr(draw, 'textsize'):
            label_size = draw.textsize(label, font)
        else:
            # Newer Pillow releases no longer provide textsize(); measure with
            # textbbox() and use the right/bottom extent of text anchored at the origin.
            _, _, text_right, text_bottom = draw.textbbox((0, 0), label, font=font)
            label_size = (text_right, text_bottom)

        # Round to the nearest pixel and clip the box to the image bounds
        top, left, bottom, right = box
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
        right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
        print(label, (left, top), (right, bottom))

        # Put the label above the box when there is room, otherwise just inside its top edge
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])

        # My kingdom for a good redistributable image drawing library.
        # A thick outline is emulated with nested 1-pixel rectangles.
        for inset in range(thickness):
            draw.rectangle([left + inset, top + inset, right - inset, bottom - inset], outline=colors[c])
        draw.rectangle([tuple(text_origin), tuple(text_origin + label_size)], fill=colors[c])
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)
        del draw

运行结果

【吴恩达深度学习编程作业】4.3目标检测——车辆识别_第1张图片

你可能感兴趣的:(吴恩达深度学习,深度学习,tensorflow,python,图像识别)