本系列文章旨在对 GitHub 上 malin9402 提供的代码进行说明。在这篇文章中,我们会对 YOLOv3 项目中的 image_demo.py 文件进行说明。这个程序的作用是对输入图片中的内容进行目标检测,并在图上标注检测框。
如果只是想运行 GitHub 上的代码,可以参考《对 YOLOv3 代码的说明》一文。
import cv2
import numpy as np
import core.utils as utils
import tensorflow as tf
from core.yolov3 import YOLOv3, decode
from PIL import Image
# Annotated walkthrough: run YOLOv3 on one image and show the image with the
# detected boxes drawn on it.

input_size = 416  # side length of the image fed to the model
image_path = "./docs/kite.jpg"  # path of the image to run detection on

# cv2.imread returns None (it does not raise) when the file is missing or
# cannot be decoded, so fail early with a clear message instead of an
# obscure error inside cvtColor.
original_image = cv2.imread(image_path)
if original_image is None:
    raise FileNotFoundError("Could not read image: " + image_path)

# Convert from BGR to RGB channel order.
# For the sample image the original notes give shape (900, 1352, 3).
original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
original_image_size = original_image.shape[:2]

# A copy is passed to the preprocessing helper; original_image itself is
# used again below for drawing the boxes.
image_data = utils.image_preporcess(
    np.copy(original_image), [input_size, input_size]
)  # (416, 416, 3) per the original notes
# Add a leading batch axis and cast to float32 -> (1, 416, 416, 3).
image_data = image_data[np.newaxis, ...].astype(np.float32)

# Build the network and obtain the three feature maps.
input_layer = tf.keras.layers.Input([input_size, input_size, 3])
feature_maps = YOLOv3(input_layer)

# Decode each feature map; decode also receives the feature map's index.
bbox_tensors = [decode(fm, i) for i, fm in enumerate(feature_maps)]

# Build the model from the input layer to the decoded outputs.
model = tf.keras.Model(input_layer, bbox_tensors)

# Assign the already-trained weights to the model.
utils.load_weights(model, "./yolov3.weights")

# Feed the image to the model to get the predicted boxes.
# pred_bbox is a list containing three arrays; per the original notes their
# shapes are (1, 52, 52, 3, 15), (1, 26, 26, 3, 15) and (1, 13, 13, 3, 15).
pred_bbox = model.predict(image_data)

# Flatten every array to 2-D, keeping the last axis: (-1, 15).
pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]

# Merge the box information from all three outputs into one tensor.
pred_bbox = tf.concat(pred_bbox, axis=0)

# NOTE(review): 0.3 is presumably a score threshold and 0.45 an IoU
# threshold for NMS -- confirm against core.utils.
bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, input_size, 0.3)
bboxes = utils.nms(bboxes, 0.45, method='nms')

# Draw the remaining boxes on the original image and display it.
image = utils.draw_bbox(original_image, bboxes)
image = Image.fromarray(image)
image.show()
import cv2
import numpy as np
import core.utils as utils
import tensorflow as tf
from core.yolov3 import YOLOv3, decode
from PIL import Image
# Complete script: run YOLOv3 on one image and show the image with the
# detected boxes drawn on it.

input_size = 416  # side length of the image fed to the model
image_path = "./docs/kite.jpg"  # image to run detection on

# Build the network and obtain its feature maps.
input_layer = tf.keras.layers.Input([input_size, input_size, 3])
feature_maps = YOLOv3(input_layer)

# cv2.imread returns None (it does not raise) when the file is missing or
# cannot be decoded, so fail early with a clear message instead of an
# obscure error inside cvtColor.
original_image = cv2.imread(image_path)
if original_image is None:
    raise FileNotFoundError("Could not read image: " + image_path)

# Convert from BGR to RGB channel order.
original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
original_image_size = original_image.shape[:2]

# A copy is passed to the preprocessing helper; original_image itself is
# used again below for drawing the boxes.
image_data = utils.image_preporcess(
    np.copy(original_image), [input_size, input_size]
)
# Add a leading batch axis and cast to float32.
image_data = image_data[np.newaxis, ...].astype(np.float32)

# Decode each feature map; decode also receives the feature map's index.
bbox_tensors = [decode(fm, i) for i, fm in enumerate(feature_maps)]

# Build the model, load the weights file and print the layer summary.
model = tf.keras.Model(input_layer, bbox_tensors)
utils.load_weights(model, "./yolov3/yolov3.weights")
model.summary()

# Run inference, flatten every output to 2-D (keeping the last axis) and
# concatenate all outputs along axis 0.
pred_bbox = model.predict(image_data)
pred_bbox = [tf.reshape(x, (-1, tf.shape(x)[-1])) for x in pred_bbox]
pred_bbox = tf.concat(pred_bbox, axis=0)

# NOTE(review): 0.3 is presumably a score threshold and 0.45 an IoU
# threshold for NMS -- confirm against core.utils.
bboxes = utils.postprocess_boxes(pred_bbox, original_image_size, input_size, 0.3)
bboxes = utils.nms(bboxes, 0.45, method='nms')

# Draw the remaining boxes on the original image and display it.
image = utils.draw_bbox(original_image, bboxes)
image = Image.fromarray(image)
image.show()