This article is based on the post 睿智的目标检测42——Pytorch搭建Retinaface人脸检测与关键点定位平台 by Bubbliiiing, a well-known author in the AI field.
Original post: https://blog.csdn.net/weixin_44791964/article/details/106872072
These are my study notes, recording how I reproduced the open-source project locally, re-packaged it, and extended its features. Many thanks to the author for the open-source work and generous contribution. Parts of this article come from material gathered online and from my own practice; if any information is incorrect, corrections from readers are welcome. This article is for learning and exchange only and is not intended for any commercial use.
The project implements training, testing, and evaluation of the RetinaFace face detection model, helping users get started with RetinaFace quickly.
RetinaFace is a single-stage face detection model that offers both high accuracy and high efficiency. This project applies RetinaFace to the face detection task and implements the full end-to-end pipeline.
GitHub: Face-recognition-web-ui
My related notes:
RetinaFace for face detection and landmark localization - Deep Learning Notes 1
FaceNet for face feature comparison - Deep Learning Notes 2
RetinaFace face detection model - Gradio UI design
FaceNet face recognition model - Gradio UI design
RetinaFace + FaceNet face recognition system - Gradio UI design
Model training is performed through the RetinaFaceTrainer class.
Main parameters:
Cuda: whether to train on the GPU
training_dataset_path: path to the face annotation file (label.txt of the WIDER FACE training set)
backbone: feature extraction network, either mobilenet or resnet50
pretrained: whether to load pretrained backbone weights
model_path: path to a full set of model weights (leave empty to skip)
Freeze_Train: whether to freeze the backbone during the first training stage
num_workers: number of worker threads used by the DataLoader
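As a quick illustration, a minimal training call might look like the sketch below. The paths and switch values are examples only and must match your local setup; RetinaFaceTrainer itself is defined in the training code shown further down.

from retinaface_trainer import RetinaFaceTrainer

# Example values only: adjust the paths and switches to your environment
trainer = RetinaFaceTrainer(Cuda=True,
                            training_dataset_path='./data/widerface/train/label.txt',
                            backbone="mobilenet",
                            pretrained=False,
                            model_path='model_data/Retinaface_mobilenet0.25.pth',
                            Freeze_Train=True,
                            num_workers=4)
trainer.freeze_train()  # runs the frozen stage, then the unfrozen stage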
The model is tested through the detect_image function.
Main parameters:
img: path to the input image
model_path: path to the model weights file
backbone: mobilenet or resnet50
temp_img_path: path where the annotated result image is saved
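For reference, a single-image test could be invoked as in the sketch below; the image and weight paths are placeholders taken from the project's example assets.

from enpredict import detect_image

# Placeholder paths: any readable image and a matching weight file will do
result_path = detect_image(img="img/street.jpg",
                           model_path="model_data/Retinaface_mobilenet0.25.pth",
                           backbone="mobilenet",
                           temp_img_path="output/result.jpg")
print(result_path)  # path of the saved, annotated image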
The model is evaluated through the test function.
Main parameters:
model_path: path to the model weights file
backbone: mobilenet or resnet50
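A typical evaluation call, assuming the WIDER FACE validation images and ground-truth files are already laid out as configured inside test, might look like this sketch:

from eneva import test

# Assumes ./data/widerface/val/images/ and ./widerface_evaluate/ground_truth/ exist
pr_curve_png, ap_chart_png = test(model_path="model_data/Retinaface_mobilenet0.25.pth",
                                  backbone="mobilenet")
print(pr_curve_png, ap_chart_png)  # 'precision_recall_curve.png', 'ap_bar_chart.png'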
The evaluation code produces two charts:
Precision-Recall Curve: plots precision against recall as the detection threshold varies.
Average Precision Bar Chart: shows the AP obtained on the Easy, Medium, and Hard subsets of the WIDER FACE validation set.
How to interpret these charts: they show how the model's performance changes with different thresholds, difficulty levels, and other factors, which helps you tune the model or choose a suitable configuration.
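As a rough intuition, AP summarizes a precision-recall curve as the area under it. The sketch below illustrates that idea only; the official WIDER FACE evaluation used by this project applies its own matching and interpolation rules.

import numpy as np

def approx_ap(precisions, recalls):
    # Sort by recall and integrate precision over recall (trapezoidal rule)
    order = np.argsort(recalls)
    return np.trapz(np.asarray(precisions)[order], np.asarray(recalls)[order])

print(approx_ap([1.0, 0.9, 0.75, 0.6], [0.1, 0.4, 0.7, 0.9]))  # ≈ 0.67

The code below implements training of the RetinaFace model: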
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.optim as optim
from torch.utils.data import DataLoader
from nets.retinaface import RetinaFace
from nets.retinaface_training import MultiBoxLoss, weights_init
from utils.anchors import Anchors
from utils.callbacks import LossHistory
from utils.config import cfg_mnet, cfg_re50
from utils.dataloader import DataGenerator, detection_collate
from utils.utils_fit import fit_one_epoch
class RetinaFaceTrainer:
    def __init__(self, Cuda=True, training_dataset_path='./data/widerface/train/label.txt',
                 backbone="mobilenet", pretrained=True, model_path='',
                 Freeze_Train=True, num_workers=4):
        self.Cuda = Cuda                                    # whether to train on the GPU
        self.training_dataset_path = training_dataset_path  # path to the face annotation file
        self.backbone = backbone                            # feature extraction network: mobilenet or resnet50
        self.pretrained = pretrained                        # whether to load pretrained backbone weights
        self.model_path = model_path                        # path to the model weights
        self.Freeze_Train = Freeze_Train                    # whether to freeze the backbone at the start of training
        self.num_workers = num_workers                      # number of DataLoader worker threads
    def load_model(self):
        if self.backbone == "mobilenet":
            cfg = cfg_mnet
        elif self.backbone == "resnet50":
            cfg = cfg_re50
        else:
            raise ValueError('Unsupported backbone - `{}`, Use mobilenet, resnet50.'.format(self.backbone))
        model = RetinaFace(cfg=cfg, pretrained=self.pretrained)
        if not self.pretrained:
            weights_init(model)
        if self.model_path != '':
            # Load only the weights whose keys and shapes match the current model
            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
            model_dict = model.state_dict()
            pretrained_dict = torch.load(self.model_path, map_location=device)
            pretrained_dict = {k: v for k, v in pretrained_dict.items()
                               if k in model_dict and np.shape(model_dict[k]) == np.shape(v)}
            model_dict.update(pretrained_dict)
            model.load_state_dict(model_dict)
        model_train = model.train()
        if self.Cuda:
            model_train = torch.nn.DataParallel(model)
            cudnn.benchmark = True
            model_train = model_train.cuda()
        anchors = Anchors(cfg, image_size=(cfg['train_image_size'], cfg['train_image_size'])).get_anchors()
        if self.Cuda:
            anchors = anchors.cuda()
        criterion = MultiBoxLoss(2, 0.35, 7, cfg['variance'], self.Cuda)
        loss_history = LossHistory("logs/")
        return model, model_train, anchors, criterion, loss_history
    def freeze_train(self):
        model, model_train, anchors, criterion, loss_history = self.load_model()
        # Stage 1: frozen backbone, larger learning rate for faster convergence
        lr = 1e-3
        Batch_size = 8
        Init_Epoch = 0
        Freeze_Epoch = 2
        optimizer = optim.Adam(model_train.parameters(), lr, weight_decay=5e-4)
        lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.92)
        train_dataset = DataGenerator(self.training_dataset_path, cfg_mnet['train_image_size'])
        gen = DataLoader(train_dataset, shuffle=True, batch_size=Batch_size, num_workers=self.num_workers,
                         pin_memory=True, drop_last=True, collate_fn=detection_collate)
        epoch_step = train_dataset.get_len() // Batch_size
        if self.Freeze_Train:
            for param in model.body.parameters():
                param.requires_grad = False
        for epoch in range(Init_Epoch, Freeze_Epoch):
            fit_one_epoch(model_train, model, loss_history, optimizer, criterion, epoch, epoch_step, gen,
                          Freeze_Epoch, anchors, cfg_mnet, self.Cuda)
            lr_scheduler.step()

        # Stage 2: unfrozen backbone, smaller learning rate to avoid oscillation
        lr = 1e-4
        Batch_size = 4
        Freeze_Epoch = 2
        Unfreeze_Epoch = 4
        optimizer = optim.Adam(model_train.parameters(), lr, weight_decay=5e-4)
        lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.92)
        train_dataset = DataGenerator(self.training_dataset_path, cfg_mnet['train_image_size'])
        gen = DataLoader(train_dataset, shuffle=True, batch_size=Batch_size, num_workers=self.num_workers,
                         pin_memory=True, drop_last=True, collate_fn=detection_collate)
        epoch_step = train_dataset.get_len() // Batch_size
        if self.Freeze_Train:
            for param in model.body.parameters():
                param.requires_grad = True
        for epoch in range(Freeze_Epoch, Unfreeze_Epoch):
            fit_one_epoch(model_train, model, loss_history, optimizer, criterion, epoch, epoch_step, gen,
                          Unfreeze_Epoch, anchors, cfg_mnet, self.Cuda)
            lr_scheduler.step()
The main purpose of this code is face detection on a single image:
import time
import cv2
import numpy as np
from enretinaface import Retinaface
mode = "predict"
video_path = 0
video_save_path = ""
video_fps = 25.0
test_interval = 100
dir_origin_path = "img/"
dir_save_path = "img_out/"
def detect_image(img, model_path, backbone, temp_img_path):
    retinaface = Retinaface(model_path=model_path, backbone=backbone)
    image = cv2.imread(img)
    if image is None:
        print('Open Error! Try again!')
        return
    else:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        r_image = retinaface.detect_image(image)
        r_image = cv2.cvtColor(r_image, cv2.COLOR_RGB2BGR)
        # cv2.imshow("after", r_image)
        # cv2.waitKey(0)
        if temp_img_path != "":
            # Save the annotated result to a temporary file
            cv2.imwrite(temp_img_path, r_image)
            print("Save processed img to the path: " + temp_img_path)
            return temp_img_path
The main purpose of this code is mAP evaluation of the RetinaFace model:
import os
import cv2
import numpy as np
import tqdm
import matplotlib.pyplot as plt
from enretinaface import Retinaface
from utils.enutils_map import evaluation
def plot_precision_recall_curve(precisions, recalls):
    plt.plot(recalls, precisions)
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall Curve')
    plt.grid(True)
    plt.savefig('precision_recall_curve.png')  # Save the plot to a local file
    plt.close()
    # plt.show()


# Function to plot the AP bar chart
def plot_ap_bar_chart(aps):
    labels = ['Easy', 'Medium', 'Hard']
    x = np.arange(len(labels))
    plt.bar(x, aps)
    plt.xlabel('Difficulty Setting')
    plt.ylabel('AP')
    plt.title('Average Precision (AP) by Difficulty Setting')
    plt.xticks(x, labels)
    plt.savefig('ap_bar_chart.png')  # Save the plot to a local file
    plt.close()
    # plt.show()
def test(model_path, backbone):
    mAP_retinaface = Retinaface(model_path=model_path, backbone=backbone, confidence=0.01, nms_iou=0.45)
    save_folder = './widerface_evaluate/widerface_txt/'
    gt_dir = "./widerface_evaluate/ground_truth/"
    imgs_folder = './data/widerface/val/images/'
    sub_folders = os.listdir(imgs_folder)
    test_dataset = []
    for sub_folder in sub_folders:
        image_names = os.listdir(os.path.join(imgs_folder, sub_folder))
        for image_name in image_names:
            test_dataset.append(os.path.join(sub_folder, image_name))
    num_images = len(test_dataset)

    # Containers for precision, recall and AP data
    precisions = []
    recalls = []
    aps = []

    # Run detection on every validation image and write the results in WIDER FACE txt format
    for img_name in tqdm.tqdm(test_dataset):
        image = cv2.imread(os.path.join(imgs_folder, img_name))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = mAP_retinaface.get_map_txt(image)
        save_name = save_folder + img_name[:-4] + ".txt"
        dirname = os.path.dirname(save_name)
        if not os.path.isdir(dirname):
            os.makedirs(dirname)
        with open(save_name, "w") as fd:
            file_name = os.path.basename(save_name)[:-4] + "\n"
            bboxs_num = str(len(results)) + "\n"
            fd.write(file_name)
            fd.write(bboxs_num)
            for box in results:
                x = int(box[0])
                y = int(box[1])
                w = int(box[2]) - int(box[0])
                h = int(box[3]) - int(box[1])
                confidence = str(box[4])
                line = str(x) + " " + str(y) + " " + str(w) + " " + str(h) + " " + confidence + " \n"
                fd.write(line)

    # Compute precision, recall and AP against the ground truth
    precision, recall, ap = evaluation(save_folder, gt_dir)
    precisions.append(precision)
    recalls.append(recall)
    aps.append(ap)

    # Average and flatten the precision/recall data, then save it to local files
    precisions = np.array(precisions)
    recalls = np.array(recalls)
    precisions = np.mean(precisions, axis=0)
    recalls = np.mean(recalls, axis=0)
    precisions = np.squeeze(precisions)
    recalls = np.squeeze(recalls)
    np.savetxt('precisions.txt', precisions)
    np.savetxt('recalls.txt', recalls)
    np.savetxt('aps.txt', aps)

    # Reload the data from the local files
    precisions = np.loadtxt('precisions.txt')
    recalls = np.loadtxt('recalls.txt')
    aps = np.loadtxt('aps.txt')
    # Plot Precision-Recall Curve
    plot_precision_recall_curve(precisions, recalls)
    # Plot AP Bar Chart
    plot_ap_bar_chart(aps)
    return "precision_recall_curve.png", "ap_bar_chart.png"
This code implements an interactive Gradio interface for training, prediction, and evaluation of the RetinaFace model:
import gradio as gr
from retinaface_trainer import RetinaFaceTrainer
from enpredict import detect_image
from eneva import test
def train_retinaface(Cuda=True, training_dataset_path='./data/widerface/train/label.txt',
                     backbone="mobilenet", pretrained=False, model_path='',
                     Freeze_Train=True, num_workers=4):
    trainer = RetinaFaceTrainer(Cuda, training_dataset_path, backbone, pretrained, model_path, Freeze_Train,
                                int(num_workers))
    trainer.freeze_train()
    return "Training finished; see the console for details"


def detect_image_change(image, model_path, backbone):
    image_path = image.name
    model_path = model_path.name
    temp_img_path = "output/result.jpg"
    result = detect_image(image_path, model_path, backbone, temp_img_path)
    return result


def evaluation_test(model_path, backbone):
    model_path = model_path.name
    img_path_1, img_path_2 = test(model_path, backbone)
    print(img_path_1, img_path_2)
    return img_path_1, img_path_2
if __name__ == "__main__":
    with gr.Blocks() as demo:
        # Header text
        gr.Markdown("""
        # Retinaface model
        See the console for detailed runtime output.
        ### 1. Mode 1: model training
        Click an example to auto-fill a preset training configuration.
        If model_path is set, the backbone weights do not need to be loaded separately and the value of pretrained is irrelevant; defaults: pretrained = False, Freeze_Train = True.
        If model_path is not set and pretrained = True, only the pretrained backbone is loaded before training starts; defaults: pretrained = True, Freeze_Train = True.
        If model_path is not set, pretrained = False and Freeze_Train = False, the model is trained from scratch with no backbone-freezing stage.
        ### 2. Mode 2: model trial
        Click an example to auto-fill a preset model and test image.
        ### 3. Mode 3: model evaluation
        Click an example to auto-fill a preset model.
        """)
        with gr.Tabs():
            with gr.TabItem("Model Training"):
                # One row, two columns: inputs on the left, outputs on the right
                with gr.Row():
                    with gr.Column():  # left column: inputs
                        use_cuda = gr.Checkbox(label="Use CUDA")
                        dataset_path = gr.Textbox(label="Training Dataset Path")
                        backbone = gr.Dropdown(['mobilenet', 'resnet50'], label="Backbone")
                        use_pretrained = gr.Checkbox(label="Use Pretrained")
                        model_path = gr.Textbox(label="Model Path (if available)")
                        freeze_training = gr.Checkbox(label="Freeze Training")
                        num_workers = gr.Number(label="Number of Workers")
                        with gr.Row():
                            train_button = gr.Button("Start Training")
                    with gr.Column():  # right column: outputs
                        x_output = gr.Textbox(label="Training Log")
                        gr.Examples(
                            examples=[
                                [True, './data/widerface/train/label.txt', "mobilenet", False,
                                 'model_data/Retinaface_mobilenet0.25.pth', True, 4],
                                [True, './data/widerface/train/label.txt', "resnet50", False,
                                 'model_data/Retinaface_resnet50.pth', True, 4],
                                [True, './data/widerface/train/label.txt', "mobilenet", True,
                                 '', True, 4],
                                [True, './data/widerface/train/label.txt', "mobilenet", False,
                                 'model_data/Retinaface_mobilenet0.25.pth', False, 4],
                            ],
                            inputs=[use_cuda, dataset_path, backbone, use_pretrained,
                                    model_path, freeze_training, num_workers])
                train_button.click(fn=train_retinaface,
                                   inputs=[use_cuda, dataset_path, backbone, use_pretrained,
                                           model_path, freeze_training, num_workers],
                                   outputs=x_output)
            with gr.TabItem("Model Trial"):
                # One row, two columns: inputs on the left, outputs on the right
                with gr.Row():
                    with gr.Column():  # left column: inputs
                        image_input = gr.File(label="Image")
                        model_input = gr.File(label="model_path")
                        bone_input = gr.Dropdown(['mobilenet', 'resnet50'], label="Backbone")
                        # generate button (placed in its own row)
                        with gr.Row():
                            image_button = gr.Button("Generate")
                    with gr.Column():  # right column: outputs
                        # output widget
                        image_output = gr.Image(label="Output Image")
                        # example models
                        gr.Examples(
                            examples=[
                                ["model_data/Retinaface_mobilenet0.25.pth", "mobilenet"],
                                ["model_data/Retinaface_resnet50.pth", 'resnet50'],
                                ["logs/Epoch1-Total_Loss7.8133.pth", "mobilenet"],
                                ["logs/Epoch1-Total_Loss16.7059.pth", "mobilenet"],
                                ["logs/Epoch1-Total_Loss18.3385.pth", "mobilenet"],
                                ["logs/Epoch1-Total_Loss22.6031.pth", "mobilenet"],
                                ["logs/Epoch1-Total_Loss28.5903.pth", "mobilenet"],
                            ],
                            inputs=[model_input, bone_input]
                        )
                        # example images
                        gr.Examples(
                            examples=[
                                "img/street.jpg",
                                "img/timg.jpg"
                            ],
                            inputs=[image_input]
                        )
                image_button.click(fn=detect_image_change,
                                   inputs=[image_input, model_input, bone_input],
                                   outputs=image_output)
            with gr.TabItem("Model Evaluation"):
                # One row, two columns: inputs on the left, outputs on the right
                with gr.Row():
                    with gr.Column():  # left column: inputs
                        test_model_input = gr.File(label="model_path")
                        test_bone_input = gr.Dropdown(['mobilenet', 'resnet50'], label="Backbone")
                        # evaluate button (placed in its own row)
                        with gr.Row():
                            eva_button = gr.Button("Evaluate")
                    with gr.Column():  # right column: outputs
                        # output widgets
                        test_image_output_1 = gr.Image(label="Output Image")
                        test_image_output_2 = gr.Image(label="Output Image")
                        # example models
                        gr.Examples(
                            examples=[
                                ["model_data/Retinaface_mobilenet0.25.pth", "mobilenet"],
                                ["model_data/Retinaface_resnet50.pth", 'resnet50'],
                                ["logs/Epoch1-Total_Loss7.8133.pth", "mobilenet"],
                                ["logs/Epoch1-Total_Loss16.7059.pth", "mobilenet"],
                                ["logs/Epoch1-Total_Loss18.3385.pth", "mobilenet"],
                                ["logs/Epoch1-Total_Loss22.6031.pth", "mobilenet"],
                                ["logs/Epoch1-Total_Loss28.5903.pth", "mobilenet"],
                            ],
                            inputs=[test_model_input, test_bone_input]
                        )
                eva_button.click(fn=evaluation_test,
                                 inputs=[test_model_input, test_bone_input],
                                 outputs=[test_image_output_1, test_image_output_2])
    demo.launch()
The packaging approach is based on:
Semi-automated manual packaging and migration of a Python project using a .bat script
rem Create the virtual environment
python -m venv venv
call venv\Scripts\activate.bat
python -m pip install -i https://mirrors.aliyun.com/pypi/simple/ --upgrade pip setuptools
pip install dlib-19.19.0-cp38-cp38-win_amd64.whl
pip install -i https://mirrors.aliyun.com/pypi/simple/ opencv-python==4.5.3.56
pip install torch-1.7.1+cu110-cp38-cp38-win_amd64.whl
pip install torch==1.7.1+cu110 torchvision==0.8.2+cu110 torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html
pip install -i https://mirrors.aliyun.com/pypi/simple/ gradio
pip install -i https://mirrors.aliyun.com/pypi/simple/ scikit-learn
pip install -i https://mirrors.aliyun.com/pypi/simple/ -r requirements.txt
requirements.txt
scipy==1.7.1
numpy==1.21.2
matplotlib==3.4.3
opencv_python==4.5.3.56
torch==1.7.1
torchvision==0.8.2
tqdm==4.62.2
Pillow==8.3.2
h5py==2.10.0
This document summarizes the project's functionality while avoiding redundant explanations, and distills the main parameters and functions to help users understand and use the project quickly.