PyTorch CNN Image Training Framework
from torchvision import transforms
# preprocessing: resize, center-crop to 48x48, convert to tensor, normalize each channel to [-1, 1]
data_transforms = transforms.Compose([
    transforms.Resize(64),  # transforms.Scale is deprecated; Resize is the current name
    transforms.CenterCrop(48),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])
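A quick check of what this pipeline produces (the image path below is a hypothetical example):

from PIL import Image
img = Image.open('example.jpg')  # hypothetical sample image
tensor = data_transforms(img)
print(tensor.shape)  # torch.Size([3, 48, 48]), channel values roughly in [-1, 1]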
from torchvision import datasets
# ImageFolder expects the absolute path of the image folder: os.path.join(dataset root, subdirectory)
ImageFolder = datasets.ImageFolder('{absolute path of the image folder}', data_transforms)
import torch
dataloader = torch.utils.data.DataLoader(
    ImageFolder,     # a Dataset in ImageFolder form
    batch_size=128,  # samples read per step; limited by GPU memory
    shuffle=True,    # True for the training set, False for validation/test
    num_workers=0    # number of CPU worker processes used for loading
)
dataset_sizes = len(ImageFolder)
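A minimal sketch of pulling one batch from this loader, to confirm the shapes implied by the transforms above:

for inputs, labels in dataloader:
    print(inputs.shape, labels.shape)  # e.g. torch.Size([128, 3, 48, 48]) torch.Size([128])
    break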
import torch.nn as nn
import torch.nn.functional as F

class simpleconv3(nn.Module):
    def __init__(self):
        super(simpleconv3, self).__init__()
        # ----------------- layer 1 -------------------
        # Conv2d arguments: input channels, number of filters (output channels), kernel size, stride
        self.conv1 = nn.Conv2d(3, 12, 3, 2)
        # Batch Normalization; num_features is the channel count (C) of the conv output
        self.bn1 = nn.BatchNorm2d(12)
        # ----------------- layer 2 -------------------
        self.conv2 = nn.Conv2d(12, 24, 3, 2)
        self.bn2 = nn.BatchNorm2d(24)
        # ----------------- layer 3 -------------------
        self.conv3 = nn.Conv2d(24, 48, 3, 2)
        self.bn3 = nn.BatchNorm2d(48)
        # for a 48x48 input the resulting feature map has shape (5, 5, 48)
        self.fc1 = nn.Linear(48 * 5 * 5, 1200)
        self.fc2 = nn.Linear(1200, 128)
        self.fc3 = nn.Linear(128, 4)
        # Why three FC layers rather than one? Stacking smaller FC layers inserts extra
        # nonlinearities (the ReLUs in forward) and uses fewer parameters than one huge layer.
    def forward(self, x):
        # F.relu() applies the ReLU activation (it does not produce probabilities)
        x = F.relu(self.bn1(self.conv1(x)))
        # print("bn1 shape", x.shape)
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        x = x.view(-1, 48 * 5 * 5)  # flatten the (48, 5, 5) feature map
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
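A quick sanity check of the shapes (my own addition; the random input just stands in for a real image):

net = simpleconv3()
dummy = torch.randn(1, 3, 48, 48)  # one fake 48x48 RGB image
print(net(dummy).shape)            # torch.Size([1, 4]): one logit per expression class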
use_gpu = torch.cuda.is_available()  # check whether a GPU is available
modelclc = simpleconv3()
if use_gpu:
    torch.cuda.empty_cache()    # release cached GPU memory
    modelclc = modelclc.cuda()  # train on the GPU
import torch.optim as optim
from torch.optim import lr_scheduler
# loss: cross-entropy, measuring how close the predictions are to the targets
criterion = nn.CrossEntropyLoss()
# optimizer: stochastic gradient descent with momentum
optimizer_ft = optim.SGD(modelclc.parameters(), lr=0.1, momentum=0.9)
# learning-rate scheduler: multiply the lr by 0.1 every 100 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=100, gamma=0.1)
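train_model is called below but not defined in this snippet; here is a minimal single-loader sketch of what such a function could look like (reusing dataloader, use_gpu, and dataset_sizes from above is my assumption, not the original code):

def train_model(model, criterion, optimizer, scheduler, num_epochs=4):
    for epoch in range(num_epochs):
        running_loss, running_corrects = 0.0, 0
        for inputs, labels in dataloader:  # the DataLoader built above (assumed)
            if use_gpu:
                inputs, labels = inputs.cuda(), labels.cuda()
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
            running_corrects += (outputs.argmax(1) == labels).sum().item()
        scheduler.step()  # step the LR schedule once per epoch
        print(f'epoch {epoch}: loss {running_loss / dataset_sizes:.4f}, '
              f'acc {running_corrects / dataset_sizes:.4f}')
    return model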
# configure the model's training run
modelclc = train_model(model=modelclc,
                       criterion=criterion,
                       optimizer=optimizer_ft,
                       scheduler=exp_lr_scheduler,
                       num_epochs=4)  # adjust the number of training epochs here
import os
if not os.path.exists("models"):
    os.mkdir("models")
torch.save(modelclc.state_dict(), 'models/model.ckpt')
import sys
import numpy as np
import cv2
import os
import dlib
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import torchvision
from torchvision import datasets, models, transforms
import time
from PIL import Image
import torch.nn.functional as F
import matplotlib.pyplot as plt
import warnings
# dlib 68-point facial landmark predictor (used below to locate the lip landmarks)
PREDICTOR_PATH = "./Emotion_Recognition_File/face_detect_model/shape_predictor_68_face_landmarks.dat"
predictor = dlib.shape_predictor(PREDICTOR_PATH)
# OpenCV Haar-cascade face detector
cascade_path = './Emotion_Recognition_File/face_detect_model/haarcascade_frontalface_default.xml'
cascade = cv2.CascadeClassifier(cascade_path)
net = simpleconv3()
modelpath = "./models/model.ckpt"  # path to the trained weights
# map_location: a function or dict specifying how to remap storage devices,
# so weights trained on a GPU can be loaded on a CPU
net.load_state_dict(torch.load(modelpath, map_location=lambda storage, loc: storage))
net.eval()  # inference mode: freezes BatchNorm running statistics
# test one image file at a time
img_path = "./Emotion_Recognition_File/find_face_img/"
imagepaths = os.listdir(img_path)  # list the image folder
for imagepath in imagepaths:
    im = cv2.imread(os.path.join(img_path, imagepath), 1)
    try:
        rects = cascade.detectMultiScale(im, 1.3, 5)  # face detection
        x, y, w, h = rects[0]  # the face box: top-left corner x, y and width/height w, h
        # print(x, y, w, h)
        rect = dlib.rectangle(int(x), int(y), int(x + w), int(y + h))
        # matrix of all 68 detected landmark points
        landmarks = np.matrix([[p.x, p.y] for p in predictor(im, rect).parts()])
    except Exception:
        # print("no face detected")
        continue  # no face detected
    # Use the outermost mouth landmarks to find the smallest rectangle enclosing the lips.
    # The 68 landmarks are laid out as:
    # jaw line from the left ear (0) through the chin to the right ear (16),
    # left eyebrow (17-21), right eyebrow (22-26), left eye (36-41),
    # right eye (42-47), nose bridge top to bottom (27-30), nostrils (31-35),
    # outer mouth contour (48-59), inner mouth contour (60-67)
    xmin = 10000
    xmax = 0
    ymin = 10000
    ymax = 0
    # scan the mouth landmarks (points 48-67; the range end is exclusive, hence 68)
    for i in range(48, 68):
        x = landmarks[i, 0]  # x coordinate of the mouth point
        y = landmarks[i, 1]  # y coordinate of the mouth point
        if x < xmin:
            xmin = x
        if x > xmax:
            xmax = x
        if y < ymin:
            ymin = y
        if y > ymax:
            ymax = y
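    # An equivalent vectorized form (illustrative alternative, same result):
    #   xs, ys = landmarks[48:68, 0], landmarks[48:68, 1]
    #   xmin, xmax, ymin, ymax = xs.min(), xs.max(), ys.min(), ys.max()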
    # the four bounds of the mouth box
    # print("xmin=", xmin)
    # print("xmax=", xmax)
    # print("ymin=", ymin)
    # print("ymax=", ymax)
    # print('\n')
    roiwidth = xmax - xmin   # width of the mouth box
    roiheight = ymax - ymin  # height of the mouth box
    roi = im[ymin:ymax, xmin:xmax, 0:3]
    # Enlarge the box so the mouth sits in the center with margin around it rather than
    # filling the whole crop; this prevents losing edge features during convolution.
    if roiwidth > roiheight:
        dstlen = 1.5 * roiwidth
    else:
        dstlen = 1.5 * roiheight
    diff_xlen = dstlen - roiwidth
    diff_ylen = dstlen - roiheight
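    # e.g. with illustrative numbers: roiwidth=40, roiheight=30 -> dstlen=60,
    # leaving diff_xlen=20 and diff_ylen=30 of margin to distribute around the mouth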
    newx = xmin
    newy = ymin
    imagerows, imagecols, channel = im.shape
    # shift the top-left corner so the enlarged square crop stays inside the image
    if newx >= diff_xlen / 2 and newx + roiwidth + diff_xlen / 2 < imagecols:
        newx = newx - diff_xlen / 2
    elif newx < diff_xlen / 2:
        newx = 0
    else:
        newx = imagecols - dstlen
    if newy >= diff_ylen / 2 and newy + roiheight + diff_ylen / 2 < imagerows:
        newy = newy - diff_ylen / 2
    elif newy < diff_ylen / 2:
        newy = 0
    else:
        newy = imagerows - dstlen
    # crop the lip region as a square of side dstlen
    roi = im[int(newy):int(newy + dstlen), int(newx):int(newx + dstlen), 0:3]
    data_transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    ])
    # BGR -> RGB (OpenCV loads images as BGR; the model was trained on RGB)
    roi = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
    # resize to the model's 48x48x3 input and scale pixel values to [0, 1]
    roiresized = cv2.resize(roi, (48, 48)).astype(np.float32) / 255.0
    # Convert to a tensor, normalize, and add a batch dimension on axis 0.
    # ToTensor also permutes HWC to CHW, so the blob shape is [1, 3, 48, 48] (not [1, 48, 48, 3]).
    imgblob = data_transforms(roiresized).unsqueeze(0)
    print(imgblob)
    # Run inference without building an autograd graph. torch.no_grad() must be used
    # as a context manager; a bare call has no effect.
    with torch.no_grad():
        # softmax normalizes the logits into per-class probabilities
        predict = F.softmax(net(imgblob), dim=1)
    print(predict)
    # convert the prediction to numpy and use np.argmax() to get the index of the most likely expression
    index = np.argmax(predict.detach().numpy())
    # reload the image for display
    im_show = cv2.imread(os.path.join(img_path, imagepath), 1)
    # image dimensions
    im_h, im_w, im_c = im_show.shape
    pos_x = int(newx + dstlen)
    pos_y = int(newy + dstlen)
    font = cv2.FONT_HERSHEY_SIMPLEX
    # draw the lip bounding box on the image
    cv2.rectangle(im_show, (int(newx), int(newy)),
                  (int(newx + dstlen), int(newy + dstlen)), (0, 255, 255), 2)
    # write the predicted class name next to the box
    labels = ['none', 'pout', 'smile', 'open']
    cv2.putText(im_show, labels[index], (pos_x, pos_y), font, 1.5, (0, 0, 255), 2)
    # cv2.namedWindow('result', 0)
    # cv2.imshow('result', im_show)
    # write the annotated image to ./results
    if not os.path.exists('results'):
        os.mkdir('results')
    cv2.imwrite(os.path.join('results', imagepath), im_show)
    # print(os.path.join('results', imagepath))
    # reverse the channels here: matplotlib displays RGB while OpenCV stores BGR
    plt.imshow(im_show[:, :, ::-1])
    plt.show()
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()