跟着大佬在学习
深度学习在图像处理中的应用(tensorflow2.4以及pytorch1.10实现)
# Smoke test: push one random batch through LeNet and print the architecture.
# NOTE(review): assumes LeNet is defined or imported before this snippet runs.
import torch

# A batch of 32 three-channel 32x32 inputs, sampled uniformly from [0, 1).
input1 = torch.rand(32, 3, 32, 32)
model = LeNet()
print(model)
output = model(input1)
import torch
import torchvision
import torch.nn as nn
from model import LeNet
import torch.optim as optim
import torchvision.transforms as transforms
def main():
    """Load the CIFAR-10 training split, downloading it on first use."""
    # The training split holds the 50000 training images. The previous
    # `train=False` selected the test split instead, contradicting both the
    # variable name and this note.
    # `download=True` fetches the archive into ./data when it is not already
    # there, so it must be True the first time this runs.
    train_set = torchvision.datasets.CIFAR10(root='./data', train=True,
                                             download=True)


if __name__ == '__main__':
    main()
torchvision 官方提供的数据集可以在代码中通过 `torchvision.datasets.` 查看
(例如 `torchvision.datasets.CIFAR10`)。
https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
深度学习在图像处理中的应用(tensorflow2.4以及pytorch1.10实现)
pytorch中的卷积操作详解
经卷积后的矩阵尺寸大小计算公式为:
$N = (W - F + 2P) / S + 1$,其中 $W$ 为输入尺寸,$F$ 为卷积核大小,$P$ 为 padding,$S$ 为步长。
但在实际应用中,有时会出现 $N$ 为非整数的情况(例如 AlexNet、GoogLeNet 网络的第一层输出)。再例如输入矩阵 $H = W = 5$,卷积核 $F = 2$,$S = 2$,$P = 1$,经计算得到 $N = (5 - 2 + 2 \times 1) / 2 + 1 = 3.5$。此时 PyTorch 是如何处理的呢?先直接给出结论:在卷积过程中会直接将最后一行以及最后一列忽略掉,以保证 $N$ 为整数,此时 $N = (5 - 2 + 2 \times 1 - 1) / 2 + 1 = 3$。
import torch.nn as nn
import torch
class AlexNet(nn.Module):
    """AlexNet-style convolutional classifier.

    The per-layer shape comments assume a (N, 3, 224, 224) input, which is
    what makes the flattened feature map match the first linear layer
    (128 * 6 * 6 inputs).

    Args:
        num_classes: Number of output classes (size of the last linear layer).
        init_weights: When True, re-initialize conv/linear parameters with
            ``_initialize_weights`` instead of keeping PyTorch's defaults.
    """

    def __init__(self, num_classes=1000, init_weights=False):
        super().__init__()
        # Feature extractor. Spatial sizes follow
        # N = floor((W - F + 2P) / S) + 1, with F = kernel size, P = padding,
        # S = stride.
        self.features = nn.Sequential(
            # 3 input channels, 48 kernels of size 11, stride 4, 2 pixels of
            # zero padding on every side:
            # floor((224 - 11 + 2*2) / 4) + 1 = 55
            nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2),  # input[3, 224, 224] output[48, 55, 55]
            # inplace=True overwrites the input tensor to save memory.
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[48, 27, 27]
            nn.Conv2d(48, 128, kernel_size=5, padding=2),           # output[128, 27, 27]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[128, 13, 13]
            nn.Conv2d(128, 192, kernel_size=3, padding=1),          # output[192, 13, 13]
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 192, kernel_size=3, padding=1),          # output[192, 13, 13]
            nn.ReLU(inplace=True),
            nn.Conv2d(192, 128, kernel_size=3, padding=1),          # output[128, 13, 13]
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[128, 6, 6]
        )
        # Classifier: three fully connected layers.
        self.classifier = nn.Sequential(
            # Dropout randomly zeroes activations during training
            # (p=0.5 is also the default).
            nn.Dropout(p=0.5),
            # Input is the flattened [128, 6, 6] output of the last max-pool.
            nn.Linear(128 * 6 * 6, 2048),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(2048, 2048),
            nn.ReLU(inplace=True),
            # One output score per class.
            nn.Linear(2048, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Return class scores of shape (N, num_classes) for a batch ``x``."""
        x = self.features(x)
        # Flatten everything from dim 1 onward; dim 0 (the batch) is kept.
        x = torch.flatten(x, start_dim=1)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """Re-initialize every conv and linear layer of the network."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                # Normal distribution: mean 0, standard deviation 0.01
                # (the third argument is the std, not the variance).
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
train.py
测试
# Visual sanity check: display one validation batch together with its class
# names. Relies on validate_dataset, nw and cla_dict from the training script.
validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                              batch_size=4, shuffle=True,
                                              num_workers=nw)
test_data_iter = iter(validate_loader)
# Use the built-in next(): the iterator's Python-2-style .next() method is not
# available in newer PyTorch releases.
test_image, test_label = next(test_data_iter)


def imshow(img):
    """Show an image tensor with matplotlib.

    NOTE(review): assumes a (C, H, W) tensor normalized with mean 0.5 and
    std 0.5, as the transpose and the un-normalization below imply.
    """
    img = img / 2 + 0.5  # undo the normalization: x * 0.5 + 0.5
    npimg = img.numpy()
    # Reorder the axes (1, 2, 0) so the first axis becomes the last for plt.
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


# Iterate the labels themselves so the printout always matches the batch size.
print(' '.join('%5s' % cla_dict[label.item()] for label in test_label))
imshow(utils.make_grid(test_image))
import os
import sys
import json
import time
import torch
import torch.nn as nn
from torchvision import transforms, datasets, utils
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim
from tqdm import tqdm
from model import AlexNet
def main():
    """Train AlexNet on the flower dataset and keep the best checkpoint.

    Reads images from ``<cwd>/../../data_set/flower_data/{train,val}``,
    writes the index-to-class-name mapping to ``class_indices.json``, trains
    for a fixed number of epochs, and saves the weights to ``./AlexNet.pth``
    every time the validation accuracy improves.
    """
    # Use the first GPU when one is available, otherwise fall back to the CPU.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # Pre-processing pipelines for the two splits.
    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),  # random crop, resized to 224 x 224
                                     transforms.RandomHorizontalFlip(),  # random horizontal flip
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
        # Resize needs the (224, 224) tuple: a bare 224 only rescales the
        # shorter side and keeps the aspect ratio.
        "val": transforms.Compose([transforms.Resize((224, 224)),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}

    # The data root is two levels above the current working directory
    # (os.getcwd() is the working directory, not this file's directory).
    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)

    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)  # number of training images

    # class_to_idx looks like
    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4};
    # invert it so an index maps back to its class name.
    flower_list = train_dataset.class_to_idx
    cla_dict = {val: key for key, val in flower_list.items()}
    # Write the mapping to a JSON file so prediction code can read it back.
    json_str = json.dumps(cla_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 32  # images per step
    # Number of dataloader workers. os.cpu_count() may return None, so fall
    # back to 1 to keep min() well-defined.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    # Accuracy does not depend on sample order, so validation is not shuffled.
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    # Five flower classes, with explicit weight initialization.
    net = AlexNet(num_classes=5, init_weights=True)
    net.to(device)
    loss_function = nn.CrossEntropyLoss()  # multi-class cross entropy
    # Adam over all network parameters.
    optimizer = optim.Adam(net.parameters(), lr=0.0002)

    epochs = 10
    save_path = './AlexNet.pth'
    best_acc = 0.0  # best validation accuracy seen so far
    train_steps = len(train_loader)  # steps per epoch
    for epoch in range(epochs):
        # train
        net.train()  # enables dropout
        running_loss = 0.0  # summed batch losses, averaged when reported
        t1 = time.perf_counter()  # start of this epoch's training pass
        train_bar = tqdm(train_loader, file=sys.stdout)
        for images, labels in train_bar:
            optimizer.zero_grad()  # clear gradients from the previous step
            outputs = net(images.to(device))
            loss = loss_function(outputs, labels.to(device))
            loss.backward()
            optimizer.step()

            # Convert once to a Python float for both the statistics and the
            # progress-bar text.
            batch_loss = loss.item()
            running_loss += batch_loss

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     batch_loss)
        print(time.perf_counter() - t1)  # seconds spent training this epoch

        # validate
        net.eval()  # disables dropout
        acc = 0.0  # number of correct predictions in this epoch
        with torch.no_grad():  # no gradients are needed for validation
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_images, val_labels in val_bar:
                outputs = net(val_images.to(device))
                # Index of the largest score along dim 1 is the predicted class.
                predict_y = torch.max(outputs, dim=1)[1]
                acc += (predict_y == val_labels.to(device)).sum().item()

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        # Keep only the weights of the best-performing epoch.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')


if __name__ == '__main__':
    main()
问题:无法使用GPU训练,暂时不清楚具体原因。
解决方法:
将 DataLoader 的 num_workers 改为 1 即可。
# Model configurations: the convolution / pooling layout of each VGG variant,
# taken from the configuration table.
# An int is the number of convolution kernels of a conv layer; 'M' marks a
# max-pooling layer. Each row below is one stage that ends with a pooling layer.
# vgg11 -> configuration A (11 layers), and so on for the other variants.
cfgs = {
    'vgg11': [
        64, 'M',
        128, 'M',
        256, 256, 'M',
        512, 512, 'M',
        512, 512, 'M',
    ],
    'vgg13': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 'M',
        512, 512, 'M',
        512, 512, 'M',
    ],
    'vgg16': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 'M',
        512, 512, 512, 'M',
        512, 512, 512, 'M',
    ],
    'vgg19': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 256, 'M',
        512, 512, 512, 512, 'M',
        512, 512, 512, 512, 'M',
    ],
}