本篇文章利用pytorch搭建CNN卷积神经网络实现简单花卉分类的任务
关于CNN卷积神经网络的基础知识以下文章有详细讲解,可供参考:
CNN笔记:通俗理解卷积神经网络
本文使用花卉数据集,该数据集包含了4317张图片,包含雏菊、蒲公英、玫瑰、向日葵、郁金香五种花卉,我已将数据集拆分为训练集和测试集两部分,以下是数据集目录:
数据集已放于以下链接,有需要可自行下载
花卉数据集
step1.读取train、test文件夹,得到其中的五个子文件夹名称(即五种花卉名称)
step2.读取每一种花卉数量,相加得到总图片数目
step3.创建矩阵存放所有图片数据以及其对应标签
step4.返回数据及其标签
def read_file(path):  # read data; the folder contains five sub-folders
    """Read the flower dataset rooted at ``path``.

    ``path`` must contain one sub-directory per flower class; every file in
    a sub-directory is assumed to be an image readable by OpenCV.

    Returns:
        data: uint8 array of shape (N, 128, 128, 3) with all images resized
            to 128x128 (BGR channel order, as loaded by cv2).
        data_label: float array of shape (N,) holding each image's class
            index (the position of its sub-directory in os.listdir order).
    """
    data_list = os.listdir(path)  # the 5 class sub-folder names
    # First pass: count images so both arrays can be allocated once.
    data_len = 0
    for flower_type in data_list:
        data_len += len(os.listdir(os.path.join(path, flower_type)))
    # dtype=np.uint8 so that transforms.ToTensor() downstream scales pixels
    # to [0, 1]; with the float64 default ToTensor keeps the raw 0-255
    # range and Normalize((0.5,...), (0.5,...)) no longer maps to [-1, 1].
    data = np.zeros((data_len, 128, 128, 3), dtype=np.uint8)
    data_label = np.zeros(data_len)
    i = 0
    for j, flower_type in enumerate(data_list):  # second pass: load images
        flower_list = os.listdir(os.path.join(path, flower_type))
        for img in flower_list:
            # NOTE(review): cv2.imread returns None for unreadable files,
            # which would make cv2.resize raise — assumes clean image dirs.
            data[i, :, :, :] = cv2.resize(cv2.imread(os.path.join(path, flower_type, img)), (128, 128))
            data_label[i] = j
            i += 1
    return data, data_label
# Load the raw image arrays and integer class labels for each split.
# NOTE(review): hard-coded absolute paths — adjust to your own dataset location.
train_data, train_label = read_file('C:/Users/superjw/Desktop/flowers/train')
test_data, test_label = read_file('C:/Users/superjw/Desktop/flowers/test')
print("already get data...")
step1.将数据转换为tensor(张量)形式并归一化处理
step2.准备Dataset与Dataloader
class ImgDataset(Dataset):
    """Dataset wrapping image data ``x`` and optional labels ``y``.

    A ``transform`` (if given) is applied to every sample on access.
    When ``y`` is None the dataset yields images only (inference mode);
    otherwise it yields ``(image, label)`` pairs.
    """

    def __init__(self, x=None, y=None, transform=None):
        self.x = x
        self.y = y
        self.transform = transform

    def __len__(self):
        return len(self.x)

    def __getitem__(self, index):
        sample = self.x[index]
        if self.transform is not None:
            sample = self.transform(sample)
        if self.y is None:
            return sample
        return sample, self.y[index]
# Identical preprocessing for both splits: convert HWC arrays to tensors,
# then shift each channel via (x - 0.5) / 0.5.
train_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
# Wrap the raw arrays; shuffle only the training split so evaluation
# order stays deterministic.
train_dataset = ImgDataset(train_data, train_label, train_transform)
test_dataset = ImgDataset(test_data, test_label, test_transform)
train_loader = DataLoader(train_dataset, batch_size=20, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=20, shuffle=False)
print("already process data...")
step1.搭建卷积神经网络
step2.定义损失函数与优化器
step3.查询GPU是否可用
step4.将模型置于GPU上等待训练
class Model(nn.Module):
    """Five-stage VGG-style CNN for 5-way flower classification.

    Expects input of shape (batch, 3, 128, 128); each conv stage halves
    the spatial size via max-pooling, ending at 512x4x4 before the
    fully-connected classifier head.
    """

    def __init__(self):
        super(Model, self).__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),    # in 3x128x128 -> 64x128x128
            nn.ReLU(),
            nn.MaxPool2d(2),              # -> 64x64x64
            nn.Conv2d(64, 128, 3, 1, 1),  # -> 128x64x64
            nn.ReLU(),
            nn.MaxPool2d(2),              # -> 128x32x32
            nn.Conv2d(128, 256, 3, 1, 1), # -> 256x32x32
            nn.ReLU(),
            nn.MaxPool2d(2),              # -> 256x16x16
            nn.Conv2d(256, 512, 3, 1, 1), # -> 512x16x16
            nn.ReLU(),
            nn.MaxPool2d(2),              # -> 512x8x8
            nn.Conv2d(512, 512, 3, 1, 1), # -> 512x8x8
            nn.ReLU(),
            nn.MaxPool2d(2),              # -> 512x4x4
        )
        self.fc = nn.Sequential(
            nn.Linear(512 * 4 * 4, 1024), # in 8192, out 1024
            nn.ReLU(),
            nn.Linear(1024, 512),         # in 1024, out 512
            nn.ReLU(),
            nn.Linear(512, 5)             # in 512, out 5 (flower classes)
        )

    def forward(self, x):
        # forward() was missing from this snippet (it is present in the
        # complete listing below); without it, calling the model raises
        # NotImplementedError.
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)  # flatten to (batch, 8192)
        return self.fc(out)
# Create the model FIRST — the optimizer needs model.parameters().  The
# original snippet constructed the optimizer before `model` existed, which
# raises NameError when run top-to-bottom; this matches the order used in
# the complete listing below.
print("if GPU can be used?:", torch.cuda.is_available())  # expect True; GPU required below
model = Model().cuda()  # move the model to the GPU
loss = nn.CrossEntropyLoss()  # loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # optimizer
print("already prepared model...")
step1.设置训练次数
step2.将模型置于训练状态
step3.利用训练集不断更新网络参数
step4.输出每一次训练后网络的准确度
num_epoch = 30
print("start training...")
for epoch in range(num_epoch):
    epoch_loss = 0
    epoch_correct = 0
    model.train()  # put the model in training mode
    # Each batch from the loader is an (inputs, labels) pair.
    for inputs, labels in train_loader:
        inputs = inputs.type(torch.FloatTensor)
        labels = labels.type(torch.FloatTensor)
        optimizer.zero_grad()                       # clear accumulated gradients
        outputs = model(inputs.cuda())              # forward pass
        batch_loss = loss(outputs, labels.cuda().long())  # compute loss
        batch_loss.backward()                       # backpropagate
        optimizer.step()                            # update parameters
        predictions = np.argmax(outputs.cpu().data.numpy(), axis=1)
        epoch_correct += np.sum(predictions == labels.numpy())  # count correct predictions
        epoch_loss += batch_loss.item()             # accumulate batch loss
    print("epoch:{}".format(epoch + 1))
    print("model's accuracy in train_data is:{}".format(epoch_correct / len(train_label)))
    print("model's loss in train_data is:{}".format(epoch_loss / len(train_label)))
print("finish training...")
step1.将网络置于评估模式
step2.计算网络在测试集上的准确度
print("start testing...")
model.eval()  # switch to evaluation mode
test_acc = 0
with torch.no_grad():  # no gradients needed during evaluation
    for inputs, labels in test_loader:
        inputs = inputs.type(torch.FloatTensor)
        labels = labels.type(torch.FloatTensor)
        outputs = model(inputs.cuda())  # forward pass
        test_acc += np.sum(np.argmax(outputs.cpu().data.numpy(), axis=1) == labels.numpy())  # count correct predictions
print("model's accuracy in test_data is:{}".format(test_acc / len(test_label)))
torch.save(model.state_dict(), "C:/CodeProject/python/cnn/cnn.pt")
print("already save model")
epoch:30
model's accuracy in train_data is:0.976594027441485
model's loss in train_data is:0.0043283372970561425
finish training...
start testing...
model's accuracy in test_data is:0.6483333333333333
already save model
以上就是利用卷积神经网络实现图像分类的介绍,完整代码如下:
import os
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import cv2
import numpy as np
from torch.utils.data import DataLoader, Dataset
def read_file(path):  # read data; the folder contains five sub-folders
    """Read the flower dataset rooted at ``path``.

    ``path`` must contain one sub-directory per flower class; every file in
    a sub-directory is assumed to be an image readable by OpenCV.

    Returns:
        data: uint8 array of shape (N, 128, 128, 3) with all images resized
            to 128x128 (BGR channel order, as loaded by cv2).
        data_label: float array of shape (N,) holding each image's class
            index (the position of its sub-directory in os.listdir order).
    """
    data_list = os.listdir(path)  # the 5 class sub-folder names
    # First pass: count images so both arrays can be allocated once.
    data_len = 0
    for flower_type in data_list:
        data_len += len(os.listdir(os.path.join(path, flower_type)))
    # dtype=np.uint8 so that transforms.ToTensor() downstream scales pixels
    # to [0, 1]; with the float64 default ToTensor keeps the raw 0-255
    # range and Normalize((0.5,...), (0.5,...)) no longer maps to [-1, 1].
    data = np.zeros((data_len, 128, 128, 3), dtype=np.uint8)
    data_label = np.zeros(data_len)
    i = 0
    for j, flower_type in enumerate(data_list):  # second pass: load images
        flower_list = os.listdir(os.path.join(path, flower_type))
        for img in flower_list:
            # NOTE(review): cv2.imread returns None for unreadable files,
            # which would make cv2.resize raise — assumes clean image dirs.
            data[i, :, :, :] = cv2.resize(cv2.imread(os.path.join(path, flower_type, img)), (128, 128))
            data_label[i] = j
            i += 1
    return data, data_label
class ImgDataset(Dataset):
    """Dataset wrapping image data ``x`` and optional labels ``y``.

    A ``transform`` (if given) is applied to every sample on access.
    When ``y`` is None the dataset yields images only (inference mode);
    otherwise it yields ``(image, label)`` pairs.
    """

    def __init__(self, x=None, y=None, transform=None):
        self.x = x
        self.y = y
        self.transform = transform

    def __len__(self):
        return len(self.x)

    def __getitem__(self, index):
        sample = self.x[index]
        if self.transform is not None:
            sample = self.transform(sample)
        if self.y is None:
            return sample
        return sample, self.y[index]
class Model(nn.Module):
    """Five-stage VGG-style CNN for 5-way flower classification.

    Input is (batch, 3, 128, 128); every conv stage halves the spatial
    size via 2x2 max-pooling, so five stages end at a 512x4x4 feature map
    that feeds a three-layer fully-connected classifier.
    """

    def __init__(self):
        super(Model, self).__init__()
        # Build the conv trunk stage by stage; module indices (cnn.0,
        # cnn.1, ...) are identical to writing the Sequential out by hand.
        stages = []
        in_channels = 3
        for out_channels in (64, 128, 256, 512, 512):
            stages.append(nn.Conv2d(in_channels, out_channels, 3, 1, 1))
            stages.append(nn.ReLU())
            stages.append(nn.MaxPool2d(2))  # halves height and width
            in_channels = out_channels
        self.cnn = nn.Sequential(*stages)
        self.fc = nn.Sequential(
            nn.Linear(512 * 4 * 4, 1024),  # 8192 -> 1024
            nn.ReLU(),
            nn.Linear(1024, 512),          # 1024 -> 512
            nn.ReLU(),
            nn.Linear(512, 5),             # 512 -> 5 flower classes
        )

    def forward(self, x):
        features = self.cnn(x)
        flat = features.view(features.size()[0], -1)  # (batch, 8192)
        return self.fc(flat)
if __name__ == '__main__':
    # Load every image (resized to 128x128) and its class index for both splits.
    # NOTE(review): hard-coded absolute paths — adjust to your dataset location.
    train_data, train_label = read_file('C:/Users/superjw/Desktop/flowers/train')
    test_data, test_label = read_file('C:/Users/superjw/Desktop/flowers/test')
    print("already get data...")
    train_transform = transforms.Compose([  # preprocessing pipeline
        transforms.ToTensor(),  # convert to tensor
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # per-channel normalization
    ])
    test_transform = transforms.Compose([  # same preprocessing for the test split
        transforms.ToTensor(),  # convert to tensor
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # per-channel normalization
    ])
    train_dataset = ImgDataset(train_data, train_label, train_transform)
    train_loader = DataLoader(train_dataset, batch_size=20, shuffle=True)
    test_dataset = ImgDataset(test_data, test_label, test_transform)
    test_loader = DataLoader(test_dataset, batch_size=20, shuffle=False)
    print("already process data...")
    print("if GPU can be used?:", torch.cuda.is_available())  # expect True; GPU required below
    model = Model().cuda()  # move the model to the GPU
    print("already prepared model...")
    loss = nn.CrossEntropyLoss()  # loss function
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # optimizer
    num_epoch = 30
    print("start training...")
    for epoch in range(num_epoch):
        train_loss = 0
        train_acc = 0
        model.train()  # put the model in training mode
        for i, data in enumerate(train_loader):  # data = [inputs, labels]
            data[0] = data[0].type(torch.FloatTensor)
            data[1] = data[1].type(torch.FloatTensor)
            optimizer.zero_grad()  # clear accumulated gradients
            pred_label = model(data[0].cuda())  # forward pass
            batch_loss = loss(pred_label, data[1].cuda().long())  # compute loss
            batch_loss.backward()  # backpropagate
            optimizer.step()  # update parameters
            train_acc += np.sum(np.argmax(pred_label.cpu().data.numpy(), axis=1) == data[1].numpy())  # count correct predictions
            train_loss += batch_loss.item()  # accumulate batch loss
        print("epoch:{}" .format(epoch+1))
        print("model's accuracy in train_data is:{}".format(train_acc/len(train_label)))
        print("model's loss in train_data is:{}".format(train_loss/len(train_label)))
    print("finish training...")
    print("start testing...")
    model.eval()  # switch to evaluation mode
    test_acc = 0
    with torch.no_grad():  # no gradients needed during evaluation
        for i, data in enumerate(test_loader):
            data[0] = data[0].type(torch.FloatTensor)
            data[1] = data[1].type(torch.FloatTensor)
            pred_label = model(data[0].cuda())  # forward pass
            test_acc += np.sum(np.argmax(pred_label.cpu().data.numpy(), axis=1) == data[1].numpy())  # count correct predictions
    print("model's accuracy in test_data is:{}".format(test_acc / len(test_label)))
    torch.save(model.state_dict(), "C:/CodeProject/python/cnn/cnn.pt")
    print("already save model")