下面完整代码在github仓库:传送门
猫狗识别的数据在百度网盘:
链接:https://pan.baidu.com/s/1_jgubf2Ay47ONnhcJcQ-ng
提取码:2re9
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import os
import torch
class MyDataset(Dataset):
def __init__(self, main_dir, is_train=True):
self.dataset = []
data_filename = "TRAIN" if is_train else "TEST"
# 循环获得样本数据文件夹下的训练集或测试集文件夹下的文件夹名(类别名)
for i, cls_filename in enumerate(os.listdir(os.path.join(main_dir, data_filename))):
# print(i)
# print(os.listdir(os.path.join(main_dir)))
# print(os.listdir(os.path.join(main_dir, data_filename)))
# 循环获得每个类别文件夹下的数字图片
for img_data in os.listdir(os.path.join(main_dir, data_filename, cls_filename)):
self.dataset.append([os.path.join(main_dir, data_filename, cls_filename, img_data), i]) # i作标签
# print(self.dataset) # ['D:\\PycharmProjects\\2020-08-25-全连接神经网络\\MNIST_IMG\\TRAIN\\0\\0.jpg', 0]
# 装图片路径可以节省内存,避免列表装了所有图片导致内存爆炸。
def __len__(self):
return len(self.dataset) # 获取图片长度(个数),方便迭代
def __getitem__(self, index): # 里面包括迭代器
data = self.dataset[index] # 根据索引来取[图片数据路径、标签]
image_data = self.image_preprocess(Image.open(data[0])) # 拿到图片数据路径并打开得到图片数据,并且做预处理。
label_data = data[1] # 拿到图片标签
return image_data, label_data
def image_preprocess(self, x):
return transforms.Compose([
transforms.ToTensor(),
transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])(x) # 对图片数据进行预处理
if __name__ == '__main__':
data_path = r"G:\img"
dataset = MyDataset(data_path, True)
dataloader = DataLoader(dataset, 128, shuffle=True, num_workers=1, drop_last=True)
for data in dataloader: # [[图片数据, 标签], [图片数据, 标签]...]
print(data[0].shape)
print(data[1].shape)
import torch
import torch.nn as nn
import torch.utils.data as data
import matplotlib.pyplot as plt
from My_Dataset import MyDataset
from torch.optim import sgd, adam, adagrad, rmsprop, adadelta, adamax, adamw, sparse_adam, asgd
import cv2
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.layer1 = nn.Sequential(
nn.Linear(in_features=3*100*100, out_features=2048),
nn.Dropout(0.5),
nn.BatchNorm1d(2048), # N, H, W
# nn.LayerNorm(512), # C, H, W
# nn.InstanceNorm1d(512), # H, W (要求输入数据三维)
# nn.GroupNorm(2, 512) # C, H, W, 将512分成两组
nn.ReLU()
) # N, 512
self.layer2 = nn.Sequential(
nn.Linear(in_features=2048, out_features=1024),
nn.Dropout(0.5),
nn.BatchNorm1d(1024),
nn.ReLU()
) # N, 256
self.layer3 = nn.Sequential(
nn.Linear(in_features=1024, out_features=512),
nn.Dropout(0.5),
nn.BatchNorm1d(512),
nn.ReLU()
) # N, 128
self.layer4 = nn.Sequential(
nn.Linear(in_features=512, out_features=2),
) # N, 10
def forward(self, x):
# x = torch.reshape(x, [1, x.size(0), -1]) # 形状[1, N, C*H*W]
# print(x.shape)
# y1 = self.layer1(x)[0] # 这两行代码适用于在InstanceNorm1d的情况。将第一维去掉,变成两维
x = torch.reshape(x, [x.size(0), -1]) # 形状[N, C*H*W]
y1 = self.layer1(x)
y2 = self.layer2(y1)
y3 = self.layer3(y2)
self.y4 = self.layer4(y3)
out = torch.softmax(self.y4, 1)
return out
if __name__ == '__main__':
batch_size = 100
# 加载本地数据集
data_path = r"G:\cat_dog1"
train_data = MyDataset(data_path, True)
test_data = MyDataset(data_path, False)
train_loader = data.DataLoader(train_data, batch_size, shuffle=True)
test_loader = data.DataLoader(test_data, batch_size, shuffle=True)
if torch.cuda.is_available():
device = torch.device("cuda")
else:
device = torch.device("cpu")
net = Net().to(device)
net.load_state_dict(torch.load("./cat_dog_params.pth"))
# net = torch.load("./cat_dog_net.pth").to(device)
loss_function = nn.MSELoss()
# optimizer = torch.optim.SGD(net.parameters(), lr=1e-3, momentum=0.5, dampening=0,
# weight_decay=0, nesterov=False)
optimizer = torch.optim.Adam(net.parameters(), lr=1e-3, betas=(0.9, 0.99), eps=1e-8,
weight_decay=0, amsgrad=False) # betas:0.9越大越平滑, 默认效果最好
# weight_decay:表示正则化系数
# optimizer = adagrad.Adagrad(net.parameters())
# optimizer = adadelta.Adadelta(net.parameters())
# optimizer = rmsprop.RMSprop(net.parameters())
# optimizer = sgd.SGD(net.parameters(), 1e-3)
# optimizer = adam.Adam(net.parameters())
a = []
b = []
plt.ion()
net.train()
for epoch in range(100):
for i, (x, y) in enumerate(train_loader):
x = x.to(device)
y = y.to(device)
output = net(x)
# print(x.shape)
# print(output[0]) # 一张图片经过神经网络输出的十个值
# print(output.shape) # torch.Size([100, 10])
# print(y)
# 在1轴里面填1, 同时将标签形状变为(N, 1)
y = torch.zeros(y.cpu().size(0), 2).scatter_(1, y.cpu().reshape(-1, 1), 1).to(device)
# print(y)
# print(y.size(0))
loss = loss_function(output, y)
optimizer.zero_grad()
loss.backward()
optimizer.step()
if i % 10 == 0:
a.append(i + (epoch*(len(train_data) / batch_size)))
b.append(loss.item())
# plt.clf()
# plt.plot(a, b)
# plt.pause(0.01)
print("Epoch:{}, batch:{}/110, loss:{:.3f}".format(epoch, int(i), loss.item()))
# print(a)
torch.save(net.state_dict(), "./cat_dog_params.pth")
# # torch.save(net, "./cat_dog_net.pth")
if epoch % 5 == 0: # 每训练完五轮打印一次精度
net.eval()
eval_loss = 0
eval_acc = 0
for i, (x, y) in enumerate(test_loader):
x = x.to(device)
y = y.to(device)
out = net(x)
y = torch.zeros(y.cpu().size(0), 2).scatter_(1, y.cpu().reshape(-1, 1), 1).to(device)
loss = loss_function(out, y)
# print("Test_Loss:{:.3f}".format(loss.item()))
eval_loss += loss.item()*y.size(0)
arg_max = torch.argmax(out, 1)
y = y.argmax(1)
eval_acc += (arg_max==y).sum().item()
mean_loss = eval_loss / len(test_data)
mean_acc = eval_acc / len(test_data)
# print(y)
# print(torch.argmax(out, 1))
print("loss:{:.3f}, Acc:{:.3f}".format(mean_loss, mean_acc))
import os, random, shutil
import numpy as np
def moveFile(fileDir):
pathDir = os.listdir(fileDir) # 取图片的原始路径
print(pathDir)
# exit()
filenumber = len(pathDir)
# print(filenumber)
rate = 0.8 # 自定义抽取图片的比例,比方说100张抽10张,那就是0.1
picknumber = int(filenumber * rate) # 按照rate比例从文件夹中取一定数量图片
sample = random.sample(pathDir, picknumber) # 随机选取picknumber数量的样本图片
print(sample)
print(len(sample))
# exit()
for name in sample: # 取出4800张图片
print(name) # 0.2760.jpeg
# exit()
shutil.move(fileDir + "\\" +name, tarDir + "\\" +name)
for name in pathDir: # 再遍历一次猫所有的图片,如果该图片没有被取样为训练集,则把该样本放入测试集
if name not in sample:
print(name)
shutil.move(fileDir + "\\" + name, tarDir2 + "\\" +name)
else:
pass
return
if __name__ == '__main__':
# fileDir = r"G:\cat_dog2\Cat" # 源图片文件夹路径
# tarDir = r'G:\img\TRAIN\CAT' # 移动到新的文件夹路径
# tarDir2 = r"G:\img\TEST\CAT"
fileDir = r"G:\cat_dog2\Dog"
tarDir = r"G:\img\TRAIN\DOG"
tarDir2 = r"G:\img\TEST\DOG"
moveFile(fileDir)
import cv2
import os
# 将猫和狗的图片文件夹分别放在两个文件夹中
def read_directory(directory_name):
for filename in os.listdir(directory_name):
# print(filename) # 0.1.jpeg
strs = filename.strip().split(".") # ['0', '1', 'jpeg']
strs = list(filter(bool, strs)) # ['0', '1', 'jpeg']
if strs[0] == "0":
img = cv2.imread(directory_name + "/" + filename) # 读取图片数据
# print(img)
#####显示图片#######
# cv2.imshow(filename, img)
# cv2.waitKey(0)
#####################
#####保存图片#########
cv2.imwrite("G:\cat_dog2\Cat" + "/" + filename, img)
elif strs[0] == "1":
img = cv2.imread(directory_name + "/" + filename)
# print(img)
#####显示图片#######
# cv2.imshow(filename, img)
# cv2.waitKey(0)
#####################
#####保存图片#########
cv2.imwrite("G:\cat_dog2\Dog" + "/" + filename, img)
else:
pass
read_directory("G:\cat_dog\img")