from __future__ import print_function, division
import time

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, models, transforms
size = (224, 224)
# Training-set preprocessing and augmentation.
# The spatial transforms operate on PIL images, so they come before ToTensor();
# Normalize() only accepts tensors, so it comes last (see the note at the end).
train_transforms = transforms.Compose([
    transforms.Resize(size),                # resize the image to 224x224
    transforms.RandomVerticalFlip(0.5),     # random vertical flip
    transforms.RandomHorizontalFlip(0.3),   # random horizontal flip
    transforms.ToTensor(),                  # convert the PIL image to a float tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # normalize with ImageNet statistics
])
test_transforms = transforms.Compose([
    transforms.Resize(size),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
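# Optional sanity check (not part of the training pipeline): run a dummy PIL
# image through the test pipeline and confirm it yields a 3x224x224 float tensor.
from PIL import Image

_dummy = Image.new('RGB', (500, 375))
print(test_transforms(_dummy).shape)  # torch.Size([3, 224, 224])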
# Dataset setup
BATCH_SIZE = 32  # number of images loaded per batch
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')  # use the GPU if one is available
print(DEVICE.type)
EPOCHS = 50
# dataset locations
train_dir = "/gpfs/home/xxy_1997032/wjl/data/data_spilit/train"  # training set path
test_dir = "/gpfs/home/xxy_1997032/wjl/data/data_spilit/test"    # test set path
train_set = datasets.ImageFolder(train_dir, transform=train_transforms)
test_set = datasets.ImageFolder(test_dir, transform=test_transforms)
# number of images in the training and test sets
train_num = len(train_set)
test_num = len(test_set)
# iterations per epoch: BATCH_SIZE images are loaded per training step
train_iter = train_num // BATCH_SIZE
test_iter = test_num // BATCH_SIZE
# Wrap the datasets in DataLoaders; shuffle=True randomizes the training order
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=8, pin_memory=True)
test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=8, pin_memory=True)
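# Optional sanity check: without access to the cluster paths above, torchvision's
# FakeData can stand in for ImageFolder (a stand-in assumption, for illustration only).
from torchvision.datasets import FakeData

_fake_set = FakeData(size=64, image_size=(3, 224, 224), num_classes=5,
                     transform=test_transforms)
_fake_loader = DataLoader(_fake_set, batch_size=BATCH_SIZE, shuffle=True)
_images, _labels = next(iter(_fake_loader))
print(_images.shape, _labels.shape)  # torch.Size([32, 3, 224, 224]) torch.Size([32])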
class Dog_Cat_Net(nn.Module):
    def __init__(self, num_classes=5):
        super(Dog_Cat_Net, self).__init__()
        # Start from VGG16 pretrained on ImageNet
        net = models.vgg16(pretrained=True)
        # Replace the original classifier with an empty Sequential, keeping
        # only the convolutional features (and the 7x7 adaptive avgpool)
        net.classifier = nn.Sequential()
        self.features = net
        # New classifier head: flattened 512*7*7 features -> num_classes logits
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 512),
            nn.ReLU(True),
            nn.Dropout(0.8),
            nn.Linear(512, 128),
            nn.ReLU(True),
            nn.Dropout(0.8),
            nn.Linear(128, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
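# Optional sanity check: a dummy forward pass confirms the flattened VGG16
# features match the 512*7*7 input expected by the first Linear layer.
_net = Dog_Cat_Net(num_classes=5)  # downloads the pretrained weights on first run
_net.eval()
with torch.no_grad():
    print(_net(torch.randn(2, 3, 224, 224)).shape)  # torch.Size([2, 5])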
PATH = './my_vgg.pth'
model = Dog_Cat_Net()
# model.load_state_dict(torch.load(PATH))  # uncomment to resume from a saved checkpoint
model.to(DEVICE)
# Optimize only the new classifier head; the pretrained features keep their weights
optimizer = optim.SGD(model.classifier.parameters(), lr=0.01, momentum=0.9)  # SGD optimizer
# optimizer = optim.Adam(model.classifier.parameters(), lr=0.001, weight_decay=0.01)
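# Since only the classifier is in the optimizer, the backbone can also be frozen
# so that autograd skips gradient computation for it entirely (saves memory and
# compute); this assumes no fine-tuning of the backbone is wanted.
for param in model.features.parameters():
    param.requires_grad_(False)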
# TensorBoard writer for the loss/accuracy curves
tb_writer = SummaryWriter('log/vgg16/')
# tags become the chart titles in TensorBoard
train_tags = 'Train-Loss'
test_tags = 'Test-Loss'
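# The logged curves can be viewed with: tensorboard --logdir log/vgg16/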
def train(net, EPOCH):
    net.train()
    epoch = EPOCH
    train_loss = 0.0
    train_correct = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(DEVICE), target.to(DEVICE)
        # reset the gradients accumulated by the previous step
        optimizer.zero_grad()
        output = net(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        # accumulate the summed loss and the number of correct predictions
        train_loss += loss.item() * data.size(0)
        pred = output.argmax(dim=1, keepdim=True)
        train_correct += pred.eq(target.view_as(pred)).sum().item()
        if batch_idx % 10 == 0:
            print('Train Epoch:{} [{}/{} ({:.0f}%)]\tLoss:{:.6f}'.format(
                EPOCH + 1, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()
            ))
    # average loss per sample over the epoch
    train_loss /= len(train_loader.dataset)
    tb_writer.add_scalars(train_tags, {'Train': train_loss}, epoch)
    print(train_correct)
    return train_correct
# loss function used by the test loop
criterion = nn.CrossEntropyLoss()
def test(net, EPOCH):
    net.eval()
    epoch = EPOCH
    test_loss = 0.0
    test_correct = 0
    with torch.no_grad():  # no gradients are needed during evaluation
        for batch_idx, (data, target) in enumerate(test_loader):
            data, target = data.to(DEVICE), target.to(DEVICE)
            output = net(data)
            # accumulate the summed loss over the whole test set
            test_loss += criterion(output, target).item() * data.size(0)
            pred = output.argmax(dim=1, keepdim=True)
            test_correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    tb_writer.add_scalars(test_tags, {'Test': test_loss}, epoch)
    print('\nTest set: Average loss:{:.4f}, Accuracy:{}/{} ({:.0f}%)\n'.format(
        test_loss, test_correct, len(test_loader.dataset),
        100. * test_correct / len(test_loader.dataset)
    ))
    return test_correct
t1 = time.time()
for train_epoch in range(EPOCHS):
    train_accuracy = train(model, train_epoch) / len(train_loader.dataset)
    print(train_accuracy)
    test_accuracy = test(model, train_epoch) / len(test_loader.dataset)
    print(test_accuracy)
    tb_writer.add_scalars('Train_accuracy/Test_accuracy',
                          {'Train': train_accuracy, 'Test': test_accuracy}, train_epoch)
tb_writer.close()
# save the trained weights once training has finished
torch.save(model.state_dict(), PATH)
t2 = time.time()
print('VGG16_time:{}s'.format(t2 - t1))
Note:
transforms.ToTensor() must come after transforms.RandomVerticalFlip() and transforms.RandomHorizontalFlip(), and before transforms.Normalize(); otherwise an error is raised.
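A minimal sketch of the failure mode: Normalize() only accepts tensors, so placing it before ToTensor() raises a TypeError on a PIL image (the dummy image below is just for the demo).

from PIL import Image
from torchvision import transforms

img = Image.new('RGB', (224, 224))
bad = transforms.Compose([
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    transforms.ToTensor(),
])
try:
    bad(img)
except TypeError as err:
    print('Normalize before ToTensor fails:', err)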