Before digging into any deep-learning framework, I believe understanding its inputs is where everything starts. So today let's look at data input in PyTorch: how the common data types convert into one another, plus frequently used code snippets.
A lot of PyTorch test code reads images with PIL, while I prefer OpenCV, so let's first compare PIL, cv2, and torchvision.transforms.
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np

image = Image.open('test.jpg')
print(type(image))             # a PIL Image type
print(image.size)              # (w, h)
print(image.mode)              # out: 'RGB'
print(image.getpixel((0, 0)))  # out: (143, 198, 201)

plt.figure(1)
plt.imshow(image)              # displays correctly
plt.show()

# PIL's resize takes (w, h)
image = image.resize((200, 100), Image.NEAREST)
print(image.size)              # out: (200, 100)

image = np.array(image, dtype=np.float32)  # now a numpy array
print(image.shape)             # (100, 200, 3) -- numpy layout is (h, w, c)

plt.figure(1)
plt.imshow(image)              # NOT displayed correctly: float pixels must lie in [0, 1]
plt.show()
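The failed display at the end is matplotlib's doing: plt.imshow() expects float data scaled to [0, 1] (uint8 data may span [0, 255]). Two minimal fixes, assuming image is the float32 array from above:

# Either rescale the float array into [0, 1]...
plt.imshow(image / 255.0)
plt.show()
# ...or cast back to uint8 before displaying.
plt.imshow(image.astype(np.uint8))
plt.show()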
import cv2
import matplotlib.pyplot as plt
import numpy as np

image = cv2.imread('test.jpg')
print(type(image))    # <class 'numpy.ndarray'> -- cv2 returns numpy directly
print(image.dtype)    # uint8 -- images are always read in as uint8
print(image.shape)    # (h, w, c)
print(image)          # channel order is BGR
'''
array([
       [ [143, 198, 201 (dim=3)], [143, 198, 201], ... (w=200)],
       [ [143, 198, 201], [143, 198, 201], ... ],
       ...(h=100)
      ], dtype=uint8)
'''
# cv2.resize takes dsize as (w, h)
image = cv2.resize(image, (100, 200), interpolation=cv2.INTER_LINEAR)
print(image.dtype)    # out: dtype('uint8')
print(image.shape)    # out: (200, 100, 3)

plt.figure(1)
plt.imshow(image)     # the image appears, but colors are swapped (BGR vs RGB)
plt.show()
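"Appears" deserves the caveat in the comment: matplotlib assumes RGB while cv2 stores BGR, so red and blue come out swapped. Converting just for display fixes it:

plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))  # BGR -> RGB only for display
plt.show()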
The key piece is the to_tensor function that torchvision.transforms ultimately calls:
def to_tensor(pic):
    """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.

    Takes a PIL image or a numpy image and returns the converted tensor.
    """
    if not(_is_pil_image(pic) or _is_numpy_image(pic)):
        raise TypeError('pic should be PIL Image or ndarray. Got {}'.format(type(pic)))

    if isinstance(pic, np.ndarray):
        # numpy image: HWC -> CHW
        img = torch.from_numpy(pic.transpose((2, 0, 1)))
        if isinstance(img, torch.ByteTensor):
            return img.float().div(255)
        else:
            return img

    # accimage is an optional backend; torchvision sets it to None if unavailable
    if accimage is not None and isinstance(pic, accimage.Image):
        nppic = np.zeros([pic.channels, pic.height, pic.width], dtype=np.float32)
        pic.copyto(nppic)
        return torch.from_numpy(nppic)

    # handle PIL Image
    if pic.mode == 'I':
        img = torch.from_numpy(np.array(pic, np.int32, copy=False))
    elif pic.mode == 'I;16':
        img = torch.from_numpy(np.array(pic, np.int16, copy=False))
    elif pic.mode == 'F':
        img = torch.from_numpy(np.array(pic, np.float32, copy=False))
    elif pic.mode == '1':
        img = 255 * torch.from_numpy(np.array(pic, np.uint8, copy=False))
    else:
        img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
    # PIL image mode: L, P, I, F, RGB, YCbCr, RGBA, CMYK
    if pic.mode == 'YCbCr':
        nchannel = 3
    elif pic.mode == 'I;16':
        nchannel = 1
    else:
        nchannel = len(pic.mode)
    img = img.view(pic.size[1], pic.size[0], nchannel)
    # put it from HWC to CHW format
    # yikes, this transpose takes 80% of the loading time/CPU
    img = img.transpose(0, 1).transpose(0, 2).contiguous()
    if isinstance(img, torch.ByteTensor):
        return img.float().div(255)
    else:
        return img
After converting a PIL image to numpy, the layout is (h, w, c) and the channel order is RGB;
OpenCV gives you a numpy array straight from imread, with layout (h, w, c) and channel order BGR;
the torchvision.transforms functions detect whether the input is a PIL image or a numpy array and convert it into the (c, h, w) float tensor that torch expects.
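A quick sketch to verify those three points; the file name is a placeholder, and both input types go through the same to_tensor path shown above:

import numpy as np
import torch
from PIL import Image
from torchvision import transforms

pil_img = Image.open('test.jpg')   # PIL, RGB
np_img = np.array(pil_img)         # numpy, (h, w, c), uint8

t = transforms.ToTensor()
t1 = t(pil_img)
t2 = t(np_img)

print(t1.shape)             # torch.Size([3, h, w]) -- CHW
print(t1.dtype)             # torch.float32
print(t1.max() <= 1.0)      # True: uint8 input is divided by 255
print(torch.equal(t1, t2))  # True: both input types give the same tensor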
Code for converting between PIL and cv2 in both directions:
import cv2
import numpy as np
from PIL import Image

def pil_cv2(img_path):
    """Read with PIL, return a cv2-style BGR numpy array."""
    image = Image.open(img_path)
    img = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR)
    return img

def cv2_pil(img_path):
    """Read with cv2, return a PIL RGB image."""
    image = cv2.imread(img_path)
    image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    return image
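As a sanity check, the two helpers should invert each other; a minimal sketch (same placeholder image path):

a = np.asarray(Image.open('test.jpg'))                 # RGB array via PIL
b = cv2.cvtColor(pil_cv2('test.jpg'), cv2.COLOR_BGR2RGB)
print(np.array_equal(a, b))   # True: PIL -> BGR -> RGB recovers the original

c = np.asarray(cv2_pil('test.jpg'))                    # RGB array via the cv2 path
print(np.array_equal(a, c))   # usually True, but the two libraries' JPEG decoders
                              # are not guaranteed bit-identical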
The test-time code below shows that the pixel order fed into the torch model is RGB, which is probably why PIL is used with torch so often.
def totorch(img_path, mode):
    # read the image as an RGB (PIL) or BGR (cv2) numpy array
    if mode == "PIL":
        img = Image.open(img_path)
        if img.mode == 'L':
            img = img.convert('RGB')
        img = np.array(img)
    else:
        img = cv2.imread(img_path)
    height, width, _ = img.shape
    # shrink so the image area is at most 1700 * 1200
    max_im_shrink = np.sqrt(
        1700 * 1200 / (img.shape[0] * img.shape[1]))
    image = cv2.resize(img, None, None, fx=max_im_shrink,
                       fy=max_im_shrink, interpolation=cv2.INTER_LINEAR)
    # image = cv2.resize(img, (640, 640))
    x = to_chw_bgr(image)   # external helper; per its name, HWC -> CHW plus a channel flip
    x = x.astype('float32')
    x -= cfg.img_mean       # external config: per-channel mean to subtract
    if mode == "PIL":
        x = x[[2, 1, 0], :, :]   # extra flip so both read paths feed the net the same order
    x = Variable(torch.from_numpy(x).unsqueeze(0))
    if use_cuda:
        x = x.cuda()
    t1 = time.time()
    y = net(x)
Next topic: saving and loading models. First, what exactly is a state_dict?
In PyTorch, the learnable parameters of a torch.nn.Module (its weights and biases) are contained in the model's parameters, accessible via model.parameters(). A state_dict is simply a Python dict object that maps each layer to its parameter tensors. Note that only layers with learnable parameters (convolutional layers, linear layers, and so on) have entries in a model's state_dict. The optimizers in torch.optim also have a state_dict attribute, which holds the optimizer's state information along with the hyperparameters in use.
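A minimal sketch of what both dicts look like, using a throwaway model (layer sizes are arbitrary):

import torch
import torch.nn as nn
import torch.optim as optim

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.ReLU())
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

for name, tensor in model.state_dict().items():
    print(name, tuple(tensor.shape))
# 0.weight (8, 3, 3, 3)   -- the conv layer
# 0.bias (8,)
# 1.weight (8,) ...       -- batchnorm, plus its running_mean / running_var buffers
# the ReLU contributes nothing: it has no learnable parameters

print(optimizer.state_dict().keys())  # dict_keys(['state', 'param_groups'])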
Save (only the state_dict, the recommended approach):
torch.save(model.state_dict(), PATH)
Load:
model = TheModelClass(*args, **kwargs)
model.load_state_dict(torch.load(PATH))
model.eval()
Save (the entire model):
torch.save(model, PATH)
Load:
# Model class must be defined somewhere
model = torch.load(PATH)
model.eval()
Save (a general checkpoint, e.g. for resuming training):
torch.save({
'epoch': epoch,
'model_state_dict': model.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
'loss': loss,
...
}, PATH)
Load:
model = TheModelClass(*args, **kwargs)
optimizer = TheOptimizerClass(*args, **kwargs)
checkpoint = torch.load(PATH)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']
model.eval()
# - or -
model.train()
Save (multiple models in one file):
torch.save({
'modelA_state_dict': modelA.state_dict(),
'modelB_state_dict': modelB.state_dict(),
'optimizerA_state_dict': optimizerA.state_dict(),
'optimizerB_state_dict': optimizerB.state_dict(),
...
}, PATH)
Load:
modelA = TheModelAClass(*args, **kwargs)
modelB = TheModelBClass(*args, **kwargs)
optimizerA = TheOptimizerAClass(*args, **kwargs)
optimizerB = TheOptimizerBClass(*args, **kwargs)
checkpoint = torch.load(PATH)
modelA.load_state_dict(checkpoint['modelA_state_dict'])
modelB.load_state_dict(checkpoint['modelB_state_dict'])
optimizerA.load_state_dict(checkpoint['optimizerA_state_dict'])
optimizerB.load_state_dict(checkpoint['optimizerB_state_dict'])
modelA.eval()
modelB.eval()
# - or -
modelA.train()
modelB.train()
Save (a model wrapped in torch.nn.DataParallel):
torch.save(model.module.state_dict(), PATH)
Load: any of the loading approaches above works, but remember the map_location keyword when loading onto a different device.
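For example, a sketch of loading a GPU-saved state_dict onto a CPU-only machine (PATH and TheModelClass as above):

device = torch.device('cpu')
model = TheModelClass(*args, **kwargs)
# map_location remaps every storage in the file onto the target device
model.load_state_dict(torch.load(PATH, map_location=device))
model.eval()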
For a feed-forward CNN, writing out every step in a forward function gets tedious; there are two ways to simplify it, ModuleList and Sequential. When building an nn.Sequential object, you must take care that each block's output size matches the next block's input size.
First style:
net1=nn.Sequential()
net1.add_module('conv', nn.Conv2d(3,3,3))
net1.add_module('batchnorm', nn.BatchNorm2d(3))
net1.add_module('activation_layer', nn.ReLU())
Second style:
net2 = nn.Sequential(
nn.Conv2d(3, 3, 3),
nn.BatchNorm2d(3),
nn.ReLU())
Third style:
from collections import OrderedDict
net3=nn.Sequential(OrderedDict([
("conv1",nn.Conv2d(3,3,3)),
("batch",nn.BatchNorm2d(3)),
("activat",nn.ReLU())
]))
Print the three networks:
print(net1)
'''Sequential(
  (conv): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
  (batchnorm): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (activation_layer): ReLU()
)'''
print(net2)
'''Sequential(
  (0): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
  (1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU()
)'''
print(net3)
'''Sequential(
  (conv1): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1))
  (batch): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (activat): ReLU()
)'''
So a submodule can be pulled out by name or by index:
x = torch.randn(3, 3, 320, 320)
y = net1(x)
net1.conv, net2[0], net3.conv1
'''(Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1)),
 Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1)),
 Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1)))'''
vgg_cfg = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M',
           512, 512, 512, 'M']

def vgg(cfg, i, batch_norm=False):
    layers = []
    in_channels = i
    for v in cfg:
        if v == 'M':      # plain max-pool
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        elif v == 'C':    # max-pool with ceil_mode, so odd sizes round up
            layers += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)]
        else:             # conv (+ optional batchnorm) + relu
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
    # two extra convolutions appended after the VGG body (SSD-style conv6/conv7)
    conv6 = nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6)
    conv7 = nn.Conv2d(1024, 1024, kernel_size=1)
    layers += [conv6,
               nn.ReLU(inplace=True), conv7, nn.ReLU(inplace=True)]
    return layers
vgg = nn.ModuleList(vgg(vgg_cfg, 3))   # note: this rebinds the name `vgg` to the module list
x = torch.randn(3, 3, 320, 320)
for i in range(len(vgg)):
    x = vgg[i](x)
print(vgg)
'''ModuleList(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  ...
)'''
Get each layer's name and its structure:
for name, module in vgg.named_children():
    print("name is {}".format(name))
    print(module)
Output (truncated):
name is 0
Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
name is 1
ReLU(inplace=True)
name is 2
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
The difference between named_modules() and named_children(): named_children() returns an iterator over only the direct child modules, while named_modules() returns an iterator over all modules, recursing into children (and including the module itself).
import torch
import torch.nn as nn

class TestModule(nn.Module):
    def __init__(self):
        super(TestModule, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(16, 32, 3, 1),
            nn.ReLU(inplace=True)
        )
        self.layer2 = nn.Sequential(
            nn.Linear(32, 10)
        )

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        return x   # bug fix: the original forward was missing the return

model = TestModule()
for name, module in model.named_children():
    print('children module:', name)
for name, module in model.named_modules():
    print('modules:', name)
Output:
children module: layer1
children module: layer2
modules:
modules: layer1
modules: layer1.0
modules: layer1.1
modules: layer2
modules: layer2.0
Get the parameter names and their values:
for name, param in vgg.named_parameters():
    print("name is {}".format(name))
    print(param)
Output (heavily truncated):
name is 0.weight
Parameter containing:
tensor([[[[-3.6296e-02,  1.0836e-01,  1.3510e-01],
          [-1.3361e-01,  5.9745e-02,  2.4438e-02],
          [-1.7095e-01, -9.9576e-02,  1.1297e-01]],
         ...]], requires_grad=True)
name is 0.bias
Parameter containing:
tensor([ 1.8735e-01,  3.1941e-03, -1.8747e-01, ...], requires_grad=True)
name is 2.weight
Parameter containing:
tensor([[[[-2.3275e-02, -3.5920e-02, -3.5464e-02],
          [-1.2356e-02, -1.4983e-02, -2.8448e-02],
          [ 7.7514e-03,  3.7017e-02, -3.8343e-02]],
         ...]], requires_grad=True)
The difference between the two approaches: ModuleList takes a list of modules as its argument, and unlike Sequential it has no automatic forward pass, so you have to iterate over its layers yourself.
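Because ModuleList defines no forward() of its own, the iteration must be written out in the parent module; a minimal sketch (the class itself is made up for illustration):

import torch
import torch.nn as nn

class StackedLinear(nn.Module):
    def __init__(self):
        super(StackedLinear, self).__init__()
        # ModuleList registers each layer's parameters, but has no forward()
        self.layers = nn.ModuleList([nn.Linear(10, 10) for _ in range(3)])

    def forward(self, x):
        for layer in self.layers:   # iterate explicitly
            x = torch.relu(layer(x))
        return x

y = StackedLinear()(torch.randn(2, 10))   # y.shape == torch.Size([2, 10])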
data_transform = transforms.Compose([
    transforms.Resize(224),        # scales the shorter side to 224
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
train_dataset = datasets.ImageFolder(root="/disk3/dataset/classfy/finaldataset/train",
                                     transform=data_transform)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=4, shuffle=True)
val_dataset = datasets.ImageFolder(root="/disk3/dataset/classfy/finaldataset/val",
                                   transform=data_transform)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=4, shuffle=True)
ImageFolder only needs the data laid out one subfolder per class, like this (class_a and class_b are placeholder class names):

train/
    class_a/xxx.jpg
    class_b/yyy.jpg
val/
    class_a/xxx.jpg
    class_b/yyy.jpg
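With that layout in place, a quick sanity check on what the loader yields (the class names in class_to_idx come from your folder names):

print(train_dataset.class_to_idx)   # e.g. {'class_a': 0, 'class_b': 1}
images, labels = next(iter(train_loader))
print(images.shape)                 # torch.Size([4, 3, 224, 224])
print(labels)                       # tensor of 4 integer class indices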
To write your own dataset class, subclass Dataset and override its methods; the most important ones are:
(1) __init__: read the dataset in whatever format it comes in (annotation files, paths, ...) and handle the constructor arguments;
(2) __getitem__: make dataset[i] return the i-th sample, i.e. actually load the data;
(3) __len__: len(dataset) returns the size of the dataset.
class mydataset(Dataset):
    def __init__(self, txt_path, transform=None):
        self.image_path, self.label_name = self.get_image_path(txt_path)
        self.transform = transform   # bug fix: the original stored the global data_transform

    def __getitem__(self, index):
        img_path = self.image_path[index]
        label = int(self.label_name[index])
        img = Image.open(img_path)
        if self.transform is not None:
            img = self.transform(img)
        return img, label

    def __len__(self):
        return len(self.label_name)

    def get_image_path(self, path):
        # each line of the txt file is "<image_path> <label>"
        with open(path, "r") as fr:
            total_list = [i.replace("\n", "") for i in fr.readlines()]
        img_list = []
        label_list = []
        for i in total_list:
            i = i.split(" ")
            img_list.append(i[0])
            label_list.append(i[1])
        return img_list, label_list
train1=mydataset("/disk3/dataset/classfy/finaldataset/train.txt",data_transform)
test1=mydataset("/disk3/dataset/classfy/finaldataset/test.txt",data_transform)
train1_loader=torch.utils.data.DataLoader(train1,batch_size=4,shuffle=True)
val1_loader=torch.utils.data.DataLoader(test1,batch_size=4,shuffle=True)
Inside __getitem__(), the main things to watch are what transform() outputs and what __getitem__() itself returns; I'll dig into that another time. What is known to work: an image read by PIL can be fed straight into transform(), and the label returned by __getitem__() must not be a string (a loss like CrossEntropyLoss needs integer class indices).
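If the annotation file stores class names rather than integers, map them to indices inside the dataset; a sketch with a made-up class list:

# Hypothetical class names; replace with your own categories.
CLASSES = ['cat', 'dog', 'bird']
CLASS_TO_IDX = {name: idx for idx, name in enumerate(CLASSES)}
# then inside __getitem__:
#     label = CLASS_TO_IDX[self.label_name[index]]   # str -> int class index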
def train():
    for i in range(epochs):
        alexnet.train()   # back to train mode; eval() is set below for validation
        running_loss = 0
        train_correct = 0
        train_total = 0
        for j, (images, labels) in enumerate(train1_loader):
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())
            optimizer.zero_grad()
            output = alexnet(images)
            _, pred = torch.max(output.data, 1)
            train_correct += (pred == labels.data).sum().item()  # .item() avoids integer tensor division
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            train_total += labels.size(0)
            if j % 100 == 0:
                print("train epoch is {}, loss is {}, acc is {}".format(
                    i + 1, running_loss / train_total, 100 * train_correct / train_total))
        print("train epoch is {}, loss is {}, acc is {}".format(
            i + 1, running_loss / train_total, 100 * train_correct / train_total))

        # validate after each epoch
        correct = 0
        test_loss = 0
        test_total = 0
        alexnet.eval()
        for (test_images, test_labels) in val1_loader:
            test_images = Variable(test_images.cuda())
            test_labels = Variable(test_labels.cuda())
            test_output = alexnet(test_images)
            _, pred = torch.max(test_output.data, 1)
            loss = criterion(test_output, test_labels)
            test_loss += loss.item()
            test_total += test_labels.size(0)
            correct += (pred == test_labels.data).sum().item()
        print("test epoch is {}, loss is {}, acc is {}".format(
            i + 1, test_loss / test_total, 100 * correct / test_total))
Finally, I'll upload a complete version of the code.