推荐一个入门学习的地址https://github.com/bharathgs/Awesome-pytorch-list
pyTorch doc:https://pytorch.org/docs/stable/index.html
anaconda 安装pytorch (support cuda)
pip install torch==1.1.0 -f https://download.pytorch.org/whl/cu90/stable
疑问:
1.优化器的选择;loss函数的选择;参数的设置
目录
1.读取图像数据
method1:测试单张图片
method2:自定义类,读取整个文件夹
2.cuda 使用
3.tensor和Variable
4.transforms
5.DataLoader 和 Dataset
5.1 直接调用
5.2 自定义
5.2.1 each class one folder
5.2.2 all class in one folder
6.torchvision
*.不熟悉的code记录
*.1 torch.backends.cudnn.benchmark = True
from PIL import Image
import torchvision.transforms as transforms

# Minimal alternative: transform = transforms.ToTensor()
# The transform pipeline can be adapted to your own needs.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])
img_path = "5_283.bmp"
# Force three channels so the image always matches the 3-channel mean/std above.
img = Image.open(img_path).convert("RGB")
# No Variable wrapper: since PyTorch 0.4 a plain tensor can be fed to a network.
data = transform(img)
# print(data.size())
# The network needs a 4-D input but a single image is only 3-D, so add one
# leading dimension around it.
data = data.unsqueeze(0)
PyTorch自定义读取数据的方式,主要用到两个类:torch.utils.data.Dataset
和torch.utils.data.DataLoader
import csv
import glob
import os
import random

import cv2
import numpy as np
import torch
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
# Dataset root folder and the list file naming the images to load from it.
# Raw strings keep the Windows backslashes readable.
name = r'C:\Users\Administrator\Desktop\history\HED-BSDS\test1.lst'
base_path = r'C:\Users\Administrator\Desktop\history\HED-BSDS'
# First define a Dataset subclass -> myDataset
class myDataset(Dataset):
    """Dataset that loads the images named in a list file.

    Args:
        name: Path of a text file holding one image path per line, each
            relative to ``base_path``.
        base_path: Directory that the listed paths are resolved against.
    """

    def __init__(self, name, base_path):
        # Keep the caller's base path instead of relying on a module-level
        # global with the same name.
        self.base_path = base_path
        with open(name) as f:
            # Drop the trailing newline of every entry and skip blank lines.
            self.filenames = [line.strip() for line in f if line.strip()]

    # Override the two methods DataLoader relies on.
    def __getitem__(self, index):
        """Return the image at ``index`` as a ``torch.Tensor``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        path = os.path.join(self.base_path, self.filenames[index])
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError("cannot read image: {}".format(path))
        return torch.Tensor(img)

    def __len__(self):
        """Return the number of listed images."""
        return len(self.filenames)
# Batch the custom dataset and preview the first image of every batch.
dataset = myDataset
train_loader = DataLoader(
    dataset(name=name, base_path=base_path),
    batch_size=4,
    shuffle=True,
)
for batch in train_loader:
    print(batch.shape)
    first_image = batch[0].numpy().astype(np.uint8)
    cv2.imshow('we', first_image)
    # Wait for a key press before moving on to the next batch.
    cv2.waitKey()
1.确认设备是否支持cuda
import torch
torch.cuda.is_available()  # returns False when CUDA is not supported
2.数据转换到cuda上,相关的有网络模型net,训练数据data,损失函数criterion(可选)
转换的方式有两种:1.to(device) 2. net.cuda()
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Net().to(device)
for batch_idx, (data, target) in enumerate(train_loader):
    # Since PyTorch 0.4 tensors carry autograd state themselves, so the
    # deprecated Variable wrapper is not needed after moving them to the device.
    data, target = data.to(device), target.to(device)
PyTorch 在autograd模块中实现了计算图的相关功能,autograd的核心数据结构是Variable。Variable封装了Tensor,并记录对Tensor的操作记录来构建计算图。
forward的输入输出都是Variable,只有Variable才具有自动求导功能,Tensor是没有的,所以在输入时,需要把Tensor封装成Variable。(注:以上描述适用于PyTorch 0.4之前的版本;自0.4起Variable已与Tensor合并,Tensor本身即可参与自动求导,无需再封装。)
https://pytorch-cn.readthedocs.io/zh/latest/torchvision/torchvision-transform/
1)Scale(size):按比例缩放图像,使最短边等于size(新版torchvision中已更名为Resize)
2)CenterCrop(size):中心切割,size可以是一个int,也可以是(h,w)
3)RandomCrop(size):随机切割
4)RandomHorizontalFlip:随机水平翻转
5)RandomSizedCrop(size, interpolation=2):随机切,然后缩放成size(新版torchvision中已更名为RandomResizedCrop)
6)Pad:填充
# Per-channel mean/std used to normalise the image tensor.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# Bind the pipeline to a name so it can be passed to a dataset.
train_transform = transforms.Compose([
    # RandomResizedCrop is the current name of the deprecated RandomSizedCrop.
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])
大多数其他的PyTorch教程和示例都希望你先按照训练集和验证集来组织文件夹,然后在训练集和验证集中再按照类别进行组织。
# Imported here because earlier snippets reuse `data` as a variable name.
import torch.utils.data as data
from torchvision import datasets

normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
# `filepath` must point at a folder organised as one sub-folder per class.
train_loader = data.DataLoader(
    datasets.ImageFolder(
        filepath,
        transforms.Compose([
            # RandomResizedCrop is the current name of the deprecated RandomSizedCrop.
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]),
    ),
    batch_size=16,
    shuffle=True,
)
class CatDogDataset(Dataset):
    """Image-classification dataset laid out as one sub-folder per class.

    Every directory directly under ``root`` is treated as a class, and labels
    are assigned in sorted folder-name order starting at 0.  A shuffled
    ``(image path, label)`` index is written to ``root/namefile`` as CSV the
    first time it is needed and reused afterwards, so the train/val/test
    instances all slice the same ordering and do not overlap.

    Args:
        root: Directory containing one sub-folder per class.
        resize: Stored as ``self.size``; ``__getitem__`` does not read it and
            uses a fixed 256 resize followed by a 224 centre crop.
        mode: ``"train"`` selects the first 80 % of the index, ``"val"`` the
            80 %-95 % range, and any other value the last 5 %.
        namefile: File name of the CSV index inside ``root``.
    """

    def __init__(self, root, resize, mode, namefile):
        self.root = root
        self.size = resize
        self.namefile = namefile

        # Map class name -> integer label, skipping anything that is not a
        # directory.
        self.name2label = {}
        for name in sorted(os.listdir(root)):
            if not os.path.isdir(os.path.join(root, name)):
                continue
            self.name2label[name] = len(self.name2label)
        print(self.name2label)

        self.images, self.labels = self.load_csv()

        total = len(self.images)
        train_end = int(0.8 * total)
        val_end = int(0.95 * total)
        if mode == "train":    # 80 %
            part = slice(None, train_end)
        elif mode == "val":    # 15 % = 80 % ~ 95 %
            part = slice(train_end, val_end)
        else:                  # 5 % = 95 % ~ 100 %
            part = slice(val_end, None)
        self.images = self.images[part]
        self.labels = self.labels[part]

    def load_csv(self):
        """Return ``(images, labels)`` read from the CSV index.

        The index is created (globbed, shuffled and written) only when the
        file does not exist yet.
        """
        csv_path = os.path.join(self.root, self.namefile)
        # Check for existence, not isdir: the index is a regular file, so an
        # isdir test is always False and would reshuffle on every call.
        if not os.path.exists(csv_path):
            images = []
            for name in self.name2label:
                for pattern in ('*jpg', '*jpeg', '*png'):
                    images += glob.glob(os.path.join(self.root, name, pattern))
            random.shuffle(images)
            with open(csv_path, mode='w', newline='') as f:
                writer = csv.writer(f)
                for img in images:
                    # The parent folder name is the class name.
                    name = img.split(os.sep)[-2]
                    writer.writerow([img, self.name2label[name]])
            print('writer into cvs file:', self.namefile)

        images, labels = [], []
        with open(csv_path, newline='') as f:
            for img, label in csv.reader(f):
                images.append(img)
                labels.append(int(label))
        assert len(images) == len(labels), 'num of imgs != labels'
        return images, labels

    def __len__(self):
        """Return the number of samples in the selected split."""
        return len(self.images)

    def denormalize(self, x_hat):
        """Undo the Normalize step applied in ``__getitem__``.

        Normalisation computed ``x_hat = (x - mean) / std``, so the inverse is
        ``x = x_hat * std + mean``.
        """
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
        # x: [c, h, w]; reshape mean/std from [3] to [3, 1, 1] to broadcast.
        mean = torch.tensor(mean).unsqueeze(1).unsqueeze(1)
        std = torch.tensor(std).unsqueeze(1).unsqueeze(1)
        return x_hat * std + mean

    def __getitem__(self, idx):
        """Return ``(image tensor, label tensor)`` for sample ``idx``."""
        # img is a path string such as 'pokemon/bulbasaur/00000000.png';
        # label is its integer class index.
        img, label = self.images[idx], self.labels[idx]
        tf = transforms.Compose([
            lambda x: Image.open(x).convert("RGB"),  # string path => image data
            transforms.Resize(256),  # Resize replaces the deprecated Scale
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])
        img = tf(img)
        label = torch.tensor(label)
        return img, label
专门处理图像的库。主要包含datasets models transforms utils
包含的数据集:
包含的模型:
加载模型example
import torchvision.models as models

# Build each architecture by calling its constructor with default arguments.
resnet18 = models.resnet18()
alexnet = models.alexnet()
squeezenet = models.squeezenet1_0()
# The constructor is named densenet161; there is no densenet_161.
densenet = models.densenet161()
也可以通过使用 pretrained=True 来加载一个别人预训练好的模型
如果网络的输入数据维度或类型上变化不大,设置 torch.backends.cudnn.benchmark = True 可以增加运行效率;如果每次迭代都变化的话,会导致 cuDNN 每次都会去寻找一遍最优配置,这样反而会降低运行效率。