import os
import numpy as np
import torch
import random
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from tools.my_dataset import RMBDataset
from PIL import Image
from matplotlib import pyplot as plt
def set_seed(seed=1):
    """Seed every random number generator used by this script.

    Args:
        seed: Integer seed handed to Python's ``random``, NumPy, and PyTorch
            (CPU generator and the current CUDA device).
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Safe without a GPU: PyTorch silently ignores this call when CUDA is unavailable.
    torch.cuda.manual_seed(seed)
# Fix the random seed before anything else runs.
set_seed(seed=1)

# Run settings.
MAX_EPOCH, BATCH_SIZE = 10, 1
LR = 0.01
log_interval, val_interval = 10, 1

# Label lookup: string key -> integer index.
rmb_label = {
    "1": 0,
    "100": 1,
}
喂入数据
# Root of the split dataset and its train / valid sub-folders (relative paths).
split_dir = os.path.join(*("..", "..", "classification_RMB", "rmb_split"))
train_dir, valid_dir = (os.path.join(split_dir, sub) for sub in ("train", "valid"))

# Dataset instance for the training split.
# NOTE(review): train_transform is only defined further down in this file;
# it has to exist before this line executes.
train_data = RMBDataset(data_dir=train_dir, transform=train_transform)

# DataLoader that serves shuffled batches of the training data.
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
以下将以这部分进行展开
# Per-channel statistics consumed by Normalize below.
# NOTE(review): norm_mean / norm_std were not defined anywhere in the visible
# file; these are the commonly used ImageNet values — confirm they are intended.
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Pipeline applied to every training image.
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    # <-- insert the transform being demonstrated here
    # Optional: drop ToTensor when an earlier step (e.g. the FiveCrop/TenCrop
    # Lambda shown later in this file) already yields tensors.
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])
四维可视化版本:
# Show the first image of every batch when the loader yields 4-D batches.
for epoch in range(MAX_EPOCH):
    for i, data in enumerate(train_loader):
        # inputs is B C H W, e.g. torch.Size([1, 3, 512, 512]) where 1 is the batch size.
        inputs, labels = data
        # Indexing the batch axis leaves a single C H W image tensor.
        img_tensor = inputs[0, ...]
        # NOTE(review): transform_invert is neither defined nor imported in the
        # visible part of this file — it must be brought into scope.
        img = transform_invert(img_tensor, train_transform)
        plt.imshow(img)
        plt.show()
        plt.pause(0.5)
        plt.close()
五维可视化版本:
# Show every crop of the first sample when the loader yields 5-D batches
# (as produced by the FiveCrop / TenCrop + stack pipelines in this file).
for epoch in range(MAX_EPOCH):
    for i, data in enumerate(train_loader):
        inputs, labels = data
        bs, ncrops, c, h, w = inputs.shape
        # ncrops is the number of patches cut from one source image (5 for FiveCrop).
        for n in range(ncrops):
            # Fix the batch and crop axes to get one C H W image tensor.
            img_tensor = inputs[0, n, ...]
            # NOTE(review): transform_invert is neither defined nor imported in
            # the visible part of this file — it must be brought into scope.
            img = transform_invert(img_tensor, train_transform)
            plt.imshow(img)
            plt.show()
            plt.pause(1)
# Crop a 196x196 patch starting from the centre of the image.
transforms.CenterCrop(196),
如果裁剪尺寸超出图像尺寸(224),例如填入512,仍然会进行中心裁剪,超出原图的部分用黑色填充。
RandomCrop 参数说明 —— size:所需裁剪的图像尺寸;padding:设置填充大小,当为 a 时,上下左右均填充 a 个像素,当为 (a, b) 时,左右填充 a 个像素,上下填充 b 个像素;pad_if_needed:若图像小于设定的 size,则先填充再裁剪。
padding_mode 填充模式共有四种:constant(默认),填充像素值由 fill 设定;edge:填充像素值由图像边缘像素决定;reflect:镜像填充,最后一个像素不镜像,如对 [1,2,3,4] 两侧各填充 2 个元素得到 [3,2,1,2,3,4,3,2];
symmetric:镜像填充,最后一个像素也镜像,如对 [1,2,3,4] 两侧各填充 2 个元素得到 [2,1,1,2,3,4,4,3]。
# Crop a 196x196 patch starting from the centre of the image.
transforms.CenterCrop(196),
# Pad 16 px on every side with the colour (255, 0, 0), then take a random 224x224 crop.
transforms.RandomCrop(224, padding=16, fill=(255, 0, 0))
# Random 512x512 crop; pad_if_needed pads the image first when it is smaller than the crop.
transforms.RandomCrop(512, pad_if_needed=True)
# 'edge': fill the border region with the outermost edge pixels.
transforms.RandomCrop(224, padding=64, padding_mode='edge'),
# 'reflect': mirror the image into the border without repeating the edge pixel.
transforms.RandomCrop(224, padding=64, padding_mode='reflect'),
# 'symmetric': mirror the image into the border, repeating the edge pixel.
transforms.RandomCrop(1024, padding=1024, padding_mode='symmetric'),
# scale=(0.5, 0.5): every crop covers half of the image area before resizing to 224.
transforms.RandomResizedCrop(size=224, scale=(0.5, 0.5)),
scale 指定随机裁剪区域占原图面积的比例范围,默认是 (0.08, 1.0);例如 (0.6, 0.6) 表示每次随机选取原始图像 60% 的面积进行裁剪,(0.5, 0.5) 表示每次都裁剪一半的面积。
# Cut five 112x112 patches from the image; the result is a tuple, not a tensor.
transforms.FiveCrop(112),
注意这里返回的是tuple需要转回tensor
# Convert each crop in the incoming tuple to a tensor and stack them into a
# single tensor, which adds a leading "crops" axis.
transforms.Lambda(lambda crops: torch.stack([(transforms.ToTensor()(crop)) for crop in crops])),
这里的逻辑是传入裁剪的crops,然后通过列表推导式将遍历展开,对每个crop进行toTensor转换,然后再用stack堆叠起来返回回去。
记住这里转完后把后面toTensor转换去掉。然后这里还会报错
Exception: Invalid img shape, expected 1 or 3 in axis 2, but got 5!就要注意展示的时候获取几维的向量
最后得到5张裁剪图:左上、右上、左下、右下和中心。
# vertical_flip=False: after cropping, the flipped copies are flipped horizontally.
transforms.TenCrop(112, vertical_flip=False),
# Convert each crop in the incoming tuple to a tensor and stack them into a
# single tensor, which adds a leading "crops" axis.
transforms.Lambda(lambda crops: torch.stack([(transforms.ToTensor()(crop)) for crop in crops])),
水平翻转
# Flip left-right with probability 0.5.
transforms.RandomHorizontalFlip(p=0.5),
# Flip top-bottom with probability 0.5.
transforms.RandomVerticalFlip(p=0.5),
垂直翻转
这两个所返回的是4维度的tensor
transforms.RandomRotation((90), expand=True)
# expand=True enlarges the canvas so that every pixel is kept after rotation.
# With batch=16 this raises a size-mismatch error because expand=True changes
# the output size per image, so remember to Resize afterwards.
transforms.RandomRotation(30, center=(0, 0), expand=True),
# Pad 32 px on every side with the constant colour (255, 0, 0).
transforms.Pad(padding=32, fill=(255, 0, 0), padding_mode='constant')
在这里插入图片描述
# A 4-tuple padding is (left, top, right, bottom).
# NOTE(review): per the torchvision docs `fill` is only used with
# padding_mode='constant', so it likely has no effect here — confirm.
transforms.Pad(padding=(8, 16, 32, 64), fill=(255, 0, 0), padding_mode='symmetric'),
transforms.ColorJitter(brightness=0.5),  # brightness
transforms.ColorJitter(contrast=0.5),  # contrast
transforms.ColorJitter(saturation=0.5),  # saturation
transforms.ColorJitter(hue=0.3),  # hue; a scalar a samples the factor from [-a, a]
transforms.Grayscale(num_output_channels=3),
# num_output_channels can only be 1 or 3; keep it consistent with the rest of
# the pipeline, e.g. check that Normalize is given 3-channel statistics.
仿射变换是二维线性变换,由五种基本原子变换构成,分别是旋转、平移、缩放、错切和翻转
# Random rotation; a scalar `degrees` samples the angle from [-30, 30].
transforms.RandomAffine(degrees=30),
# Random translation of up to 20% of the width/height in either direction;
# fillcolor is the colour of the uncovered border.
# NOTE(review): newer torchvision releases rename `fillcolor` to `fill` —
# confirm against the installed version.
transforms.RandomAffine(degrees=0, translate=(0.2, 0.2), fillcolor=(255, 0, 0)),
transforms.RandomAffine(degrees=0, scale=(0.7, 0.7)),  # every image is scaled to 70%
transforms.RandomAffine(degrees=0, shear=(0, 0, 0, 45)),  # shear along the y axis
transforms.RandomAffine(degrees=0, shear=90, fillcolor=(255, 0, 0)),
# RandomErasing operates on tensors, so convert first.
transforms.ToTensor(),
# Erase with a fixed colour; values are on the 0-1 tensor scale, hence the division by 255.
transforms.RandomErasing(p=1, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=(254/255, 0, 0)),
# 'random' fills the erased region with random values; other strings are
# rejected by newer torchvision releases.
transforms.RandomErasing(p=1, scale=(0.02, 0.33), ratio=(0.3, 3.3), value='random'),
# Apply exactly one transform picked at random from the list.
transforms.RandomChoice([transforms.RandomVerticalFlip(p=1), transforms.RandomHorizontalFlip(p=1)]),
# Apply the whole list with probability 0.5, otherwise skip it.
transforms.RandomApply([transforms.RandomAffine(degrees=0, shear=45, fillcolor=(255, 0, 0)),
                        transforms.Grayscale(num_output_channels=3)], p=0.5),
# Apply every transform in the list, in a random order.
transforms.RandomOrder([transforms.RandomRotation(15),
                        transforms.Pad(padding=32),
                        transforms.RandomAffine(degrees=0, translate=(0.01, 0.1), scale=(0.9, 1.1))]),
class AddPepperNoise(object):
    """Custom transform that adds salt-and-pepper noise to an image.

    NOTE(review): the original source was truncated right after
    ``if random.uniform(0,1)``; the noise branch below is a reconstruction
    and should be confirmed against the intended implementation.

    Args:
        snr: Fraction of pixels left untouched (signal-to-noise ratio), a float.
        p: Probability of applying the noise to a given image, a float.
    """

    def __init__(self, snr, p=0.9):
        # Both arguments must be floats; the original `or` let any snr through
        # whenever the default p was used.
        assert isinstance(snr, float) and isinstance(p, float), \
            "snr and p must both be floats"
        self.snr = snr
        self.p = p

    def __call__(self, img):
        """Return a noisy copy of ``img`` with probability ``p``, else ``img`` itself.

        Args:
            img: Image to corrupt — assumes a PIL image that converts to an
                H x W x C array; TODO confirm.
        """
        if random.uniform(0, 1) >= self.p:
            return img

        pixels = np.array(img).copy()
        h, w, c = pixels.shape
        noise_pct = 1 - self.snr
        # 0 keeps the pixel, 1 turns it into salt (white), 2 into pepper (black).
        mask = np.random.choice(
            (0, 1, 2),
            size=(h, w, 1),
            p=[self.snr, noise_pct / 2.0, noise_pct / 2.0],
        )
        # Use the same decision for every channel of a pixel.
        mask = np.repeat(mask, c, axis=2)
        pixels[mask == 1] = 255
        pixels[mask == 2] = 0
        return Image.fromarray(pixels.astype('uint8')).convert('RGB')
题外话:
这是我整理最烦躁的时候,觉得真的好没用啊,不过记了就是一个进步。为了更快看懂代码出发