numpy 版
import random
import numpy as np
def gaussian_noise(x, noise_variance=(0, 0.1), prob=0.5,
                   clip_range=(-0.1, 0.1)):
    """Add clipped, zero-mean Gaussian noise to an image with probability ``prob``.

    Args:
        x: Input image array. The noise is drawn with ``size=x.shape``, so any
            array shape is accepted (the article uses H x W x D volumes).
        noise_variance: ``(low, high)`` range from which the noise scale is
            drawn uniformly. NOTE: despite the name, the sampled value is
            passed to ``np.random.normal`` as ``scale``, i.e. it acts as the
            standard deviation of the noise, not its variance.
        prob: Probability of applying the augmentation.
        clip_range: ``(min, max)`` bounds applied to the noise before it is
            added to ``x``; ``None`` disables clipping. The default keeps the
            previously hard-coded limits of -0.1 and 0.1.

    Returns:
        ``x`` itself when the augmentation is skipped, otherwise a new array
        equal to ``x`` plus the (clipped) noise.
    """
    if random.random() >= prob:
        return x

    low, high = noise_variance[0], noise_variance[1]
    # A degenerate range needs no sampling; otherwise pick the scale uniformly.
    sigma = low if low == high else random.uniform(low, high)

    noise = np.random.normal(0.0, sigma, size=x.shape)
    if clip_range is not None:
        noise = np.clip(noise, clip_range[0], clip_range[1])
    return x + noise
pytorch 版
# Zero-mean Gaussian noise: standard-normal samples scaled by 0.1 (so the
# standard deviation is 0.1), then clamped to the range [-0.2, 0.2].
noise = torch.clamp(torch.randn_like(image) * 0.1, -0.2, 0.2)
Gamma变换是对输入图像灰度值进行的非线性操作,使输出图像灰度值与输入图像灰度值呈指数关系:
$$V_{out} = A \cdot V_{in}^{\gamma}$$
这个指数就是gamma,注意Vin的取值范围为0~1,因此需要进行归一化。
def augment_gamma(x, gamma_range=(0.5, 2), prob=0.5, epsilon=1e-7):
    """Apply a random gamma (power-law) intensity transform with probability ``prob``.

    The image is min-max normalised, raised to the power ``gamma`` and then
    mapped back onto its original intensity range.

    Args:
        x: Input image array.
        gamma_range: ``(low, high)`` bounds for gamma. When the range
            straddles 1, a coin flip decides between sampling from
            ``[low, 1]`` and ``[1, high]`` so that brightening and darkening
            are equally likely. When the whole range lies below 1, gamma is
            sampled from ``[low, high]`` directly.
        prob: Probability of applying the augmentation.
        epsilon: Small constant added to the intensity range so that a
            constant image does not cause a division by zero.

    Returns:
        ``x`` itself when the augmentation is skipped, otherwise a new array
        holding the gamma-transformed image.
    """
    if random.random() >= prob:
        return x

    low, high = gamma_range[0], gamma_range[1]
    coin = np.random.random()
    if high < 1:
        # The entire range is below 1: stay inside it instead of sampling up
        # to (or down from) 1, which would leave the requested range.
        gamma = np.random.uniform(low, high)
    elif coin < 0.5 and low < 1:
        gamma = np.random.uniform(low, 1)
    else:
        gamma = np.random.uniform(max(low, 1), high)

    lowest = x.min()
    span = x.max() - lowest
    normalised = (x - lowest) / float(span + epsilon)
    return np.power(normalised, gamma) * span + lowest
gamma_range 对应公式中 gamma 的取值范围。
在三维图像中随机打乱部分区域的体素。
def local_pixel_shuffling(x, num_block=(800, 1600), prob=0.5):
    """Shuffle the voxels inside many small random windows of a 3-D image.

    Every window is read from the unmodified input, its voxels are randomly
    permuted, and the result is written to the same location in a copy of
    the input. Overlapping windows therefore overwrite each other rather
    than compounding.

    Args:
        x: Input image array of shape H x W x D.
        num_block: ``(low, high)`` range for the number of windows. If both
            values are equal that exact count is used; otherwise the count is
            drawn with ``np.random.randint(low, high)`` (``high`` exclusive).
        prob: Probability of applying the augmentation.

    Returns:
        ``x`` itself when the augmentation is skipped, otherwise a new array
        of the same shape; the input is never modified.
    """
    if random.random() >= prob:
        return x

    rows, cols, deps = x.shape
    shuffled = x.copy()
    if 0 in (rows, cols, deps):
        # Nothing to shuffle in an empty volume.
        return shuffled

    if num_block[0] == num_block[1]:
        block_count = num_block[0]
    else:
        block_count = np.random.randint(num_block[0], num_block[1])

    # Window edge is limited to 1/10 of each axis, but never below one voxel
    # so that axes shorter than 10 do not produce an empty randint range.
    max_x = max(1, rows // 10)
    max_y = max(1, cols // 10)
    max_z = max(1, deps // 10)

    for _ in range(block_count):
        size_x = random.randint(1, max_x)
        size_y = random.randint(1, max_y)
        size_z = random.randint(1, max_z)
        start_x = random.randint(0, rows - size_x)
        start_y = random.randint(0, cols - size_y)
        start_z = random.randint(0, deps - size_z)

        region = (slice(start_x, start_x + size_x),
                  slice(start_y, start_y + size_y),
                  slice(start_z, start_z + size_z))
        # flatten() returns a copy, so shuffling does not touch the input.
        voxels = x[region].flatten()
        np.random.shuffle(voxels)
        shuffled[region] = voxels.reshape((size_x, size_y, size_z))

    return shuffled
num_block 是要打乱的像素块数量;block_noise_size_x、block_noise_size_y、block_noise_size_z 是每个像素块的长宽高,即窗口大小;np.random.shuffle 打乱像素数组,然后 reshape 还原为三维块。
随机选取一定数量的各种大小和纵横比的窗口,将每个窗口内的所有像素替换为随机噪声(代码中使用 np.random.rand,即 [0, 1) 上的均匀分布噪声)。
def image_in_painting(x, prob=0.5):
    """Overwrite a few random windows of a 3-D image with random noise.

    Up to five windows are painted; before each one there is a 5% chance of
    stopping early. Each window edge is between 1/8 and 1/4 of the matching
    image axis, and the window is filled with samples from
    ``np.random.rand`` (uniform on [0, 1)).

    Args:
        x: Input image array of shape H x W x D.
        prob: Probability of applying the augmentation.

    Returns:
        ``x`` itself when the augmentation is skipped, otherwise a painted
        copy of the same shape; the input is never modified.
    """
    if random.random() >= prob:
        return x

    extents = x.shape
    if len(extents) != 3:
        raise ValueError("image_in_painting expects an H x W x D array")

    painted = x.copy()
    remaining = 5
    while remaining > 0 and random.random() < 0.95:
        # Window edge lengths: between 1/8 and 1/4 of each axis.
        sizes = [random.randint(extent // 8, extent // 4) for extent in extents]
        starts = [_in_painting_start(extent, size)
                  for extent, size in zip(extents, sizes)]
        region = tuple(slice(start, start + size)
                       for start, size in zip(starts, sizes))
        painted[region] = np.random.rand(*sizes)
        remaining -= 1
    return painted


def _in_painting_start(extent, size, margin=3):
    """Pick a window start, keeping ``margin`` voxels from both borders when the axis is long enough."""
    last = extent - size
    if last - margin >= margin:
        return random.randint(margin, last - margin)
    # Axis too short for the margin: fall back to any valid position.
    return random.randint(0, last)
block_noise_size_x、block_noise_size_y、block_noise_size_z 是窗口的长宽高,cnt 是窗口数量的上限。
将图像分为大小相同但不重叠的立方体块,随机遮盖一定比例的块。
def random_masking(x, mask_ratio, patch_size):
    """Zero out a random subset of non-overlapping cubic patches of a 3-D image.

    The volume is divided into a grid of ``patch_size``-sided cubes. A random
    permutation of the patch indices selects which patches are masked, and
    the masked patches are set to zero by multiplying with a binary mask.

    Args:
        x: Input image array of shape H x W x D. Axes that are not a multiple
            of ``patch_size`` are supported: the trailing partial patches are
            treated like any other patch.
        mask_ratio: Fraction of patches to mask. The number of patches kept
            is ``int(num_patches * (1 - mask_ratio))``.
        patch_size: Edge length of one cubic patch, in voxels.

    Returns:
        A new array of the same shape as ``x`` with the masked patches zeroed.
    """
    H, W, D = x.shape
    # Ceil division so a trailing partial patch gets its own grid cell; for
    # evenly divisible axes this equals plain floor division.
    h = -(-H // patch_size)
    w = -(-W // patch_size)
    d = -(-D // patch_size)
    num_patches = h * w * d

    len_keep = int(num_patches * (1 - mask_ratio))
    mask_idx = np.random.permutation(num_patches)[len_keep:]

    mask = np.zeros(num_patches, dtype=int)
    mask[mask_idx] = 1  # 1 marks a patch to be removed
    mask = mask.reshape((h, w, d))

    # Expand each grid cell to patch_size voxels along every axis, then crop
    # the overhang introduced by the ceil division.
    for axis in range(3):
        mask = mask.repeat(patch_size, axis=axis)
    mask = mask[:H, :W, :D]

    return x * (1 - mask)
mask_ratio 是掩码率,patch_size 是掩码的单位块的大小。
扰动后的图像适合做无监督预训练:从无标注的数据集中学习先验信息,将扰动后的图像输入网络中,执行重建任务。
在肾脏肿瘤数据集(KiTS19)的训练结果
从左往右为原始图像,扰动图像和重建图像,效果其实不怎么好,迁移到分割任务上不知道有没有用,慢慢做实验吧