转自AI Studio,原文链接:百度网盘AI大赛-图像处理挑战赛:水印智能消除赛 Baseline - 飞桨AI Studio
使用UNet消除图像中水印以完成百度网盘AI大赛-图像处理挑战赛:水印智能消除赛。
比赛链接
日常生活中带有水印的图片很常见,即使是PS专家,也很难快速且不留痕迹地去除水印。而使用智能去除水印的算法,可以快速自动去除图片中的水印。选手需要通过深度学习技术训练模型,对给定的真实场景下采集得到的带有水印的图片进行处理,并最终输出处理后的扫描结果图片。
本次比赛希望选手结合当下前沿的图像处理技术与计算机视觉技术,提升模型的训练性能和泛化能力,在保证效果精准的同时,注意模型在实际应用中的性能问题,做到尽可能的小而快。
因此,应尽可能避免使用过大的模型。
本项目使用UNet网络,对水印图像进行像素级转换。相比较于用于分割的UNet网络,只需要设定输出通道数为3即可。
本项目提供了一个仅在小批量数据上训练过的UNet网络参数model.pdparams作为示例,对应的成绩为:
ms_ssim | psnr | time_used | score |
---|---|---|---|
0.95577 | 23.94572 | 0.07173 | 0.59762 |
对于本赛题,主办方提供了巨大的训练数据。首当其冲的难题就是导入数据到AiStudio。
下面以导入bg_images.tar 和 watermark_datasets.part8.tar为例,展示如何使用这份~大到想弃赛~完美的数据集。
假设bg_images.tar和watermark_datasets.part8.tar所在的路径分别为'bg_images.tar'和'watermark_datasets.part8.tar',则可以用如下命令解压
In [1]
! tar -xvf bg_images.tar
! tar -xvf watermark_datasets.part8.tar
通过paddle.io.Dataset构造读取器,便于读取数据。
数据预处理包括:将图像由BGR转换为RGB、缩放到512×512、将维度顺序由HWC转换为CHW,以及将像素值除以255归一化到[0, 1]。
In [41]
import paddle
import os
import numpy as np
import pandas as pd
import cv2
class MyDateset(paddle.io.Dataset):
    """Paired dataset of watermarked images and their clean background images.

    Every file found in ``watermark_dir`` is one sample. Its label is the
    file in ``bg_dir`` whose name is the first 14 characters of the
    watermarked file name plus ``'.jpg'``.

    Args:
        mode: Stored on the instance; not used by this class itself.
        watermark_dir: Directory containing the watermarked input images.
        bg_dir: Directory containing the clean background (label) images.
    """

    def __init__(self, mode = 'train', watermark_dir = '/home/aistudio/watermark_datasets.part8/', bg_dir = '/home/aistudio/bg_images/'):
        super(MyDateset, self).__init__()
        self.mode = mode
        self.watermark_dir = watermark_dir
        self.bg_dir = bg_dir
        # os.listdir returns entries in arbitrary order; sorting makes the
        # index -> sample mapping reproducible between runs.
        self.train_list = sorted(os.listdir(self.watermark_dir))

    def __getitem__(self, index):
        """Return the ``(img, label)`` float32 tensors for sample ``index``.

        Both images are converted BGR -> RGB, resized to 512x512, transposed
        from HWC to CHW and divided by 255.

        Raises:
            OSError: If either image file cannot be read by ``cv2.imread``.
        """
        item = self.train_list[index]
        bg_item = item[:14] + '.jpg'

        # os.path.join works whether or not the directory ends with a slash.
        img_path = os.path.join(self.watermark_dir, item)
        label_path = os.path.join(self.bg_dir, bg_item)

        img = cv2.imread(img_path)
        label = cv2.imread(label_path)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            raise OSError('Could not read watermarked image: ' + img_path)
        if label is None:
            raise OSError('Could not read background image: ' + label_path)

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        label = cv2.cvtColor(label, cv2.COLOR_BGR2RGB)

        img = paddle.vision.transforms.resize(img, (512, 512), interpolation='bilinear')
        label = paddle.vision.transforms.resize(label, (512, 512), interpolation='bilinear')

        # HWC -> CHW, then scale the 8-bit pixel values by 1/255.
        img = img.transpose((2, 0, 1))
        label = label.transpose((2, 0, 1))
        img = img / 255
        label = label / 255

        img = paddle.to_tensor(img).astype('float32')
        label = paddle.to_tensor(label).astype('float32')
        return img, label

    def __len__(self):
        """Return the number of files listed in ``watermark_dir``."""
        return len(self.train_list)
# 对dataloader进行测试
'''
train_dataset=MyDateset()
train_dataloader = paddle.io.DataLoader(
train_dataset,
batch_size=16,
shuffle=True,
drop_last=False)
for step, data in enumerate(train_dataloader):
img, label = data
print(step, img.shape, label.shape)
break
'''
'\ntrain_dataset=MyDateset()\n\ntrain_dataloader = paddle.io.DataLoader(\n train_dataset,\n batch_size=16,\n shuffle=True,\n drop_last=False)\n\nfor step, data in enumerate(train_dataloader):\n img, label = data\n print(step, img.shape, label.shape)\n break\n'
鲁迅曾经说过:~人的精力是有限的~你的东西很好,但现在是我的了!
秉承着拿来主义的思想,直接从图像分割(UNet、PSPNet、Deeplab)复制一份UNet的代码过来,将其中的num_classes默认参数值设定为3。
修改后的网络将shape为(3, h, w)的数据转换为shape为(3, h, w)的数据,即可以将修改后的UNet网络用于图像风格迁移。
In [42]
# https://aistudio.baidu.com/aistudio/projectdetail/3340332?channelType=0&channel=0
"""
paddlepaddle-gpu==2.2.1
time:2021.07.16 9:00
author:CP
backbone:U-net
"""
import paddle
from paddle import nn
class Encoder(nn.Layer):
    """Down-sampling stage: two conv + batch-norm(ReLU) layers, then pooling.

    Args:
        num_channels: Number of channels of the incoming feature map.
        num_filters: Number of channels produced by both convolutions.
    """

    def __init__(self, num_channels, num_filters):
        super().__init__()
        # 3x3 kernel with stride 1 and padding 1 keeps the spatial size.
        conv_cfg = dict(kernel_size=3, stride=1, padding=1)

        self.conv1 = nn.Conv2D(in_channels=num_channels,
                               out_channels=num_filters,
                               **conv_cfg)
        # Batch normalisation with a fused ReLU activation.
        self.bn1 = nn.BatchNorm(num_filters, act="relu")
        self.conv2 = nn.Conv2D(in_channels=num_filters,
                               out_channels=num_filters,
                               **conv_cfg)
        self.bn2 = nn.BatchNorm(num_filters, act="relu")
        # 2x2 max pooling with stride 2 down-samples the feature map.
        self.pool = nn.MaxPool2D(kernel_size=2, stride=2, padding="SAME")

    def forward(self, inputs):
        """Return ``(features, pooled)``.

        ``features`` is the output of the second conv/bn pair (used as the
        skip connection) and ``pooled`` is that output after max pooling
        (fed to the next stage).
        """
        features = self.bn1(self.conv1(inputs))
        features = self.bn2(self.conv2(features))
        pooled = self.pool(features)
        return features, pooled
class Decoder(nn.Layer):
    """Up-sampling stage: transposed conv, skip concat, two conv + batch-norm.

    Args:
        num_channels: Number of channels of the feature map to up-sample.
        num_filters: Number of output channels. ``conv1`` takes
            ``num_filters * 2`` input channels, so the skip feature map is
            expected to carry ``num_filters`` channels as well.
    """

    def __init__(self, num_channels, num_filters):
        super(Decoder, self).__init__()
        # Kernel 2 / stride 2 transposed convolution doubles height and width.
        self.up = nn.Conv2DTranspose(in_channels=num_channels,
                                     out_channels=num_filters,
                                     kernel_size=2,
                                     stride=2,
                                     padding=0)

        self.conv1 = nn.Conv2D(in_channels=num_filters*2,
                               out_channels=num_filters,
                               kernel_size=3,
                               stride=1,
                               padding=1)
        self.bn1 = nn.BatchNorm(num_filters, act="relu")

        self.conv2 = nn.Conv2D(in_channels=num_filters,
                               out_channels=num_filters,
                               kernel_size=3,
                               stride=1,
                               padding=1)
        self.bn2 = nn.BatchNorm(num_filters, act="relu")

    def forward(self, input_conv, input_pool):
        """Up-sample ``input_pool`` and fuse it with the skip map ``input_conv``.

        Args:
            input_conv: Skip-connection feature map from the matching encoder.
            input_pool: Feature map coming from the previous (deeper) stage.

        Returns:
            The fused feature map with ``num_filters`` channels.
        """
        x = self.up(input_pool)

        # Size mismatch between the skip map and the up-sampled map (NCHW).
        h_diff = (input_conv.shape[2] - x.shape[2])
        w_diff = (input_conv.shape[3] - x.shape[3])
        # Pad2D expects [left, right, top, bottom]: the width padding comes
        # first, then the height padding.
        # NOTE(review): the diffs are negative when the up-sampled map is
        # larger than the skip map - confirm how Pad2D treats negative values.
        pad = nn.Pad2D(padding=[w_diff // 2, w_diff - w_diff // 2,
                                h_diff // 2, h_diff - h_diff // 2])
        x = pad(x)  # match the up-sampled map to the skip map's spatial size

        # Concatenate along channels; this doubles the channel count.
        x = paddle.concat(x=[input_conv, x], axis=1)
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.conv2(x)
        x = self.bn2(x)
        return x
class UNet(nn.Layer):
    """U-Net mapping a 3-channel image to ``num_classes`` output channels.

    Four ``Encoder`` stages, a two-layer 1x1-convolution bottleneck, four
    ``Decoder`` stages with skip connections, and a final 1x1 convolution.
    No activation is applied to the output.

    Args:
        num_classes: Number of output channels (3 for image-to-image use).
    """

    def __init__(self, num_classes=3):
        super().__init__()
        # Down-sampling path.
        self.down1 = Encoder(num_channels=3, num_filters=64)
        self.down2 = Encoder(num_channels=64, num_filters=128)
        self.down3 = Encoder(num_channels=128, num_filters=256)
        self.down4 = Encoder(num_channels=256, num_filters=512)

        # Bottleneck.
        self.mid_conv1 = nn.Conv2D(512, 1024, 1)
        self.mid_bn1 = nn.BatchNorm(1024, act="relu")
        self.mid_conv2 = nn.Conv2D(1024, 1024, 1)
        self.mid_bn2 = nn.BatchNorm(1024, act="relu")

        # Up-sampling path.
        self.up4 = Decoder(1024, 512)
        self.up3 = Decoder(512, 256)
        self.up2 = Decoder(256, 128)
        self.up1 = Decoder(128, 64)

        # 1x1 convolution projecting to the requested number of channels.
        self.last_conv = nn.Conv2D(64, num_classes, 1)

    def forward(self, inputs):
        """Run the network on ``inputs`` and return the projected feature map."""
        skips = []
        x = inputs
        for encoder in (self.down1, self.down2, self.down3, self.down4):
            skip, x = encoder(x)
            skips.append(skip)

        x = self.mid_bn1(self.mid_conv1(x))
        x = self.mid_bn2(self.mid_conv2(x))

        # Decoders consume the skip maps deepest-first.
        decoders = (self.up4, self.up3, self.up2, self.up1)
        for decoder, skip in zip(decoders, reversed(skips)):
            x = decoder(skip, x)

        return self.last_conv(x)
# 查看网络各个节点的输出信息
# paddle.summary(UNet(), (1, 3, 600, 600))
同样秉承着拿来主义的思想,从图像评价指标PSNR、SSIM以及MS-SSIM 复制一份MSSSIM代码过来。
~看不看得懂代码不重要,重要是看得懂文字,明白大佬已经写好了一个现成的直接调用的loss函数~
当然,仅有MSSSIM是不够的,还可以再根据通过Sub-Pixel实现图像超分辨率写一个PSNR的损失函数。
In [78]
import paddle
import paddle.nn.functional as F
def gaussian1d(window_size, sigma):
    """Return a 1-D Gaussian kernel of length ``window_size`` that sums to 1.

    Args:
        window_size: Number of taps in the kernel (e.g. 11).
        sigma: Standard deviation of the Gaussian.
    """
    # Tap positions measured from window_size // 2.
    offsets = paddle.arange(window_size, dtype='float32') - window_size // 2
    kernel = paddle.exp(-(offsets ** 2) / float(2 * sigma ** 2))
    total = kernel.sum()
    return kernel / total
def create_window(window_size, sigma, channel):
    """Build a Gaussian window of shape ``[channel, 1, window_size, window_size]``.

    The 2-D kernel is the outer product of the 1-D Gaussian with itself,
    expanded to one copy per channel.
    """
    column = gaussian1d(window_size, sigma).unsqueeze(1)
    kernel_2d = column.mm(column.t())
    kernel_4d = kernel_2d.unsqueeze(0).unsqueeze(0)
    target_shape = [channel, 1, window_size, window_size]
    return kernel_4d.expand(target_shape)
def _ssim(img1, img2, window, window_size, channel=3 ,data_range = 255.,size_average=True,C=None):
    """Compute SSIM between ``img1`` and ``img2`` with a per-channel window.

    Args:
        img1, img2: Image batches passed to ``F.conv2d`` (presumably NCHW
            with ``channel`` channels - confirm against callers).
        window: Convolution kernel used for the local statistics.
        window_size: Side length of ``window``; padding is ``window_size // 2``.
        channel: Passed as ``groups`` to ``F.conv2d``.
        data_range: Value range of the images, used to scale the constants.
        size_average: If True return a single mean value, otherwise return
            per-channel values.
        C: Optional pair ``(K1, K2)``; ``(0.01, 0.03)`` is used when None.

    Returns:
        If ``size_average`` is True, the mean of the SSIM map. Otherwise a
        tuple ``(ssim, cs)`` where each entry is the map flattened from axis
        2 and averaged over the last axis.
    """
    padding = window_size // 2

    # Local means.
    mu1 = F.conv2d(img1, window, padding=padding, groups=channel)
    mu2 = F.conv2d(img2, window, padding=padding, groups=channel)

    mu1_sq = mu1.pow(2)
    mu2_sq = mu2.pow(2)
    mu1_mu2 = mu1 * mu2

    # Local variances and covariance.
    sigma1_sq = F.conv2d(img1 * img1, window, padding=padding, groups=channel) - mu1_sq
    sigma2_sq = F.conv2d(img2 * img2, window, padding=padding, groups=channel) - mu2_sq
    sigma12 = F.conv2d(img1 * img2, window, padding=padding, groups=channel) - mu1_mu2

    # Identity check: `C == None` would compare element-wise for array-like C.
    if C is None:
        C1 = (0.01*data_range) ** 2
        C2 = (0.03*data_range) ** 2
    else:
        C1 = (C[0]*data_range) ** 2
        C2 = (C[1]*data_range) ** 2

    # sc is the contrast/structure term; lsc multiplies in the luminance term.
    sc = (2 * sigma12 + C2) / (sigma1_sq + sigma2_sq + C2)
    lsc = ((2 * mu1_mu2 + C1) / (mu1_sq + mu2_sq + C1))*sc

    if size_average:
        # Mean over every element of the map.
        return lsc.mean()
    else:
        # Per-channel values.
        return lsc.flatten(2).mean(-1),sc.flatten(2).mean(-1)
def ms_ssim(
    img1, img2,window, data_range=255, size_average=True, window_size=11, channel=3, sigma=1.5, weights=None, C=(0.01, 0.03)
):
    r""" interface of ms-ssim
    Args:
        img1 (paddle.Tensor): a batch of images, (N,C,H,W)
        img2 (paddle.Tensor): a batch of images, (N,C,H,W)
        window (paddle.Tensor or None): gauss kernel of shape
            [channel, 1, window_size, window_size]. if None, a new kernel is
            created from window_size and sigma
        data_range (float or int, optional): value range of input images. (usually 1.0 or 255)
        size_average (bool, optional): if size_average=True, ms-ssim of all images will be averaged as a scalar
        window_size (int, optional): the size of gauss kernel
        channel (int, optional): number of image channels
        sigma (float, optional): sigma of normal distribution
        weights (list, optional): weights for different levels
        C (list or tuple, optional): scalar constants (K1, K2). Try a larger K2 constant (e.g. 0.4) if you get a negative or NaN results.
    Returns:
        paddle.Tensor: ms-ssim results (a scalar when size_average=True,
        otherwise one value per image)
    Raises:
        ValueError: if the inputs differ in shape or dtype, or are not 4-d/5-d
    """
    if not img1.shape == img2.shape:
        raise ValueError("Input images should have the same dimensions.")

    if not img1.dtype == img2.dtype:
        raise ValueError("Input images should have the same dtype.")

    # NOTE(review): 5-d inputs select avg_pool3d here, but _ssim convolves
    # with F.conv2d - confirm that 5-d input is actually supported.
    if len(img1.shape) == 4:
        avg_pool = F.avg_pool2d
    elif len(img1.shape) == 5:
        avg_pool = F.avg_pool3d
    else:
        raise ValueError(f"Input images should be 4-d or 5-d tensors, but got {img1.shape}")

    smaller_side = min(img1.shape[-2:])
    assert smaller_side > (window_size - 1) * (2 ** 4), "Image size should be larger than %d due to the 4 downsamplings " \
                                                        "with window_size %d in ms-ssim" % ((window_size - 1) * (2 ** 4),window_size)

    if weights is None:
        weights = [0.0448, 0.2856, 0.3001, 0.2363, 0.1333]
    # Accept both plain sequences and ready-made tensors.
    if not isinstance(weights, paddle.Tensor):
        weights = paddle.to_tensor(weights)

    if window is None:
        window = create_window(window_size, sigma, channel)
    assert window.shape == [channel, 1, window_size, window_size], " window.shape error"

    levels = weights.shape[0]  # 5 with the default weights
    mcs = []
    for i in range(levels):
        # Use the caller's channel count so it matches the window's groups.
        ssim_per_channel, cs = _ssim(img1, img2, window=window, window_size=window_size,
                                     channel=channel, data_range=data_range, C=C, size_average=False)
        if i < levels - 1:
            mcs.append(F.relu(cs))
            # Pad odd-sized dimensions before halving the resolution.
            padding = [s % 2 for s in img1.shape[2:]]
            img1 = avg_pool(img1, kernel_size=2, padding=padding)
            img2 = avg_pool(img2, kernel_size=2, padding=padding)

    ssim_per_channel = F.relu(ssim_per_channel)  # (batch, channel)
    # Stack per level: (level, batch, channel).
    mcs_and_ssim = paddle.stack(mcs + [ssim_per_channel], axis=0)
    # Weighted product over the levels -> (batch, channel).
    ms_ssim_val = paddle.prod(mcs_and_ssim ** weights.reshape([-1, 1, 1]), axis=0)

    if size_average:
        return ms_ssim_val.mean()
    else:
        # ms_ssim_val is 2-D (batch, channel): average over channels to get
        # one value per image.
        return ms_ssim_val.mean(1)
class SSIMLoss(paddle.nn.Layer):
    """Loss layer returning ``1 - SSIM(input, label)``.

    Args:
        window_size: Side length of the Gaussian window.
        channel: Number of image channels; used both to build the window and
            as the group count of the SSIM convolutions.
        data_range: Value range of the images (e.g. 1 or 255).
        sigma: Standard deviation of the Gaussian window.
    """

    def __init__(self, window_size=11, channel=3, data_range=255., sigma=1.5):
        super(SSIMLoss, self).__init__()
        self.data_range = data_range
        self.C = [0.01, 0.03]
        self.window_size = window_size
        self.channel = channel
        self.sigma = sigma
        # Built once here and reused on every forward call.
        self.window = create_window(self.window_size, self.sigma, self.channel)

    def forward(self, input, label):
        """Return the scalar loss ``1 - SSIM`` averaged over all elements.

        Args:
            input: Model output for one sample or a batch.
            label: Matching ground-truth data.
        """
        # Pass self.channel so the group count always matches self.window.
        return 1-_ssim(input, label,data_range = self.data_range,
                       window = self.window, window_size=self.window_size, channel=self.channel,
                       size_average=True,C=self.C)
class MS_SSIMLoss(paddle.nn.Layer):
    """Loss layer returning ``1 - MS-SSIM(input, label)``.

    Args:
        data_range: Value range of the images (e.g. 1 or 255).
        channel: Number of image channels.
        window_size: Side length of the Gaussian window.
        sigma: Standard deviation of the Gaussian window.
    """

    def __init__(self,data_range=255., channel=3, window_size=11, sigma=1.5):
        super().__init__()
        self.data_range = data_range
        self.C = [0.01, 0.03]
        self.window_size = window_size
        self.channel = channel
        self.sigma = sigma
        # The Gaussian window is created once and shared by all forward calls.
        self.window = create_window(self.window_size, self.sigma, self.channel)

    def forward(self, input, label):
        """Return the scalar loss for the model output ``input`` and ``label``."""
        similarity = ms_ssim(
            input,
            label,
            data_range=self.data_range,
            window=self.window,
            window_size=self.window_size,
            channel=self.channel,
            size_average=True,
            sigma=self.sigma,
            weights=None,
            C=self.C,
        )
        return 1 - similarity
class PSNRLoss(paddle.nn.Layer):
    """Loss layer returning ``100 - 20 * log10(1 / sqrt(MSE))`` per sample.

    The mean squared error is taken over axes 1, 2 and 3, so the result has
    one entry per element of axis 0.
    """

    def __init__(self):
        super().__init__()

    def forward(self, input, label):
        """Return the per-sample loss for ``input`` against ``label``."""
        squared_error = (input - label) ** 2
        mse = squared_error.mean(axis=[1, 2, 3])
        inverse_rmse = mse ** -0.5
        return 100 - 20 * paddle.log10(inverse_rmse)
In [ ]
import os  # local to this cell so it runs on its own

model = UNet()
model.train()

train_dataset = MyDateset()

# Resume from an earlier checkpoint when one is present; otherwise train
# from the freshly initialised weights instead of failing on a missing file.
checkpoint_path = './model.pdparams'
if os.path.exists(checkpoint_path):
    param_dict = paddle.load(checkpoint_path)
    model.load_dict(param_dict)

train_dataloader = paddle.io.DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
    drop_last=False)

losspsnr = PSNRLoss()
lossfn = SSIMLoss(window_size=3,data_range=1)

max_epoch=1
scheduler = paddle.optimizer.lr.CosineAnnealingDecay(learning_rate=0.001, T_max=max_epoch)
opt = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters())

now_step=0
for epoch in range(max_epoch):
    for step, data in enumerate(train_dataloader):
        now_step+=1

        img, label = data
        pre = model(img)
        loss1 = lossfn(pre,label).mean()
        loss2 = losspsnr(pre,label).mean()
        # Combine the SSIM loss with the PSNR loss scaled down by 100.
        loss = (loss1+loss2/100)/2

        loss.backward()
        opt.step()
        opt.clear_gradients()
        if now_step%100==0:
            print("epoch: {}, batch: {}, loss is: {}".format(epoch, step, loss.mean().numpy()))

    # The scheduler only updates the learning rate when stepped explicitly;
    # T_max is counted in epochs, so step once per epoch.
    scheduler.step()

paddle.save(model.state_dict(), 'model.pdparams')
本题目提交需要提交对应的模型和预测文件。predict.py需要读取同目录下的模型信息,预测去水印后的图片并保存。
想要自定义训练模型,只需要将predict.py中的模型和process函数中的do something 替换为自己的模型内容即可。
直接用UNet处理的结果可能不够理想,并非所有的情况都需要通过修正网络来解决。以下述情况为例,把在某个阈值内的颜色都设定为黑色(字的颜色)/白色(背景的颜色),可以让处理结果更契合人眼的需求。在predict.py中已经通过以下语句包含了这样的处理策略:
pre[pre>0.9]=1
pre[pre<0.1]=0
有无上述语句的图片效果如下:
with watermark | without post-treat | with post-treat |
---|---|---|
In [86]
# 压缩可提交文件
! zip submit.zip model.pdparams predict.py
updating: model.pdparams (deflated 7%) updating: predict.py (deflated 69%)
是不是想知道自己训练后的网络去除水印之后的图片到底长啥样?直接下载测试集A看看效果吧~
In [ ]
! wget https://staticsns.cdn.bcebos.com/amis/2022-4/1649745356784/watermark_test_datasets.zip
! unzip -oq watermark_test_datasets.zip
! rm -rf watermark_test_datasets.zip
In [ ]
! python predict.py watermark_test_datasets/images results
预测结束之后,打开results文件夹就能看到去除水印的图片了~
以图片bg_image_00005_0002.jpg为例
with watermark | without watermark |
---|---|
本项目使用极简的方式完成了百度网盘AI大赛-图像处理挑战赛:水印智能消除赛。项目有极大的改进的空间。比如:
最后,祝大家都能有好成绩!
请点击此处查看本环境基本用法.
Please click here for more detailed instructions.