项目地址:https://aistudio.baidu.com/aistudio/projectdetail/843989
文章来源:2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)
下载链接:Photo-Realistic Single Image Super-Resolution Using a Generative Adversarial Network
该项目针对的问题是:想把一张很小的图片放大时,通常会使用 resize 操作,但放大倍数越大,图像就会变得越模糊。该项目的解决方法是:通过神经网络对图像的分辨率进行重构,得到一张既放大又清晰的图片。
此项目选用的是 2017 年 CVPR 的论文《Photo-Realistic Single Image Super-Resolution Using a Generative Adversarial Network》。这篇论文的难度不大,但它的重构思想,从学习的角度来说还是能让我们有很大收获的~
作者认为,在这篇文章之前,主要的重建工作都集中在最小化均方重建误差上;这篇文章是生成式对抗网络第一次应用于4倍下采样图像的超分辨重建工作。最小化均方误差得到的估计值具有较高的峰值信噪比,但它们通常缺少高频细节,并且在感知上不令人满意,因为它们无法匹配在更高分辨率下预期的保真度。
为了达到能够在4倍放大因子下推断照片真实自然图像的目的,作者提出了一个由对抗性损失和内容损失组成的感知损失函数。其中,对抗性损失借助一个经过训练的判别器网络来区分超分辨率图像和原始照片真实感图像;此外,内容损失由感知相似度(基于预训练VGG19网络提取的特征)驱动,而不是像素空间中的相似性。作者的深度残差网络能够在公共基准上从经过大幅下采样的图像中恢复照片真实感纹理。与任何最先进的方法相比,用SRGAN获得的MOS分数更接近原始高分辨率图像的MOS分数。
In [ ]:
# unzip dataset !unzip -q data/data50762/SRDATA.zip
In [ ]:
# Download the vgg19_spade archive, then unpack it in place.
!wget https://paddle-gan-models.bj.bcebos.com/vgg19_spade.tar.gz
!tar -zxvf vgg19_spade.tar.gz
In [ ]:
import cv2 import os import glob import tqdm import time import shutil import scipy import random import numpy as np import paddle import paddle.fluid as fluid
In [ ]:
## Adam batch_size = 32 lr = 0.001 beta1 = 0.9 use_gpu = True ## initialize G n_epoch_init = 50 ## adversarial learning (SRGAN) n_epoch = 2000 ## train set location train_hr_img_path = '/home/aistudio/srdata/DIV2K_train_HR' train_lr_img_path = '/home/aistudio/srdata/DIV2K_train_LR_bicubic/X4' ## test set location valid_hr_img_path = '/home/aistudio/srdata/DIV2K_valid_HR' valid_lr_img_path = '/home/aistudio/srdata/DIV2K_valid_LR_bicubic/X4'
In [ ]:
# load im path to list
def load_file_list(im_path, im_format):
    """Return the paths inside ``im_path`` that match ``im_format``.

    Args:
        im_path: Directory to search.
        im_format: Glob pattern for the file names, e.g. ``'*.png'``.

    Returns:
        A list of matching paths as produced by ``glob.glob``; the list is
        empty when nothing matches. No ordering is applied here.
    """
    search_pattern = os.path.join(im_path, im_format)
    matches = glob.glob(search_pattern)
    return matches
In [ ]:
# read im to list
def im_read(im_path):
    """Read every image file in ``im_path`` and return them as RGB arrays.

    Args:
        im_path: Iterable of image file paths.

    Returns:
        A list with one decoded image per path, in the same order as
        ``im_path``. Each image has been converted from OpenCV's BGR channel
        order to RGB.

    Note:
        ``cv2.imread`` returns ``None`` for a path it cannot read, in which
        case ``cv2.cvtColor`` raises.
    """
    # cv2.imread decodes to BGR; convert to RGB and keep the *converted*
    # image. The previous code computed the conversion but appended the
    # unconverted BGR array, so the conversion had no effect.
    # NOTE(review): code elsewhere that compensated for BGR data (e.g. when
    # writing images back with cv2) should be re-checked against this fix.
    return [
        cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)
        for path in im_path
    ]
In [ ]:
def random_crop(im_set, image_size):
    """Cut one random square patch out of every image in ``im_set``.

    Args:
        im_set: Iterable of 3-dimensional image arrays (height, width,
            channels).
        image_size: Side length of the square patch, in pixels.

    Returns:
        A list containing one ``image_size`` x ``image_size`` patch per input
        image, in input order.

    Raises:
        ValueError: If an image is smaller than ``image_size`` along height
            or width (``random.randint`` receives an empty range), or if an
            image's shape does not have exactly three dimensions.
    """
    def _crop_one(image):
        height, width, _ = image.shape
        # Draw the row offset first, then the column offset.
        top = random.randint(0, height - image_size)
        left = random.randint(0, width - image_size)
        return image[top:top + image_size, left:left + image_size]

    return [_crop_one(image) for image in im_set]
In [ ]:
# resize im // change im channels
def im_resize(imgs, im_w, im_h, pattern='rgb'):
    """Resize every image to ``im_w`` x ``im_h`` and stack them as float32.

    Args:
        imgs: Iterable of image arrays accepted by ``cv2.resize``.
        im_w: Target width in pixels.
        im_h: Target height in pixels.
        pattern: Accepted for interface compatibility; not used by this
            function.

    Returns:
        A ``float32`` numpy array built from the resized images (bilinear
        interpolation), in input order.
    """
    target_size = (im_w, im_h)  # cv2.resize takes (width, height)
    resized = [
        cv2.resize(img, target_size, interpolation=cv2.INTER_LINEAR)
        for img in imgs
    ]
    return np.array(resized, dtype='float32')
In [ ]:
# data standardization
def standardized(imgs):
    """Rescale pixel values with ``x / 127.5 - 1``.

    This maps 0 to -1 and 255 to 1.

    Args:
        imgs: Iterable of numeric image arrays.

    Returns:
        A numpy array holding the rescaled images, in input order.
    """
    half_range = 127.5
    rescaled = []
    for pixels in imgs:
        rescaled.append(pixels / half_range - 1)
    return np.array(rescaled)
In [ ]:
# load im path to list
def _sorted_png_paths(directory):
    """Return the sorted list of ``*.png`` paths found in ``directory``."""
    return sorted(load_file_list(directory, '*.png'))


# Sorting keeps each path list in a deterministic, name-based order.
train_hr_img_list = _sorted_png_paths(train_hr_img_path)
train_lr_img_list = _sorted_png_paths(train_lr_img_path)
valid_hr_img_list = _sorted_png_paths(valid_hr_img_path)
valid_lr_img_list = _sorted_png_paths(valid_lr_img_path)
In [ ]:
# load im data train_hr_imgs = im_read(train_hr_img_list) train_lr_imgs = im_read(train_lr_img_list) valid_hr_imgs = im_read(valid_hr_img_list) valid_lr_imgs = im_read(valid_lr_img_list)
生成器网络的体系结构,每个卷积层对应的内核大小(k)、特征映射数(n)和步长(s)。
In [ ]:
def _g_conv(x, num_filters, name, filter_size=3):
    """Stride-1, 'SAME'-padded NCHW convolution used throughout the generator."""
    return fluid.layers.conv2d(input=x, num_filters=num_filters,
                               filter_size=filter_size, stride=1,
                               padding='SAME', name=name, data_format='NCHW')


def _g_bn(x, name=None):
    """Batch norm with the generator's shared momentum/epsilon settings."""
    return fluid.layers.batch_norm(x, momentum=0.99, epsilon=0.001, name=name)


def _g_upsample(x, name):
    """Conv(256) -> pixel_shuffle(x2) -> ReLU upsampling stage."""
    x = _g_conv(x, 256, name)
    x = fluid.layers.pixel_shuffle(x, upscale_factor=2)
    return fluid.layers.relu(x, name=None)


def SRGAN_g(t_image):
    """Build the SRGAN generator graph on top of ``t_image``.

    Layout: Conv-BN-ReLU stem, 16 residual blocks
    (Conv-BN-ReLU-Conv-BN-add), a Conv-BN stage added back to the stem
    output, two pixel-shuffle upsampling stages (factor 2 each), and a
    final 1x1 convolution to 3 channels followed by tanh.

    Args:
        t_image: Input variable for the low-resolution batch; the
            convolutions are configured with ``data_format='NCHW'``.

    Returns:
        The tanh-activated output variable of the generator.
    """
    num_res_blocks = 16

    # Input-Conv-BN-Relu
    n = _g_conv(t_image, 64, 'n64s1/c')
    n = _g_bn(n)
    n = fluid.layers.relu(n, name=None)
    stem = n  # kept for the long skip connection after the residual blocks

    # B residual blocks
    # Conv-BN-Relu-Conv-BN-Elementwise_add
    for i in range(num_res_blocks):
        nn = _g_conv(n, 64, 'n64s1/c1/%s' % i)
        nn = _g_bn(nn, name='n64s1/b1/%s' % i)
        nn = fluid.layers.relu(nn, name=None)
        nn = _g_conv(nn, 64, 'n64s1/c2/%s' % i)
        nn = _g_bn(nn, name='n64s1/b2/%s' % i)
        n = fluid.layers.elementwise_add(n, nn, act=None,
                                         name='b_residual_add/%s' % i)

    n = _g_conv(n, 64, 'n64s1/c/m')
    # NOTE(review): the original formatted this name with the loop variable
    # left over from the residual loop, so it repeats the name of the last
    # block's second batch_norm ('n64s1/b2/15'). The string is kept
    # unchanged so layer naming does not shift; consider a distinct name
    # (e.g. 'n64s1/b/m') if no saved parameters depend on it.
    n = _g_bn(n, name='n64s1/b2/%s' % (num_res_blocks - 1))
    n = fluid.layers.elementwise_add(n, stem, act=None, name='add3')
    # B residual blocks end

    # Conv-Pixel_shuffle-Conv-Pixel_shuffle-Conv
    n = _g_upsample(n, 'n256s1/1')
    n = _g_upsample(n, 'n256s1/2')

    n = _g_conv(n, 3, 'out', filter_size=1)
    n = fluid.layers.tanh(n, name=None)
    return n
鉴别器网络的体系结构,每个卷积层对应的内核大小(k)、特征映射数(n)和步长(s)。
判别器由 Conv、BN、Leaky_Relu、fc 等层组成,用于对输入图像进行判断:判断其是来自真实的训练数据,还是由生成网络生成的图像。
In [ ]:
def SRGAN_d(input_images): # Conv-Leaky_Relu net_h0 = fluid.layers.conv2d(input=input_images, num_filters=64, filter_size=4, stride=2, padding='SAME', name='h0/c', data_format='NCHW') net_h0 = fluid.layers.leaky_relu(net_h0, alpha=0.2, name=None) # h1 Cnov-BN-Leaky_Relu net_h1 = fluid.layers.conv2d(input=net_h0, num_filters=128, filter_size=4, stride=2, padding='SAME', name='h1/c', data_format='NCHW') net_h1 = fluid.layers.batch_norm(net_h1, momentum=0.99, epsilon=0.001, name='h1/bn') net_h1 = fluid.layers.leaky_relu(net_h1, alpha=0.2, name=None) # h2 Cnov-BN-Leaky_Relu net_h2 = fluid.layers.conv2d(input=net_h1, num_filters=256, filter_size=4, stride=2, padding='SAME', name='h2/c', data_format='NCHW') net_h2 = fluid.layers.batch_norm(net_h2, momentum=0.99, epsilon=0.001, name='h2/bn') net_h2 = fluid.layers.leaky_relu(net_h2, alpha=0.2, name=None) # h3 Cnov-BN-Leaky_Relu net_h3 = fluid.layers.conv2d(input=net_h2, num_filters=512, filter_size=4, stride=2, padding='SAME', name='h3/c', data_format='NCHW') net_h3 = fluid.layers.batch_norm(net_h3, momentum=0.99, epsilon=0.001, name='h3/bn') net_h3 = fluid.layers.leaky_relu(net_h3, alpha=0.2, name=None) # h4 Cnov-BN-Leaky_Relu net_h4 = fluid.layers.conv2d(input=net_h3, num_filters=1024, filter_size=4, stride=2, padding='SAME', name='h4/c', data_format='NCHW') net_h4 = fluid.layers.batch_norm(net_h4, momentum=0.99, epsilon=0.001, name='h4/bn') net_h4 = fluid.layers.leaky_relu(net_h4, alpha=0.2, name=None) # h5 Cnov-BN-Leaky_Relu net_h5 = fluid.layers.conv2d(input=net_h4, num_filters=2048, filter_size=4, stride=2, padding='SAME', name='h5/c', data_format='NCHW') net_h5 = fluid.layers.batch_norm(net_h5, momentum=0.99, epsilon=0.001, name='h5/bn') net_h5 = fluid.layers.leaky_relu(net_h5, alpha=0.2, name=None) # h6 Cnov-BN-Leaky_Relu net_h6 = fluid.layers.conv2d(input=net_h5, num_filters=1024, filter_size=4, stride=2, padding='SAME', name='h6/c', data_format='NCHW') net_h6 = fluid.layers.batch_norm(net_h6, momentum=0.99, 
epsilon=0.001, name='h6/bn') net_h6 = fluid.layers.leaky_relu(net_h6, alpha=0.2, name=None) # h7 Cnov-BN-Leaky_Relu net_h7 = fluid.layers.conv2d(input=net_h6, num_filters=512, filter_size=4, stride=2, padding='SAME', name='h7/c', data_format='NCHW') net_h7 = fluid.layers.batch_norm(net_h7, momentum=0.99, epsilon=0.001, name='h7/bn') net_h7 = fluid.layers.leaky_relu(net_h7, alpha=0.2, name=None) #修改原论文网络 net = fluid.layers.conv2d(input=net_h7, num_filters=128, filter_size=1, stride=1, padding='SAME', name='res/c', data_format='NCHW') net = fluid.layers.batch_norm(net, momentum=0.99, epsilon=0.001, name='res/bn') net = fluid.layers.leaky_relu(net, alpha=0.2, name=None) net = fluid.layers.conv2d(input=net_h7, num_filters=128, filter_size=3, stride=1, padding='SAME', name='res/c2', data_format='NCHW') net = fluid.layers.batch_norm(net, momentum=0.99, epsilon=0.001, name='res/bn2') net = fluid.layers.leaky_relu(net, alpha=0.2, name=None) net = fluid.layers.conv2d(input=net_h7, num_filters=512, filter_size=3, stride=1, padding='SAME', name='res/c3', data_format='NCHW') net = fluid.layers.batch_norm(net, mom