I've recently been reading papers on image quality assessment (IQA), so please bear with me if I've misunderstood anything. Without further ado, let's get into it:
This paper uses deep learning to perform both full-reference and no-reference image quality assessment. Briefly, full-reference IQA measures the quality of a distorted image by comparing it against a reference (pristine) image, whereas no-reference IQA learns a mapping directly from the distorted image to its subjective score, with no reference available.
Overall, the paper exploits the nonlinear fitting capability of neural networks to learn the mapping from an image to its subjective score. The full-reference network structure is shown in the figure below:
The network takes as input a distorted image patch and the corresponding reference patch. These patches are randomly cropped from the original images with a size of 32x32x3 (32 is the width and height, 3 is the number of channels). As for how many patches to crop, the author analyzes the effect of the number of patches on the model; here I use 32 patches as a representative setting. The label is the subjective score of the distorted image (this is just my understanding). I used the same approach in my reproduction, but my results were not as good as the author's, which puzzles me; if you see it differently, feel free to leave a comment below for discussion.

The inputs are fed into a weight-sharing Siamese network. The author seems to treat the CNN here purely as a statistical feature extractor, without modelling the difference between the two inputs of the pair. The extracted features are then fused: the feature difference is concatenated with the original features to form the new feature. What I find a little odd is that the feature difference already describes the distance between the high-level features of the distorted and reference images, so I'm not sure why the author also concatenates the original features. Next come two parallel fully connected branches. The author uses the upper branch as the patch weight: since patches contribute unequally to the overall image quality, the network is made to learn a weight for each patch. The lower branch outputs the predicted score of each patch, and the final pooling, as I understand it, is simply a weighted averaging of these scores.
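Concretely, the weighted pooling can be written as follows (this is also exactly what the reimplementation below computes):

Q = Σ_i (w_i · y_i) / Σ_i w_i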
Here Q is the output, y_i is the predicted score of patch i, and w_i is its weight. The CNN parameters are spelled out clearly in the figure above: a VGG-style network is used to extract features. In the upper (weight) branch, the first fully connected layer has 512 outputs with ReLU activation, followed by a dropout layer with keep probability 0.5, then a fully connected layer with a single ReLU-activated output, to which a small epsilon = 0.000001 is added to guarantee the weight is positive. The lower (score) branch mirrors the upper one, except that its second fully connected layer has no ReLU activation and no epsilon. The loss is the absolute-error (L1) loss, and the optimizer is Adam.
The author's code is available here: https://github.com/dmaniry/deepIQA
I didn't have the framework the author used installed locally, so I reimplemented the training code in TensorFlow as follows:
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 3 10:56:43 2018
@author: xxxxx
"""
import tensorflow as tf
import scipy.io
import os
import numpy as np
slim = tf.contrib.slim
lr = 0.0001  # initial learning rate
score_file = scipy.io.loadmat('E:/IQA/LIVEDATASET/dmos_realigned.mat')  # LIVE label file (not used below; labels come from the per-image .mat files)
path = 'F:/2号机文件/data/train/'  # per-image .mat files used for training
def minibatches(inputs=None, inputs2=None, targets=None, batch_size=None, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batch_size + 1, batch_size):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batch_size]
        else:
            excerpt = slice(start_idx, start_idx + batch_size)
        yield inputs[excerpt], inputs2[excerpt], targets[excerpt]
def random_crop(image_ref, image_dis, num_output, size):  # randomly crop aligned patch pairs
    h, w = image_ref.shape[:2]
    random_h = np.random.randint(h - size, size=num_output)
    random_w = np.random.randint(w - size, size=num_output)
    patches_dis = []
    patches_ref = []
    for i in range(num_output):
        patch_dis = image_dis[random_h[i]:random_h[i] + size, random_w[i]:random_w[i] + size]
        patch_ref = image_ref[random_h[i]:random_h[i] + size, random_w[i]:random_w[i] + size]
        patches_ref.append(patch_ref)
        patches_dis.append(patch_dis)
    return patches_ref, patches_dis
def read_data(path):
    # Each .mat file holds one reference/distorted pair ('dataref', 'datadis') and its score ('label').
    images_ref = []
    images_dis = []
    labels = []
    filename = os.listdir(path)
    for fn in filename:
        data = scipy.io.loadmat(path + fn)
        print('read data :%s' % (path + fn))
        imageref = data['dataref']
        imagedis = data['datadis']
        label = data['label'][0, 0]
        patches_ref, patches_dis = random_crop(imageref, imagedis, num_output=32, size=32)
        for p1 in patches_ref:
            images_ref.append(p1)
            labels.append(label)  # every patch inherits the subjective score of its image
        for p2 in patches_dis:
            images_dis.append(p2)
    return np.asarray(images_ref, np.float), np.asarray(images_dis, np.float), np.asarray(labels, np.float)
data_ref, data_dis, label = read_data(path)  # load the training data
x_ref = tf.placeholder(tf.float32, shape=[None, 32, 32, 3])
x_dis = tf.placeholder(tf.float32, shape=[None, 32, 32, 3])
y = tf.placeholder(tf.float32, shape=[None, ])
lr_ph = tf.placeholder(tf.float32)  # learning rate is fed in so that the decay at epoch 1000 actually takes effect
def vgg_feature_extrate(x):
    conv1_weight=tf.get_variable(name='conv1_weight',shape=[3,3,3,32],initializer=tf.contrib.layers.xavier_initializer())
    conv1_bias=tf.get_variable(name='conv1_bias',shape=[32],initializer=tf.constant_initializer(value=0.))
    conv1=tf.nn.conv2d(x,conv1_weight,strides=[1,1,1,1],padding='SAME')
    conv1=tf.nn.relu(conv1+conv1_bias)
    conv2_weight=tf.get_variable(name='conv2_weight',shape=[3,3,32,32],initializer=tf.contrib.layers.xavier_initializer())
    conv2_bias=tf.get_variable(name='conv2_bias',shape=[32],initializer=tf.constant_initializer(value=0.))
    conv2=tf.nn.conv2d(conv1,conv2_weight,strides=[1,1,1,1],padding='SAME')
    conv2=tf.nn.relu(conv2+conv2_bias)
    pool1=tf.nn.max_pool(conv2,ksize=[1,2,2,1],strides=[1,2,2,1],padding='VALID')
    conv3_weight=tf.get_variable(name='conv3_weight',shape=[3,3,32,64],initializer=tf.contrib.layers.xavier_initializer())
    conv3_bias=tf.get_variable(name='conv3_bias',shape=[64],initializer=tf.constant_initializer(value=0.))
    conv3=tf.nn.conv2d(pool1,conv3_weight,strides=[1,1,1,1],padding='SAME')
    conv3=tf.nn.relu(conv3+conv3_bias)
    conv4_weight=tf.get_variable(name='conv4_weight',shape=[3,3,64,64],initializer=tf.contrib.layers.xavier_initializer())
    conv4_bias=tf.get_variable(name='conv4_bias',shape=[64],initializer=tf.constant_initializer(value=0.))
    conv4=tf.nn.conv2d(conv3,conv4_weight,strides=[1,1,1,1],padding='SAME')
    conv4=tf.nn.relu(conv4+conv4_bias)
    pool2=tf.nn.max_pool(conv4,ksize=[1,2,2,1],strides=[1,2,2,1],padding='VALID')
    conv5_weight=tf.get_variable(name='conv5_weight',shape=[3,3,64,128],initializer=tf.contrib.layers.xavier_initializer())
    conv5_bias=tf.get_variable(name='conv5_bias',shape=[128],initializer=tf.constant_initializer(value=0.))
    conv5=tf.nn.conv2d(pool2,conv5_weight,strides=[1,1,1,1],padding='SAME')
    conv5=tf.nn.relu(conv5+conv5_bias)
    conv6_weight=tf.get_variable(name='conv6_weight',shape=[3,3,128,128],initializer=tf.contrib.layers.xavier_initializer())
    conv6_bias=tf.get_variable(name='conv6_bias',shape=[128],initializer=tf.constant_initializer(value=0.))
    conv6=tf.nn.conv2d(conv5,conv6_weight,strides=[1,1,1,1],padding='SAME')
    conv6=tf.nn.relu(conv6+conv6_bias)
    pool3=tf.nn.max_pool(conv6,ksize=[1,2,2,1],strides=[1,2,2,1],padding='VALID')
    conv7_weight=tf.get_variable(name='conv7_weight',shape=[3,3,128,256],initializer=tf.contrib.layers.xavier_initializer())
    conv7_bias=tf.get_variable(name='conv7_bias',shape=[256],initializer=tf.constant_initializer(value=0.))
    conv7=tf.nn.conv2d(pool3,conv7_weight,strides=[1,1,1,1],padding='SAME')
    conv7=tf.nn.relu(conv7+conv7_bias)
    conv8_weight=tf.get_variable(name='conv8_weight',shape=[3,3,256,256],initializer=tf.contrib.layers.xavier_initializer())
    conv8_bias=tf.get_variable(name='conv8_bias',shape=[256],initializer=tf.constant_initializer(value=0.))
    conv8=tf.nn.conv2d(conv7,conv8_weight,strides=[1,1,1,1],padding='SAME')
    conv8=tf.nn.relu(conv8+conv8_bias)
    pool4=tf.nn.max_pool(conv8,ksize=[1,2,2,1],strides=[1,2,2,1],padding='VALID')
    conv9_weight=tf.get_variable(name='conv9_weight',shape=[3,3,256,512],initializer=tf.contrib.layers.xavier_initializer())
    conv9_bias=tf.get_variable(name='conv9_bias',shape=[512],initializer=tf.constant_initializer(value=0.))
    conv9=tf.nn.conv2d(pool4,conv9_weight,strides=[1,1,1,1],padding='SAME')
    conv9=tf.nn.relu(conv9+conv9_bias)
    conv10_weight=tf.get_variable(name='conv10_weight',shape=[3,3,512,512],initializer=tf.contrib.layers.xavier_initializer())
    conv10_bias=tf.get_variable(name='conv10_bias',shape=[512],initializer=tf.constant_initializer(value=0.))
    conv10=tf.nn.conv2d(conv9,conv10_weight,strides=[1,1,1,1],padding='SAME')
    conv10=tf.nn.relu(conv10+conv10_bias)
    pool5=tf.nn.max_pool(conv10,ksize=[1,2,2,1],strides=[1,2,2,1],padding='VALID')  # 1x1x512 feature for a 32x32 patch
    return pool5
with tf.variable_scope('feature_extrcrate') as scope:  # Siamese branches share the same weights
    fr = vgg_feature_extrate(x_ref)
    scope.reuse_variables()
    fd = vgg_feature_extrate(x_dis)
feature_fusion = tf.concat([fd, fr, fr - fd], axis=3)  # fuse distorted, reference and difference features
reshape1 = tf.reshape(feature_fusion, [-1, 512 * 3])
# Score branch: per-patch quality score.
dense1 = tf.layers.dense(inputs=reshape1,
                         units=512,
                         activation=tf.nn.relu,
                         kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
                         kernel_regularizer=tf.contrib.layers.l2_regularizer(0.003))
drop1 = tf.nn.dropout(dense1, keep_prob=0.5)
dense2 = tf.layers.dense(inputs=drop1,
                         units=1,
                         activation=None,
                         kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
                         kernel_regularizer=tf.contrib.layers.l2_regularizer(0.003))
# Weight branch: per-patch importance weight (ReLU output, kept positive by the epsilon below).
dense1_1 = tf.layers.dense(inputs=reshape1,
                           units=512,
                           activation=tf.nn.relu,
                           kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
                           kernel_regularizer=tf.contrib.layers.l2_regularizer(0.003))
drop1_1 = tf.nn.dropout(dense1_1, keep_prob=0.5)
dense2_1 = tf.layers.dense(inputs=drop1_1,
                           units=1,
                           activation=tf.nn.relu,
                           kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
                           kernel_regularizer=tf.contrib.layers.l2_regularizer(0.003))
epsilon = tf.multiply(tf.ones_like(dense2_1), 0.000001)
patch_weights = dense2_1 + epsilon  # keep every patch weight strictly positive
sum_of_weight = tf.reduce_sum(patch_weights)
# Weighted-average pooling over the batch; with shuffle=False each batch is exactly
# the 32 patches cropped from one image, so this pools the patch scores of that image.
output = tf.reduce_sum(tf.multiply(dense2, patch_weights)) / sum_of_weight
mean_score = tf.reduce_mean(y)  # all labels in the batch are the same image's score
loss = tf.reduce_mean(tf.abs(output - mean_score))  # absolute-error (L1) loss, as in the paper
f=open('/home/IQA/document/log.txt','a')
saver=tf.train.Saver()
train_op = tf.train.AdamOptimizer(learning_rate=lr_ph).minimize(loss)
n_epoch=2000
batch_size=32
sess=tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
for epoch in range(n_epoch):
    if epoch == 1000:
        lr = lr * 0.1  # decay the learning rate; it is passed in through lr_ph below
    print('training steps : %d' % (epoch))
    train_loss, n_batch = 0, 0
    for x_train_a, x_train_b, y_train_a in minibatches(data_ref, data_dis, label, batch_size, shuffle=False):
        _, err = sess.run([train_op, loss], feed_dict={x_ref: x_train_a, x_dis: x_train_b, y: y_train_a, lr_ph: lr})
        train_loss += err; n_batch += 1
    print(" train loss: %f" % (train_loss / n_batch))
    f.writelines(" train loss: %f" % (train_loss / n_batch) + '\n')  # log the training loss
saver.save(sess, '/home/IQA/model/model.ckpt')  # the model is saved only once, after the last epoch
sess.close()
f.close()
You will need the LIVE dataset; download link: https://pan.baidu.com/s/1LE3lXTzM5a3PtF387HyH2A, extraction code: 60cm
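One note on data preparation: the read_data function above expects one .mat file per training image, containing the keys 'dataref' (reference image), 'datadis' (distorted image) and 'label' (subjective score). This is not the raw LIVE layout, so the files have to be generated beforehand. Below is a minimal sketch of how that could be done, assuming you have already collected (reference path, distorted path, DMOS) triples from LIVE; the pairs list, the output file naming and the use of imageio are my own choices here, not the author's pipeline:

import os
import imageio   # any image reader works here; imageio is just one option
import scipy.io

pairs = []  # fill with (reference path, distorted path, DMOS) triples parsed from LIVE's info files
out_dir = 'F:/2号机文件/data/train/'  # the same directory the training script reads from

for i, (ref_path, dis_path, dmos) in enumerate(pairs):
    ref = imageio.imread(ref_path)
    dis = imageio.imread(dis_path)
    # One .mat file per distorted image, with the keys expected by read_data().
    scipy.io.savemat(os.path.join(out_dir, '%05d.mat' % i),
                     {'dataref': ref, 'datadis': dis, 'label': dmos})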
Next, let's look at the paper's no-reference model. The no-reference model is essentially the full-reference model with the reference-image feature extraction branch removed. Its structure is shown in the figure below.
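In code terms, only the distorted-patch branch is kept, the feature fusion step disappears, and the fully connected head sits directly on top of fd. A minimal sketch of how the graph above would change (reusing the same vgg_feature_extrate and the same two-branch head; this is my reading of the model, not the author's code):

# No-reference variant: only distorted patches are fed in, there is no reference branch.
with tf.variable_scope('feature_extrcrate') as scope:
    fd = vgg_feature_extrate(x_dis)
reshape1 = tf.reshape(fd, [-1, 512])  # 512 instead of 512*3: no concatenation with reference features
# The two fully connected branches (per-patch score, per-patch weight) and the
# weighted-average pooling are built exactly as in the full-reference code above.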
Moreover, the no-reference model is trained on top of the full-reference one, i.e., it is initialized with the weights of the full-reference model above. One thing I find curious: since the training data come from random cropping, the test results differ from run to run. My reproduced results are still quite a bit worse than the author's: the author reports a Spearman rank-order correlation coefficient of 0.97 for the full-reference model, while mine is around 0.93. I'm not sure why; if anyone knows, I'd be glad to discuss it.
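For initializing the no-reference model from the full-reference checkpoint saved above, one straightforward option in TensorFlow is to restore only the variables whose names and shapes also exist in that checkpoint (the first fully connected layer of each branch changes shape because its input goes from 512*3 to 512, so it gets skipped). The filtering below is my own sketch, not the author's procedure:

# Restore the shared weights (CNN and matching FC layers) from the FR checkpoint.
reader = tf.train.NewCheckpointReader('/home/IQA/model/model.ckpt')
fr_vars = reader.get_variable_to_shape_map()  # {variable name: shape} stored in the FR checkpoint
shared = [v for v in tf.global_variables()
          if v.op.name in fr_vars and v.get_shape().as_list() == fr_vars[v.op.name]]
restorer = tf.train.Saver(var_list=shared)
sess.run(tf.global_variables_initializer())            # initialize everything first
restorer.restore(sess, '/home/IQA/model/model.ckpt')   # then overwrite the shared weights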