将数据写成TFRecord二进制文件格式(参考 https://www.tensorflow.org/tutorials/load_data/tfrecord)。
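写一个TFRecord文件大概有3个流程:(1)把数据转成bytes并封装成tf.train.Feature;(2)用这些Feature构造tf.train.Example;(3)将Example序列化后通过TFRecordWriter写入文件。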
def data_to_tfrecord(noisy_dir, gt_dir, tfrecord_save_dir):
    """Cut random noisy/ground-truth patch pairs and save each one as a TFRecord.

    For every file name in ``noisy_dir`` the matching ground-truth file is
    looked up in ``gt_dir`` by replacing ``'NOISY'`` with ``'GT'`` in the name.
    Both images are converted to float32 and divided by 255, then eight
    224x224 patches are cropped at the same random position from both arrays.
    Each patch pair is written to its own file
    ``<name before the first dot>_<patch index>.tfrecord`` in
    ``tfrecord_save_dir`` with the raw float32 bytes stored under the
    features ``'noisy'`` and ``'gt'``.

    Args:
        noisy_dir: directory containing the noisy images.
        gt_dir: directory containing the ground-truth images.
        tfrecord_save_dir: directory the ``.tfrecord`` files are written to.

    Images too small to take a crop from are skipped with a message instead
    of crashing inside ``np.random.randint``. An existing output file is
    reported and removed before it is rewritten.
    """
    crop_size = 224
    patches_per_image = 8

    for noisy_name in os.listdir(noisy_dir):
        gt_name = noisy_name.replace('NOISY', 'GT')

        # np.asarray(...).astype(...) copies the pixel data, so the image
        # files can be closed as soon as the arrays exist.
        with Image.open(os.path.join(noisy_dir, noisy_name)) as noisy_img:
            noisy_np = np.asarray(noisy_img).astype(np.float32) / 255.0
        with Image.open(os.path.join(gt_dir, gt_name)) as gt_img:
            gt_np = np.asarray(gt_img).astype(np.float32) / 255.0

        # numpy image arrays are laid out as (rows, columns, channels).
        height, width, _ = noisy_np.shape

        # randint(1, dim - crop_size - 1) needs a non-empty range.
        if height - crop_size - 1 <= 1 or width - crop_size - 1 <= 1:
            print("%s is too small for a %dx%d crop, skipped"
                  % (noisy_name, crop_size, crop_size))
            continue

        save_name = noisy_name.split('.')[0]

        for repeate_time in range(patches_per_image):
            # Column offset first, then row offset (same draw order as before).
            xx = np.random.randint(1, width - crop_size - 1)
            yy = np.random.randint(1, height - crop_size - 1)

            noisy_patch = noisy_np[yy:yy + crop_size, xx:xx + crop_size, :]
            gt_patch = gt_np[yy:yy + crop_size, xx:xx + crop_size, :]
            assert noisy_patch.shape == gt_patch.shape, \
                "patch shapes differ for %s and %s" % (noisy_name, gt_name)

            noisy_byte = noisy_patch.tobytes()
            gt_byte = gt_patch.tobytes()

            save_tfrecord_file = os.path.join(
                tfrecord_save_dir,
                save_name + '_' + str(repeate_time) + '.tfrecord')
            if os.path.isfile(save_tfrecord_file):
                print("%s exists" % save_tfrecord_file)
                os.remove(save_tfrecord_file)

            example = tf.train.Example(features=tf.train.Features(
                feature={
                    'noisy': _bytes_feature(noisy_byte),
                    'gt': _bytes_feature(gt_byte),
                }
            ))
            # The context manager closes the writer even if write() raises.
            with tf.io.TFRecordWriter(save_tfrecord_file) as writer:
                writer.write(example.SerializeToString())
读入数据是写TFRecord文件数据的逆过程,注意数据格式和reshape大小要分别和写入时保持一致。
def get_noisy_gt_pair(tfrecord_lists, crop_size=crop_size):
    """Build an endless, shuffled, batched input pipeline of patch pairs.

    Args:
        tfrecord_lists: TFRecord file name(s) passed to
            ``tf.data.TFRecordDataset``.
        crop_size: side length used to reshape the decoded patches; it must
            match the size used when the records were written.

    Returns:
        A ``(noisy, gt)`` pair of tensors produced by a one-shot iterator.
        Each example is reshaped to ``[crop_size, crop_size, 3]`` before
        batching with ``BATCH_SIZE``.
    """
    def parser(record):
        """Decode one serialized example into float32 noisy/gt patches."""
        name_to_features = {
            'noisy': tf.FixedLenFeature([], tf.string),
            'gt': tf.FixedLenFeature([], tf.string)
        }
        example = tf.parse_single_example(record, features=name_to_features)
        # The raw bytes are decoded as float32 and given a fixed 3-channel shape.
        noisy = tf.decode_raw(example["noisy"], tf.float32)
        noisy = tf.reshape(noisy, [crop_size, crop_size, 3])
        gt = tf.decode_raw(example["gt"], tf.float32)
        gt = tf.reshape(gt, [crop_size, crop_size, 3])
        return noisy, gt

    dataset = tf.data.TFRecordDataset(tfrecord_lists).repeat()
    # Shuffle individual examples BEFORE batching: shuffling after batch()
    # only reorders whole batches, so the examples inside a batch never mix,
    # and the shuffle buffer would hold BUFFER_SIZE complete batches.
    dataset = dataset.map(parser).shuffle(buffer_size=BUFFER_SIZE).batch(BATCH_SIZE)
    iterator = dataset.make_one_shot_iterator()
    noisy, gt = iterator.get_next()
    return noisy, gt
在多GPU上训练涉及到模型参数的复用,要设置reuse=tf.AUTO_REUSE
def dncnn(input, is_training=True, output_channels=3):
    """Build the denoising network and return ``input`` minus its output.

    All variables live under the ``'denoising'`` variable scope, opened with
    ``reuse=tf.AUTO_REUSE`` so that repeated calls share the same weights.

    Args:
        input: image tensor fed to the first convolution.
        is_training: forwarded to ``tf.layers.batch_normalization`` as
            ``training``.
        output_channels: number of filters of the last convolution.

    Returns:
        The tensor ``input - <output of the last convolution>``.
    """
    with tf.variable_scope('denoising', reuse=tf.AUTO_REUSE):
        # First layer: 32 3x3 filters with bias, followed by ReLU.
        with tf.variable_scope('block1'):
            features = tf.layers.conv2d(
                input, 32, 3, padding='same', activation=tf.nn.relu)

        # Blocks 2..11: bias-free convolution -> batch norm -> ReLU.
        for block_idx in range(2, 12):
            with tf.variable_scope('block%d' % block_idx):
                conv_out = tf.layers.conv2d(
                    features, 32, 3, padding='same',
                    name='conv%d' % block_idx, use_bias=False)
                normed = tf.layers.batch_normalization(
                    conv_out, training=is_training)
                features = tf.nn.relu(normed)

        # Final bias-free convolution; the scope name is kept as 'block17'
        # so that variable names stay unchanged.
        with tf.variable_scope('block17'):
            predicted = tf.layers.conv2d(
                features, output_channels, 3, padding='same', use_bias=False)

        return input - predicted
需要把训练数据分配到各个GPU上,并对各个GPU计算出的梯度求平均。
# Build one tower per GPU and collect each tower's (gradient, variable) list.
for i in range(N_GPU):
    with tf.device('/gpu:%d' % i), tf.name_scope('GPU_%d' % i) as scope:
        cur_loss0, output_long0, input_GT = get_loss(x0, y_, scope, reuse_variables)
        # Every tower after the first one is built with reuse_variables=True.
        reuse_variables = True
        grads0 = opt.compute_gradients(cur_loss0)
        tower_grads.append(grads0)
def average_gradients(tower_grads):
    """Average each variable's gradient over all towers.

    Args:
        tower_grads: one list per tower of ``(gradient, variable)`` pairs;
            the lists are zipped together, so position ``k`` of every tower
            is treated as belonging to the same variable.

    Returns:
        A list of ``(averaged_gradient, variable)`` pairs, where the variable
        is taken from the first tower. A gradient of ``None`` is left out of
        the mean; when no tower has a gradient for a variable the pair
        ``(None, variable)`` is returned for it. An empty ``tower_grads``
        yields an empty list.
    """
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # The variable is taken from the first tower's entry.
        v = grad_and_vars[0][1]

        # A gradient is None when the loss does not depend on the variable;
        # passing None to a TensorFlow op would raise, so drop those entries.
        grads = [g for g, _ in grad_and_vars if g is not None]
        if not grads:
            average_grads.append((None, v))
            continue

        # Stack along a new leading "tower" axis and average over it.
        grad = tf.reduce_mean(tf.stack(grads, 0), 0)
        average_grads.append((grad, v))
    return average_grads
每个GPU的利用率差不多90%:
完整工程下载地址: https://download.csdn.net/download/dcrmg/12667428
工程文件简单说明:
1. util.py 生成TFRecord文件
2. train.py 数据并行多GPU训练一个image2image模型(dncnn降噪)
3. test.py 推理测试