接上文,下面来介绍train_softmax.py的最后一部分代码,验证部分的代码:
evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder,embeddings, label_batch, lfw_paths, actual_issame, args.lfw_batch_size, args.lfw_nrof_folds, log_dir, step, summary_writer)
具体定义:
def evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder,
             embeddings, labels, image_paths, actual_issame, batch_size, nrof_folds, log_dir, step, summary_writer):
    """Run one full LFW verification pass and log the resulting metrics.

    Enqueues one epoch of LFW image paths, runs the forward pass batch by
    batch to collect embeddings, scores the pairs with lfw.evaluate(), and
    writes the metrics both to TensorBoard (via summary_writer) and to a
    tab-separated lfw_result.txt file in log_dir.

    Args:
        sess: active tf.Session.
        enqueue_op: op that feeds (image_path, label) pairs into the input queue.
        image_paths_placeholder, labels_placeholder: placeholders fed to enqueue_op.
        phase_train_placeholder: bool placeholder; fed False to disable
            training-only behavior during evaluation.
        batch_size_placeholder: int placeholder for the dequeue batch size.
        embeddings: tensor producing one embedding row per dequeued image.
        labels: tensor producing the label dequeued alongside each batch.
        image_paths: list of LFW image paths, two consecutive entries per pair.
        actual_issame: list of bools, one per pair (True = same identity).
        batch_size: evaluation batch size; must evenly divide len(image_paths).
        nrof_folds: number of cross-validation folds passed to lfw.evaluate.
        log_dir: directory that receives lfw_result.txt.
        step: global training step used as the summary/log index.
        summary_writer: summary writer used to emit the TensorBoard summary.
    """
    start_time = time.time()
    # Run forward pass to calculate embeddings
    print('Running forward pass on LFW images')

    # Enqueue one epoch of image paths; the "labels" are simply the indices
    # 0..N-1, so each batch's results can be scattered back into original
    # order regardless of how the pipeline delivers them.
    labels_array = np.expand_dims(np.arange(0, len(image_paths)), 1)
    image_paths_array = np.expand_dims(np.array(image_paths), 1)
    sess.run(enqueue_op, {image_paths_placeholder: image_paths_array, labels_placeholder: labels_array})

    embedding_size = embeddings.get_shape()[1]
    nrof_images = len(actual_issame) * 2  # two images per verification pair
    assert nrof_images % batch_size == 0, 'The number of LFW images must be an integer multiple of the LFW batch size'
    nrof_batches = nrof_images // batch_size
    emb_array = np.zeros((nrof_images, embedding_size))
    lab_array = np.zeros((nrof_images,))
    for _ in range(nrof_batches):
        feed_dict = {phase_train_placeholder: False, batch_size_placeholder: batch_size}
        emb, lab = sess.run([embeddings, labels], feed_dict=feed_dict)
        # Scatter the batch back to its original positions via the index labels.
        lab_array[lab] = lab
        emb_array[lab] = emb
    # Every index must have been seen exactly once; otherwise stale training
    # examples were still sitting in the input pipeline.
    assert np.array_equal(lab_array, np.arange(nrof_images)), \
        'Wrong labels used for evaluation, possibly caused by training examples left in the input pipeline'

    _, _, accuracy, val, val_std, far = lfw.evaluate(emb_array, actual_issame, nrof_folds=nrof_folds)

    print('Accuracy: %1.3f+-%1.3f' % (np.mean(accuracy), np.std(accuracy)))
    print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far))
    lfw_time = time.time() - start_time
    # Add validation loss and accuracy to summary
    summary = tf.Summary()
    # pylint: disable=maybe-no-member
    summary.value.add(tag='lfw/accuracy', simple_value=np.mean(accuracy))
    summary.value.add(tag='lfw/val_rate', simple_value=val)
    summary.value.add(tag='time/lfw', simple_value=lfw_time)
    summary_writer.add_summary(summary, step)
    with open(os.path.join(log_dir, 'lfw_result.txt'), 'at') as f:
        f.write('%d\t%.5f\t%.5f\n' % (step, np.mean(accuracy), val))
首先看代码:
labels_array = np.expand_dims(np.arange(0, len(image_paths)), 1)
image_paths_array = np.expand_dims(np.array(image_paths), 1)
sess.run(enqueue_op, {image_paths_placeholder: image_paths_array, labels_placeholder: labels_array})
labels_array = np.expand_dims(np.arange(0, len(image_paths)), 1),根据图像的数量生成0、1、2......的编号,labels_array格式为
array([[0],
[1],
[2],
[3],
[4],
[5],
[6],
[7],
[8],
[9]]
.......)
接着执行image_paths_array = np.expand_dims(np.array(image_paths), 1),由之前数据分析部分可以看到,这里image_paths(lfw_paths)为两个lfw人脸图片对应的全路径文件组成的元组,最后将图像和labels送入队列。接下来运行
emb, lab = sess.run([embeddings, labels], feed_dict=feed_dict)
通过读队列操作将lfw图像送入, 提取embedding特征emb以及label:labels,接下来:
lab_array[lab] = lab
emb_array[lab, :] = emb
由于人脸图片是按照0,1,2,3....顺序编号的,故这里通过label编号来设定对应位置图片的emb特征和label。
最后调用lfw的评测函数:
_, _, accuracy, val, val_std, far = lfw.evaluate(emb_array, actual_issame, nrof_folds=nrof_folds)
lfw的evaluate代码如下:
def evaluate(embeddings, actual_issame, nrof_folds=10, distance_metric=0, subtract_mean=False):
    """Compute LFW verification metrics from interleaved pair embeddings.

    Rows of `embeddings` alternate between the first and second image of
    each pair; `actual_issame[i]` says whether pair i shows the same person.
    Returns (tpr, fpr, accuracy, val, val_std, far).
    """
    # De-interleave: even rows are image 1 of each pair, odd rows image 2.
    first_of_pair = embeddings[0::2]
    second_of_pair = embeddings[1::2]
    issame = np.asarray(actual_issame)

    # ROC / accuracy over a coarse threshold sweep: 0..4 in steps of 0.01.
    tpr, fpr, accuracy = facenet.calculate_roc(
        np.arange(0, 4, 0.01), first_of_pair, second_of_pair, issame,
        nrof_folds=nrof_folds, distance_metric=distance_metric,
        subtract_mean=subtract_mean)

    # Validation rate at FAR=1e-3 over a finer sweep: 0..4 in steps of 0.001.
    val, val_std, far = facenet.calculate_val(
        np.arange(0, 4, 0.001), first_of_pair, second_of_pair, issame,
        1e-3, nrof_folds=nrof_folds, distance_metric=distance_metric,
        subtract_mean=subtract_mean)

    return tpr, fpr, accuracy, val, val_std, far
embeddings1 和embeddings2分别为两个输入人脸图片的特征,随后调用facenet.calculate_roc计算准确度:
def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10, distance_metric=0, subtract_mean=False):
    """Cross-validated ROC and accuracy for face-verification thresholds.

    For each fold: picks the distance threshold with the highest training
    accuracy, then reports test accuracy at that threshold, plus TPR/FPR at
    every candidate threshold for the ROC curve.

    Args:
        thresholds: 1-D array of candidate distance thresholds to sweep.
        embeddings1, embeddings2: arrays of embeddings, one row per pair side.
        actual_issame: bool array, True where the pair is the same identity.
        nrof_folds: number of cross-validation folds.
        distance_metric: selector passed through to distance().
        subtract_mean: if True, center embeddings on the training-fold mean.

    Returns:
        (tpr, fpr, accuracy): TPR/FPR averaged over folds (one value per
        threshold) and the per-fold accuracy array.
    """
    assert(embeddings1.shape[0] == embeddings2.shape[0])
    assert(embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])  # number of face pairs compared
    nrof_thresholds = len(thresholds)  # e.g. 400 for a 0..4 sweep with step 0.01
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)  # split pairs into nrof_folds folds (train on the rest, test on one)
    tprs = np.zeros((nrof_folds,nrof_thresholds))  # per-fold, per-threshold true positive rate
    fprs = np.zeros((nrof_folds,nrof_thresholds))  # per-fold, per-threshold false positive rate
    accuracy = np.zeros((nrof_folds))  # one accuracy per fold (nrof_folds entries, not one per threshold)
    indices = np.arange(nrof_pairs)  # 0, 1, ..., nrof_pairs-1
    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
        if subtract_mean:
            # Center on the mean of the training portion only, to avoid test leakage.
            mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0)
        else:
            mean = 0.0
        # Per-pair distance between the two embeddings; metric chosen by distance_metric.
        dist = distance(embeddings1-mean, embeddings2-mean, distance_metric)

        # Find the best threshold for the fold
        acc_train = np.zeros((nrof_thresholds))  # training accuracy at each candidate threshold
        for threshold_idx, threshold in enumerate(thresholds):
            _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set])
        best_threshold_index = np.argmax(acc_train)  # threshold maximizing training accuracy
        # Evaluate every threshold on the held-out fold (builds the ROC curve)...
        for threshold_idx, threshold in enumerate(thresholds):
            tprs[fold_idx,threshold_idx], fprs[fold_idx,threshold_idx], _ = calculate_accuracy(threshold, dist[test_set], actual_issame[test_set])
        # ...and record test accuracy at this fold's best training threshold.
        _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set])

    tpr = np.mean(tprs,0)
    fpr = np.mean(fprs,0)
    return tpr, fpr, accuracy
通过将样本切分为训练样本和测试样本,在训练样本中找出最佳门限,之后计算最佳门限下测试样本的准确率,以及不同门限下测试样本和训练样本的准确率。最后调用calculate_val,得到验证率(VAL)的均值和方差。
def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10, distance_metric=0, subtract_mean=False):
    """Cross-validated validation rate (VAL) at a target false-accept rate.

    For each fold: sweeps the thresholds on the training portion to build a
    threshold->FAR curve, interpolates the threshold where FAR == far_target,
    then measures VAL and FAR at that threshold on the held-out portion.

    Args:
        thresholds: 1-D array of candidate distance thresholds to sweep.
        embeddings1, embeddings2: arrays of embeddings, one row per pair side.
        actual_issame: bool array, True where the pair is the same identity.
        far_target: target false-accept rate (e.g. 1e-3).
        nrof_folds: number of cross-validation folds.
        distance_metric: selector passed through to distance().
        subtract_mean: if True, center embeddings on the training-fold mean.

    Returns:
        (val_mean, val_std, far_mean): mean/std of VAL and mean FAR over folds.
    """
    assert(embeddings1.shape[0] == embeddings2.shape[0])
    assert(embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])  # number of face pairs compared
    nrof_thresholds = len(thresholds)
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)

    val = np.zeros(nrof_folds)  # per-fold validation rate at the interpolated threshold
    far = np.zeros(nrof_folds)  # per-fold achieved false-accept rate

    indices = np.arange(nrof_pairs)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
        if subtract_mean:
            # Center on the training-fold mean only, to avoid test leakage.
            mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0)
        else:
            mean = 0.0
        dist = distance(embeddings1-mean, embeddings2-mean, distance_metric)

        # Find the threshold that gives FAR = far_target
        far_train = np.zeros(nrof_thresholds)  # FAR on the training portion at each threshold
        for threshold_idx, threshold in enumerate(thresholds):
            _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set])
        if np.max(far_train)>=far_target:
            # Invert the threshold->FAR curve by piecewise-linear interpolation.
            # NOTE(review): interp1d requires far_train values to be unique and
            # effectively monotonic for a meaningful inverse — confirm the
            # chosen threshold sweep guarantees this.
            f = interpolate.interp1d(far_train, thresholds, kind='slinear')
            threshold = f(far_target)
        else:
            # Target FAR unreachable on this fold; fall back to threshold 0.
            threshold = 0.0

        val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set])

    val_mean = np.mean(val)
    far_mean = np.mean(far)
    val_std = np.std(val)
    return val_mean, val_std, far_mean