#
#作者:韦访
#博客:https://blog.csdn.net/rookie_wei
#微信:1007895847
#添加微信的备注一下是CSDN的
#欢迎大家一起学习
#
------韦访 20190509
接着,来看evaluate函数,这个函数传了一堆参数进来,
# 下面是韦访修改,用来将队列中的图片保存到本地
# def evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder,
# embeddings, labels, image_paths, actual_issame, batch_size, nrof_folds, distance_metric, subtract_mean, use_flipped_images, use_fixed_image_standardization, image_batch):
def evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder,
        embeddings, labels, image_paths, actual_issame, batch_size, nrof_folds, distance_metric, subtract_mean, use_flipped_images, use_fixed_image_standardization):
    """Run every LFW image through the network and gather one embedding row per image.

    The image paths are enqueued (each one twice when flipping is enabled),
    the queue is then drained batch by batch through ``sess.run`` and every
    embedding is stored at the row given by its label. With flipping enabled,
    the embeddings of the plain and the flipped version of an image are
    concatenated along the feature axis.

    Intermediate values and progress dots are printed to stdout.

    Args:
        actual_issame: one entry per image pair; twice its length is used as
            the number of images to embed.
        batch_size: must divide the number of enqueued images exactly,
            otherwise the assertion below fails.
        use_flipped_images: when true, every image is enqueued twice and every
            second queue entry gets the ``facenet.FLIP`` control bit.
        use_fixed_image_standardization: when true, every queue entry gets
            ``facenet.FIXED_STANDARDIZATION`` added to its control value.
    """
    print('Runnning forward pass on LFW images')

    # Every line of pairs.txt names two images and each image needs its own
    # embedding, hence the factor of two.
    nrof_embeddings = 2 * len(actual_issame)
    print('nrof_embeddings:', nrof_embeddings)
    # Flipping doubles the number of images pushed through the network.
    if use_flipped_images:
        nrof_flips = 2
    else:
        nrof_flips = 1
    print('nrof_flips:', nrof_flips)
    nrof_images = nrof_flips * nrof_embeddings
    print('nrof_images:', nrof_images)

    # Labels are simply 0..nrof_images-1, shaped as a column vector.
    labels_array = np.arange(0, nrof_images)[:, np.newaxis]
    print('labels_array:', labels_array)
    # Each path is repeated once per flip variant, also as a column vector.
    image_paths_array = np.repeat(np.array(image_paths), nrof_flips)[:, np.newaxis]
    print('image_paths_array:', image_paths_array)
    # Per-image control value, built up from the flags below.
    control_array = np.zeros_like(labels_array, np.int32)
    print('control_array:', control_array)
    if use_fixed_image_standardization:
        control_array += facenet.FIXED_STANDARDIZATION * np.ones_like(labels_array)
    if use_flipped_images:
        # Flip every second image (the odd labels).
        control_array += facenet.FLIP * (labels_array % 2)
    # NOTE(review): the article lost its indentation; this print is placed at
    # function level so that it always shows the final control values - confirm.
    print(control_array)

    # Push one epoch of paths, labels and control values into the input queue.
    sess.run(enqueue_op, {image_paths_placeholder: image_paths_array, labels_placeholder: labels_array, control_placeholder: control_array})

    embedding_size = int(embeddings.get_shape()[1])
    print('embedding_size:', embeddings.get_shape())
    assert nrof_images % batch_size == 0, 'The number of LFW images must be an integer multiple of the LFW batch size'
    nrof_batches = nrof_images // batch_size
    print('nrof_batches:', nrof_batches)
    emb_array = np.zeros((nrof_images, embedding_size))
    print('emb_array:', emb_array.shape)
    lab_array = np.zeros((nrof_images,))

    # Compute embeddings and labels batch by batch; the returned labels say
    # which rows the batch belongs to.
    feed_dict = {phase_train_placeholder: False, batch_size_placeholder: batch_size}
    for batch_idx in range(nrof_batches):
        emb, lab = sess.run([embeddings, labels], feed_dict=feed_dict)
        lab_array[lab] = lab
        emb_array[lab, :] = emb
        # One progress dot after every tenth batch.
        if (batch_idx + 1) % 10 == 0:
            print('.', end='')
            sys.stdout.flush()

    # Variant added by the article's author to save the dequeued images to
    # disk (requires the extra image_batch argument of the commented-out
    # signature above this function):
    # for i in range(nrof_batches):
    #     feed_dict = {phase_train_placeholder:False, batch_size_placeholder:batch_size}
    #     emb, imgs, lab = sess.run([embeddings, image_batch, labels], feed_dict=feed_dict)
    #     for img,l in zip(imgs, lab):
    #         scipy.misc.toimage(img).save('images/FLIP/images_%d.jpg' % l)
    #
    #     lab_array[lab] = lab
    #     emb_array[lab, :] = emb
    #     if i % 10 == 9:
    #         print('.', end='')
    #         sys.stdout.flush()
    print('')

    # From here on ``embeddings`` names the NumPy result, not the tensor.
    embeddings = np.zeros((nrof_embeddings, embedding_size*nrof_flips))
    print('embeddings:', embeddings.shape)
    if not use_flipped_images:
        embeddings = emb_array
    else:
        # Even rows hold the plain images, odd rows their flipped versions;
        # place the two side by side.
        embeddings[:, :embedding_size] = emb_array[0::2, :]
        embeddings[:, embedding_size:] = emb_array[1::2, :]

    assert np.array_equal(lab_array, np.arange(nrof_images)), 'Wrong labels used for evaluation, possibly caused by training examples left in the input pipeline'
    # The function body continues with the metric computation that the
    # article shows after its next paragraph.
上面都还比较好理解,主要是计算所有图片的特征embeddings和其对应的label,我们主要来看它得到特征以后怎么处理。
代码如下,
# Tail of the evaluate() function above: score the embeddings and report.
# distance_metric selects the distance: 0 = Euclidean, 1 = cosine similarity.
tpr, fpr, accuracy, val, val_std, far = lfw.evaluate(
    embeddings,
    actual_issame,
    nrof_folds=nrof_folds,
    distance_metric=distance_metric,
    subtract_mean=subtract_mean)

# Mean and spread of the per-fold accuracies
accuracy_mean, accuracy_std = np.mean(accuracy), np.std(accuracy)
print('Accuracy: %2.5f+-%2.5f' % (accuracy_mean, accuracy_std))
print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far))

auc = metrics.auc(fpr, tpr)
print('Area Under Curve (AUC): %1.3f' % auc)

# Equal error rate: the x in [0, 1] where 1 - x equals the interpolated TPR.
roc_curve = interpolate.interp1d(fpr, tpr)
eer = brentq(lambda x: 1. - x - roc_curve(x), 0., 1.)
print('Equal Error Rate (EER): %1.3f' % eer)
主要看一下lfw.evaluate函数,
def evaluate(embeddings, actual_issame, nrof_folds=10, distance_metric=0, subtract_mean=False):
    """Compute ROC and validation-rate metrics for interleaved pair embeddings.

    Rows of ``embeddings`` are taken pairwise: row 2k and row 2k+1 are the two
    images of pair k, and ``actual_issame`` holds the ground truth per pair.

    Returns:
        ``(tpr, fpr, accuracy, val, val_std, far)`` - the first three come from
        ``facenet.calculate_roc``, the last three from ``facenet.calculate_val``
        called with a target of 1e-3.
    """
    # Even rows are the first image of each pair, odd rows the second, so
    # equal indices in the two arrays belong to the same pair.
    embeddings1, embeddings2 = embeddings[0::2], embeddings[1::2]
    issame = np.asarray(actual_issame)
    fold_options = dict(nrof_folds=nrof_folds, distance_metric=distance_metric, subtract_mean=subtract_mean)

    # Candidate thresholds for the ROC / accuracy computation
    roc_thresholds = np.arange(0, 4, 0.01)
    tpr, fpr, accuracy = facenet.calculate_roc(
        roc_thresholds, embeddings1, embeddings2, issame, **fold_options)

    # A ten times finer grid is used for the validation rate.
    val_thresholds = np.arange(0, 4, 0.001)
    val, val_std, far = facenet.calculate_val(
        val_thresholds, embeddings1, embeddings2, issame, 1e-3, **fold_options)

    return tpr, fpr, accuracy, val, val_std, far
来看下面的代码,
# In [0::2] the 0 is the start index and the 2 is the step; embeddings[1::2]
# works the same way but starts at index 1.
# Equal indices in embeddings1 and embeddings2 therefore refer to the two
# images that are to be compared with each other.
embeddings1 = embeddings[0::2]
embeddings2 = embeddings[1::2]
这个很好理解啊,步长都为2,也就是隔一个取,只是开始不一样,那是不是就相当于一个取了偶数,另一个取了奇数的下标啊。组成了两个新的数组,这两新数组对应的相同的下标就是要对比的图片的特征,也就是pairs.txt同一行所对应的两张图片的特征。好,接着往下看,重点来看facenet.calculate_roc函数,
def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10, distance_metric=0, subtract_mean=False):
    """Cross-validated ROC points and accuracy for a set of embedding pairs.

    For every fold the threshold with the highest accuracy on the training
    pairs is selected; that threshold is then scored on the held-out pairs.
    TPR/FPR are additionally recorded on the held-out pairs for every
    candidate threshold.

    Args:
        thresholds: candidate distance thresholds.
        embeddings1, embeddings2: arrays with matching shapes; row i of each
            forms pair i.
        actual_issame: ground truth per pair; it is indexed with index arrays,
            so it has to support NumPy-style fancy indexing.
        nrof_folds: number of KFold splits.
        distance_metric: forwarded to ``distance``.
        subtract_mean: when true, the mean of the training embeddings of the
            fold is subtracted before distances are computed.

    Returns:
        ``(tpr, fpr, accuracy)`` - TPR and FPR per threshold averaged over the
        folds, and the accuracy of each fold.
    """
    assert(embeddings1.shape[0] == embeddings2.shape[0])
    assert(embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)

    # K-fold cross validation without shuffling
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)
    tprs = np.zeros((nrof_folds, nrof_thresholds))
    fprs = np.zeros((nrof_folds, nrof_thresholds))
    accuracy = np.zeros((nrof_folds))

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(np.arange(nrof_pairs))):
        mean = 0.0
        if subtract_mean:
            mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0)

        # Distance between the two embeddings of every pair
        dist = distance(embeddings1 - mean, embeddings2 - mean, distance_metric)

        # Find the best threshold for the fold: score each candidate on the
        # training pairs and keep the one with the highest accuracy.
        train_dist, train_issame = dist[train_set], actual_issame[train_set]
        acc_train = np.zeros((nrof_thresholds))
        for threshold_idx, threshold in enumerate(thresholds):
            _, _, train_acc = calculate_accuracy(threshold, train_dist, train_issame)
            acc_train[threshold_idx] = train_acc
        best_threshold = thresholds[np.argmax(acc_train)]
        print('best_threshold_index:', best_threshold)

        # ROC point of every candidate threshold on the held-out pairs
        test_dist, test_issame = dist[test_set], actual_issame[test_set]
        for threshold_idx, threshold in enumerate(thresholds):
            fold_tpr, fold_fpr, _ = calculate_accuracy(threshold, test_dist, test_issame)
            tprs[fold_idx, threshold_idx] = fold_tpr
            fprs[fold_idx, threshold_idx] = fold_fpr

        # Accuracy of the selected threshold on the held-out pairs
        _, _, accuracy[fold_idx] = calculate_accuracy(best_threshold, test_dist, test_issame)

    # Average over the folds (column-wise mean)
    return np.mean(tprs, 0), np.mean(fprs, 0), accuracy
函数名中的roc是Receiver Operating Characteristic(受试者工作特征)的缩写,ROC曲线描述的是在不同阈值下真正率(TPR)与假正率(FPR)之间的关系。再看这个KFold函数,这个叫K折交叉验证法,不明白的可以回去看一下,要不然你看不懂下面的代码的,链接如下,
https://blog.csdn.net/rookie_wei/article/details/83998332
这里只挑重要的函数来讲啊,来看distance函数,
# Compute the distance between the embeddings of the two images of each pair
dist = distance(embeddings1-mean, embeddings2-mean, distance_metric)
这个就是求对应的图片特征之间的距离,实现如下,
def distance(embeddings1, embeddings2, distance_metric=0):
    """Return the row-wise distance between two arrays of embeddings.

    Args:
        embeddings1, embeddings2: 2-D arrays of matching shape; row i of each
            is compared with row i of the other.
        distance_metric: 0 for the squared Euclidean distance (no square root
            is taken), 1 for the angle derived from the cosine similarity,
            scaled by 1/pi.

    Returns:
        1-D array with one distance per row.

    Raises:
        ValueError: if ``distance_metric`` is neither 0 nor 1.
    """
    if distance_metric == 0:
        # Squared Euclidean distance
        diff = np.subtract(embeddings1, embeddings2)
        dist = np.sum(np.square(diff), 1)
    elif distance_metric == 1:
        # Distance based on cosine similarity
        dot = np.sum(np.multiply(embeddings1, embeddings2), axis=1)
        norm = np.linalg.norm(embeddings1, axis=1) * np.linalg.norm(embeddings2, axis=1)
        # Rounding can push the ratio marginally outside [-1, 1], where arccos
        # yields NaN; clip it back into the valid domain.
        similarity = np.clip(dot / norm, -1.0, 1.0)
        dist = np.arccos(similarity) / math.pi
    else:
        # Raising a plain string is itself a TypeError in Python 3, so raise a
        # real exception that carries the message.
        raise ValueError('Undefined distance metric %s' % (distance_metric,))
    return dist
得到距离以后,我们就需要有阈值来判断它们是否是同一个人啦,那么这个阈值怎么设?这里给的方法是,根据一部分数据来计算,因为我们本来就知道待对比的两张图片是否是同一个人,我们给出几百个差距很小的阈值,然后一个一个的去计算,得到准确率最高的那个,看代码,
# Search for the best threshold: score every candidate on the training pairs
acc_train = np.zeros((nrof_thresholds))
train_dist, train_issame = dist[train_set], actual_issame[train_set]
for threshold_idx, threshold in enumerate(thresholds):
    _, _, train_acc = calculate_accuracy(threshold, train_dist, train_issame)
    acc_train[threshold_idx] = train_acc
看看calculate_accuracy函数做了什么,
def calculate_accuracy(threshold, dist, actual_issame):
    """Score the rule "same person if distance < threshold" against the ground truth.

    Args:
        threshold: distance below which a pair is predicted to be the same person.
        dist: array of pair distances.
        actual_issame: boolean ground truth per pair, same length as ``dist``.

    Returns:
        ``(tpr, fpr, acc)``:
        tpr - among the pairs that really are the same person, the share whose
              distance is below the threshold (0 if there are no such pairs);
        fpr - among the pairs that are different people, the share whose
              distance is below the threshold (0 if there are no such pairs);
        acc - share of all pairs that are predicted correctly (0.0 for empty
              input instead of dividing by zero).
    """
    # True where the distance is below the threshold, False otherwise
    predict_issame = np.less(dist, threshold)
    predict_differs = np.logical_not(predict_issame)
    actual_differs = np.logical_not(actual_issame)

    # Summing booleans counts the True entries (True == 1, False == 0).
    # Below the threshold and really the same person
    tp = np.sum(np.logical_and(predict_issame, actual_issame))
    # Below the threshold but different people
    fp = np.sum(np.logical_and(predict_issame, actual_differs))
    # Not below the threshold and different people
    tn = np.sum(np.logical_and(predict_differs, actual_differs))
    # Not below the threshold although the same person
    fn = np.sum(np.logical_and(predict_differs, actual_issame))

    tpr = 0 if (tp + fn == 0) else float(tp) / float(tp + fn)
    fpr = 0 if (fp + tn == 0) else float(fp) / float(fp + tn)

    # tp + tn is the number of correct predictions at this threshold; divided
    # by the number of pairs it is the accuracy. The threshold that maximises
    # this value is the best one.
    nrof_pairs = np.size(dist)
    acc = 0.0 if nrof_pairs == 0 else float(tp + tn) / nrof_pairs
    return tpr, fpr, acc
注释都写得比较详细了,哪里不明白的可以自己print出来看,
得到最佳阈值以后,用测试集和最佳阈值再带入到calculate_accuracy函数,就可以求得每组预测的准确率了,
# Record the ROC point of every candidate threshold on the held-out pairs
test_dist, test_issame = dist[test_set], actual_issame[test_set]
for threshold_idx, threshold in enumerate(thresholds):
    fold_tpr, fold_fpr, _ = calculate_accuracy(threshold, test_dist, test_issame)
    tprs[fold_idx, threshold_idx] = fold_tpr
    fprs[fold_idx, threshold_idx] = fold_fpr
# Accuracy of this fold at the threshold selected on the training pairs
_, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], test_dist, test_issame)
然后再求准确率的均值,就是最后的准确率了,
# Report the mean and the standard deviation of the per-fold accuracies
print('Accuracy: %2.5f+-%2.5f' % (np.mean(accuracy), np.std(accuracy)))
带注释的源码链接:
https://download.csdn.net/download/rookie_wei/11172357
还有一些其他的参数我就不去分析了。可以看到,人脸识别使用的主要思路就是,先获取要对比的图片的特征,然后再求它们之间的距离,然后,给个阈值,如果距离小于这个阈值就是同一个人脸,如果大于就不是,就这么简单。
还有大家如果加我微信,我会拉你们进群,有事尽量在群里讨论,如果都私聊我我是忙不过来的,也达不到我建群的目的,大家一起学习进步才是目的。