一、评估标准
截图来源:还是强大的wiki.
二、code
- accuracy(准确率):描述预测值与真实情况的一致程度。对于不平衡数据,假如大类占比98%,而模型把所有样本都判断为大类,那么accuracy就等于大类占比98%,数值看起来很高,但模型完全无法识别小类,这样的结果没有实际意义。
- 因此对于不平衡数据,更适合使用f1(precision与recall的调和平均)来评估模型。
- 使用TensorFlow方式实现。
def _count_both(actual_mask, predicted_mask):
    """Return an op counting positions where both boolean masks are true."""
    # Casting True/False to 1.0/0.0 lets reduce_sum act as a counter.
    return tf.reduce_sum(
        tf.cast(tf.logical_and(actual_mask, predicted_mask), "float")
    )


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0 when the denominator is zero."""
    if denominator == 0:
        return 0
    return numerator / denominator


def tf_confusion_metrics(model, actual_classes, session, feed_dict):
    """Compute and report precision, recall, F1 and accuracy.

    Predicted and actual class indices are obtained with ``tf.argmax`` along
    axis 1. Class index 1 is treated as "positive" and class index 0 as
    "negative"; a sample whose actual or predicted index is neither 0 nor 1
    is not counted in any of the four confusion-matrix cells.

    Args:
        model: Tensor of per-class outputs; argmax over axis 1 is taken as
            the prediction.
        actual_classes: Tensor of per-class targets; argmax over axis 1 is
            taken as the true class (presumably one-hot labels -- confirm
            against the caller).
        session: Object whose ``run(fetches, feed_dict)`` evaluates the
            counting ops (presumably a ``tf.Session``).
        feed_dict: Feed dictionary forwarded to ``session.run``.

    Returns:
        None. The metrics are registered as scalar summaries and printed.

    Every ratio whose denominator is zero is reported as 0. This includes
    accuracy, which previously raised ``ZeroDivisionError`` when all four
    counts were zero (for example, on an empty batch).
    """
    # NOTE(review): every call builds new counting and summary ops; in TF1
    # graph mode this presumably grows the graph when called repeatedly.
    predictions = tf.argmax(model, 1)
    actuals = tf.argmax(actual_classes, 1)

    # Boolean masks marking where the class index equals 1 or 0.
    actual_positive = tf.equal(actuals, tf.ones_like(actuals))
    actual_negative = tf.equal(actuals, tf.zeros_like(actuals))
    predicted_positive = tf.equal(predictions, tf.ones_like(predictions))
    predicted_negative = tf.equal(predictions, tf.zeros_like(predictions))

    # True positive / true negative: the prediction agrees with the label.
    tp_op = _count_both(actual_positive, predicted_positive)
    tn_op = _count_both(actual_negative, predicted_negative)
    # False positive: actual is 0, predicted 1.
    fp_op = _count_both(actual_negative, predicted_positive)
    # False negative: actual is 1, predicted 0.
    fn_op = _count_both(actual_positive, predicted_negative)

    tp, tn, fp, fn = session.run([tp_op, tn_op, fp_op, fn_op], feed_dict)
    # Convert to plain Python floats once so the arithmetic below is
    # ordinary float math.
    tp, tn, fp, fn = float(tp), float(tn), float(fp), float(fn)

    with tf.name_scope("confusion_matrix"):
        with tf.name_scope("precision"):
            precision = _safe_ratio(tp, tp + fp)
            tf.summary.scalar("Precision", precision)
        with tf.name_scope("recall"):
            recall = _safe_ratio(tp, tp + fn)
            tf.summary.scalar("Recall", recall)
        with tf.name_scope("f1_score"):
            f1_score = _safe_ratio(2 * (precision * recall), precision + recall)
            tf.summary.scalar("F1_score", f1_score)
        with tf.name_scope("accuracy"):
            # Guarded like the other ratios so an empty batch does not raise.
            accuracy = _safe_ratio(tp + tn, tp + fp + fn + tn)
            tf.summary.scalar("Accuracy", accuracy)

    print ('F1 Score = ', f1_score, ', Precision = ', precision,', Recall = ', recall, ', Accuracy = ', accuracy)
- 使用sklearn实现
import sklearn as sk
import numpy as np
from sklearn.metrics import confusion_matrix
# Report classification scores for a network's predictions.
def scores_all(y_pred_onehot_score, y_true_onehot_score):
    """Print the F1 score of the predictions against the true labels.

    Args:
        y_pred_onehot_score: Per-class prediction scores from the network
            (softmax output, per the original notes). Reduced to class
            indices with ``np.argmax`` along axis 1.
        y_true_onehot_score: One-hot encoded true labels fed to the network.
            Reduced to class indices the same way.

    Returns:
        None. The score is printed as ``f1: <value>``.
    """
    # Undo the one-hot encoding: keep only the index of the largest entry.
    true_labels = np.argmax(y_true_onehot_score, axis=1)
    predicted_labels = np.argmax(y_pred_onehot_score, axis=1)

    # Precision and recall can be reported the same way through
    # sk.metrics.precision_score / sk.metrics.recall_score; only F1 is
    # printed here.
    f1_value = sk.metrics.f1_score(true_labels, predicted_labels)
    print("f1:", f1_value)