precision = 正确预测为positive的个数(TP) / 被预测为positive的个数(TP+FP)
recall = 正确预测为positive的个数(TP) / 真实为positive的个数(TP+FN)
f1 = 2 * 精准率 * 召回率 / (精准率 + 召回率)
以二分类问题为例
真实\预测 | 0 | 1 |
---|---|---|
0 | 预测negative正确(TN) | 预测positive错误(FP) |
1 | 预测negative错误(FN) | 预测positive正确(TP) |
precision = TP/(TP+FP)
recall = TP/(TP+FN)
求f1_score
from sklearn.metrics import f1_score
# Usage sketch only: y_test and y_predict are placeholder names here
# (y_predict is not defined anywhere in the visible part of this file).
f1_score(y_test,y_predict)
# y_test: labels of the test set
# y_predict: prediction results
import numpy as np
from sklearn import datasets
# Load the digits dataset shipped with scikit-learn.
digits = datasets.load_digits()
X = digits['data']
# Deliberately skew the labels: digit 9 becomes the positive class (1) and
# every other digit the negative class (0); the original label dtype is kept.
y = (digits['target'] == 9).astype(digits['target'].dtype)
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
# Fixed random_state so the train/test split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
# fit() returns the estimator itself, so construction and training are chained.
log_reg = LogisticRegression().fit(X_train, y_train)
# Score on the held-out split; the value is computed but not stored.
log_reg.score(X_test, y_test)
y_log_predict = log_reg.predict(X_test)
# True negatives: predicted negative, and the prediction is correct.
def TN(y_true, y_predict):
    """Count true negatives for a binary classification result.

    Args:
        y_true: array-like of ground-truth labels (0 = negative, 1 = positive).
        y_predict: array-like of predicted labels, same length as ``y_true``.

    Returns:
        Number of positions where the true label is 0 and the predicted
        label is 0 (a numpy integer).
    """
    # Convert first so plain lists/tuples compare element-wise; without this
    # ``[0, 1] == 0`` is a single ``False`` and the count is silently 0.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 0) & (y_predict == 0))
tn = TN(y_true=y_test, y_predict=y_log_predict)  # recorded result: tn = 399
# False positives: predicted positive, but the prediction is wrong.
def FP(y_true, y_predict):
    """Count false positives for a binary classification result.

    Args:
        y_true: array-like of ground-truth labels (0 = negative, 1 = positive).
        y_predict: array-like of predicted labels, same length as ``y_true``.

    Returns:
        Number of positions where the true label is 0 but the predicted
        label is 1 (a numpy integer).
    """
    # Convert first so plain lists/tuples compare element-wise; without this
    # ``[0, 1] == 0`` is a single ``False`` and the count is silently 0.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 0) & (y_predict == 1))
fp = FP(y_true=y_test, y_predict=y_log_predict)  # recorded result: fp = 3
# False negatives: predicted negative, but the prediction is wrong.
def FN(y_true, y_predict):
    """Count false negatives for a binary classification result.

    Args:
        y_true: array-like of ground-truth labels (0 = negative, 1 = positive).
        y_predict: array-like of predicted labels, same length as ``y_true``.

    Returns:
        Number of positions where the true label is 1 but the predicted
        label is 0 (a numpy integer).
    """
    # Convert first so plain lists/tuples compare element-wise; without this
    # ``[0, 1] == 1`` is a single ``False`` and the count is silently 0.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 1) & (y_predict == 0))
fn = FN(y_true=y_test, y_predict=y_log_predict)  # recorded result: fn = 7
# True positives: predicted positive, and the prediction is correct.
def TP(y_true, y_predict):
    """Count true positives for a binary classification result.

    Args:
        y_true: array-like of ground-truth labels (0 = negative, 1 = positive).
        y_predict: array-like of predicted labels, same length as ``y_true``.

    Returns:
        Number of positions where the true label is 1 and the predicted
        label is 1 (a numpy integer).
    """
    # Convert first so plain lists/tuples compare element-wise; without this
    # ``[0, 1] == 1`` is a single ``False`` and the count is silently 0.
    y_true = np.asarray(y_true)
    y_predict = np.asarray(y_predict)
    return np.sum((y_true == 1) & (y_predict == 1))
tp = TP(y_true=y_test, y_predict=y_log_predict)  # recorded result: tp = 41
def confusion_matrix(y_true, y_predict):
    """Build the 2x2 confusion matrix of a binary classification result.

    Rows follow the true class (0, then 1) and columns follow the predicted
    class (0, then 1), so the layout is ``[[TN, FP], [FN, TP]]``.

    Args:
        y_true: ground-truth labels (0 = negative, 1 = positive).
        y_predict: predicted labels, same length as ``y_true``.

    Returns:
        A numpy array of shape (2, 2) holding the four counts.
    """
    return np.array([
        [TN(y_true, y_predict), FP(y_true, y_predict)],
        [FN(y_true, y_predict), TP(y_true, y_predict)],
    ])
# Confusion matrix of the logistic-regression predictions; recorded output below.
confusion_matrix(y_true=y_test, y_predict=y_log_predict)
"""
output :
array([[399, 3],
[ 7, 41]])
"""
def precision_score(y_true, y_predict):
    """Compute precision, TP / (TP + FP), for a binary classification result.

    Args:
        y_true: ground-truth labels (0 = negative, 1 = positive).
        y_predict: predicted labels, same length as ``y_true``.

    Returns:
        The fraction of predicted positives that are truly positive, or
        ``0.0`` when nothing was predicted positive.
    """
    tp = TP(y_true, y_predict)
    fp = FP(y_true, y_predict)
    predicted_positive = tp + fp
    # Test the denominator explicitly: the counts are numpy integers, and
    # numpy's 0/0 yields nan with a RuntimeWarning instead of raising, so a
    # try/except around the division never reaches the 0.0 fallback.
    if predicted_positive == 0:
        return 0.0
    return tp / predicted_positive
# Precision of the logistic-regression predictions; recorded output below.
precision_score(y_true=y_test, y_predict=y_log_predict)
"""
output : 0.9318181818181818
"""
def recall_score(y_true, y_predict):
    """Compute recall, TP / (TP + FN), for a binary classification result.

    Args:
        y_true: ground-truth labels (0 = negative, 1 = positive).
        y_predict: predicted labels, same length as ``y_true``.

    Returns:
        The fraction of truly positive samples that were predicted positive,
        or ``0.0`` when there are no truly positive samples.
    """
    tp = TP(y_true, y_predict)
    fn = FN(y_true, y_predict)
    actual_positive = tp + fn
    # Test the denominator explicitly: the counts are numpy integers, and
    # numpy's 0/0 yields nan with a RuntimeWarning instead of raising, so a
    # try/except around the division never reaches the 0.0 fallback.
    if actual_positive == 0:
        return 0.0
    return tp / actual_positive
# Recall of the logistic-regression predictions; recorded output below.
recall_score(y_true=y_test, y_predict=y_log_predict)
"""
output : 0.8541666666666666
"""
以上为拆分理解，在sklearn中都可以直接求得：
# Confusion matrix via scikit-learn.
# NOTE: this import rebinds the name confusion_matrix, replacing the
# hand-written function defined earlier in this file.
from sklearn.metrics import confusion_matrix
confusion_matrix(y_true=y_test, y_pred=y_log_predict)
"""
output:array([[399, 3],
[ 7, 41]], dtype=int64)
"""
# Precision via scikit-learn.
# NOTE: this import rebinds the name precision_score, replacing the
# hand-written function defined earlier in this file.
from sklearn.metrics import precision_score
precision_score(y_true=y_test, y_pred=y_log_predict)
"""
output: 0.9318181818181818
"""
# Recall via scikit-learn.
# NOTE: this import rebinds the name recall_score, replacing the
# hand-written function defined earlier in this file.
from sklearn.metrics import recall_score
recall_score(y_true=y_test, y_pred=y_log_predict)
"""
output: 0.8541666666666666
"""
# classification_report: per-class precision / recall / f1-score / support
# in one text table (recorded output below).
from sklearn.metrics import classification_report
print(classification_report(y_true=y_test, y_pred=y_log_predict))
"""
output: precision recall f1-score support
0 0.98 0.99 0.99 402
1 0.93 0.85 0.89 48
avg / total 0.98 0.98 0.98 450
"""