import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
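# Function classifier_evaluation(clf, x_set, y_set, k_fold, evaluation_set).
# Purpose: for the classifier clf, compute the performance metrics obtained on
#   the test set of every fold of a K-fold cross-validation.
# Parameters:
#   clf: a classifier.
#   x_set: the sample data set.
#   y_set: the class-label data set, one label per sample.
#   k_fold: the K of the K-fold cross-validation, default 10.
#   evaluation_set: a list of performance metrics; may be any subset of
#     ['accuracy', 'precision', 'roc_auc', 'recall', 'f1'], default ['accuracy'].
# Returns: the metric values of every fold together with their mean, as a
#   pandas.DataFrame.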
def classifier_evaluation(clf, x_set, y_set, k_fold=10, evaluation_set=None):
    """Evaluate one classifier with K-fold cross-validation.

    All requested metrics are computed in a single cross-validation pass, so
    each fold is fitted once and every metric is measured on the same fitted
    model and the same train/test split.

    Args:
        clf: Classifier forwarded to scikit-learn's ``cross_validate``.
        x_set: Sample data set.
        y_set: Class label of each sample.
        k_fold: Value forwarded as ``cv`` (the K of K-fold); defaults to 10.
        evaluation_set: Iterable of scikit-learn scoring names, e.g. any
            subset of ``['accuracy', 'precision', 'roc_auc', 'recall', 'f1']``.
            Repeated names are evaluated once. Defaults to ``['accuracy']``
            when omitted or ``None``.

    Returns:
        pandas.DataFrame with one column per metric and one row per fold,
        followed by a row labelled ``'ave'`` holding the column means.

    Raises:
        ValueError: If ``evaluation_set`` is empty.
    """
    if evaluation_set is None:
        metrics = ['accuracy']
    else:
        # Drop repeated names while keeping the caller's order; a repeated
        # metric would otherwise be rejected by cross_validate.
        metrics = list(dict.fromkeys(evaluation_set))
    if not metrics:
        raise ValueError('evaluation_set must contain at least one metric')

    # Local import: cross_validate is not among this file's top-level imports.
    from sklearn.model_selection import cross_validate

    cv_results = cross_validate(clf, x_set, y_set, cv=k_fold, scoring=metrics)
    # With a list of metrics, the per-fold test scores are stored under the
    # keys 'test_<metric name>'.
    oneclf_scores_df = pd.DataFrame(
        {metric: cv_results['test_' + metric] for metric in metrics}
    )
    # Append the per-metric mean over all folds as the final row.
    oneclf_scores_df.loc['ave'] = oneclf_scores_df.mean()
    return oneclf_scores_df
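# Function classifiers_evaluation(clf, x_set, y_set, k_fold, evaluation_set).
# Purpose: use K-fold cross-validation to compute the performance metrics of
#   every classifier in a group of classifiers.
# Parameters:
#   clf: a group of classifiers.
#   x_set: the sample data set.
#   y_set: the class-label data set, one label per sample.
#   k_fold: the K of the K-fold cross-validation, default 10.
#   evaluation_set: a list of performance metrics; may be any subset of
#     ['accuracy', 'precision', 'roc_auc', 'recall', 'f1'], default ['accuracy'].
# Returns: the performance metrics of every classifier, each taken as the mean
#   over the K folds, as a pandas.DataFrame.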
def classifiers_evaluation(clf, x_set, y_set, k_fold=10, evaluation_set=None):
    """Compare several classifiers by their mean K-fold cross-validation scores.

    Args:
        clf: Iterable of classifiers to evaluate.
        x_set: Sample data set.
        y_set: Class label of each sample.
        k_fold: The K of K-fold cross-validation; defaults to 10.
        evaluation_set: Iterable of scikit-learn scoring names, e.g. any
            subset of ``['accuracy', 'precision', 'roc_auc', 'recall', 'f1']``.
            Defaults to ``['accuracy']`` when omitted or ``None``.

    Returns:
        pandas.DataFrame with one column per metric and one row per
        classifier, labelled with ``str(classifier)``. Each cell is the mean
        of that metric over the folds. Classifiers whose string forms are
        equal share a single row; the last one evaluated wins.
    """
    metrics = ['accuracy'] if evaluation_set is None else list(evaluation_set)
    clfs_scores_df = pd.DataFrame(columns=metrics)
    for clf_item in clf:
        # Reuse the single-classifier routine rather than repeating its
        # cross-validation logic; its 'ave' row holds the per-metric means.
        fold_scores_df = classifier_evaluation(
            clf_item, x_set, y_set, k_fold, metrics
        )
        clfs_scores_df.loc[str(clf_item)] = fold_scores_df.loc['ave']
    return clfs_scores_df
# Usage test
# NOTE(review): `data` is not defined in this section; it is presumably a
# pandas DataFrame loaded earlier, with feature columns labelled 'Aa' through
# 'Sh' and a 'Class' label column -- confirm against the loading code.
# The return values of both calls are not stored here.

# One Gaussian naive Bayes classifier, 5-fold cross-validation, five metrics.
classifier1 = GaussianNB()
evaluation_set = ['accuracy', 'precision', 'roc_auc', 'recall', 'f1']
classifier_evaluation(
    classifier1,
    data.loc[:, 'Aa':'Sh'],
    data['Class'],
    5,
    evaluation_set,
)

# Ten random forests with 10, 20, ..., 100 trees, 10-fold cross-validation.
rf_clfs = [
    RandomForestClassifier(
        n_estimators=n_trees,
        max_depth=None,
        min_samples_split=2,
        random_state=0,
    )
    for n_trees in range(10, 110, 10)
]
evaluation_set = ['accuracy', 'precision', 'roc_auc', 'recall', 'f1']
classifiers_evaluation(
    rf_clfs,
    data.loc[:, 'Aa':'Sh'],
    data['Class'],
    10,
    evaluation_set,
)