sklearn练习

step:

sklearn练习_第1张图片


code:

from sklearn import cross_validation
from sklearn import datasets
from sklearn import metrics
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import numpy as np
from sklearn.model_selection import KFold


# Report the quality of one cross-validation fold's predictions.
def evaluation(y_test, pred):
    """Print accuracy, F1-score and ROC AUC of ``pred`` against ``y_test``.

    Both arguments are passed unchanged to the ``sklearn.metrics`` scorers
    (true labels first, predictions second). Nothing is returned; the three
    scores are written to stdout, one per line.
    """
    scorers = (
        ("Accuracy: ", metrics.accuracy_score),
        ("F1-score: ", metrics.f1_score),
        ("AUC ROC : ", metrics.roc_auc_score),
    )
    # Compute every score before printing so a failing scorer leaves no
    # partial report behind.
    report = [(label, scorer(y_test, pred)) for label, scorer in scorers]
    for label, value in report:
        print(label, value)


# Create a synthetic classification dataset: (features, labels).
dataset = datasets.make_classification(n_samples=1500, n_features=10)
X, y = dataset

# Split the dataset with shuffled 10-fold cross-validation.
# The legacy ``sklearn.cross_validation.KFold(n, n_folds=...)`` API was
# removed in scikit-learn 0.20; ``sklearn.model_selection.KFold`` (already
# imported at the top of the file) takes ``n_splits`` and yields the index
# arrays from ``split``.
kf = KFold(n_splits=10, shuffle=True)

for index, (train_index, test_index) in enumerate(kf.split(X), start=1):
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

    print("test ", index)  # label each fold in the output

    # Fresh, untrained estimators for every fold:
    # (1) Gaussian Naive Bayes, (2) RBF-kernel SVC, (3) random forest.
    classifiers = (
        ("GaussianNB", GaussianNB()),
        ("SVC", SVC(C=1e-01, kernel='rbf', gamma=0.1)),
        ("RandomForestClassifier", RandomForestClassifier(n_estimators=10)),
    )

    # Train each algorithm on the training folds and score it on the
    # held-out fold.
    for name, clf in classifiers:
        clf.fit(X_train, y_train)
        pred = clf.predict(X_test)
        print(name + "'s evaluation: ")
        evaluation(y_test, pred)

        print("-----------------")

部分output:

test  1
GaussianNB's evaluation:
Accuracy:  0.94
F1-score:  0.9419354838709678
AUC ROC :  0.9406116642958747
-----------------
SVC's evaluation:
Accuracy:  0.9466666666666667
F1-score:  0.948051948051948
AUC ROC :  0.947190611664296
-----------------
RandomForestClassifier's evaluation:
Accuracy:  0.96
F1-score:  0.9594594594594594
AUC ROC :  0.9599928876244666
-----------------
test  6
GaussianNB's evaluation:
Accuracy:  0.92
F1-score:  0.925
AUC ROC :  0.9199999999999999
-----------------
SVC's evaluation:
Accuracy:  0.96
F1-score:  0.9615384615384615
AUC ROC :  0.9600000000000001
-----------------
RandomForestClassifier's evaluation:
Accuracy:  0.9866666666666667
F1-score:  0.9868421052631579
AUC ROC :  0.9866666666666667
-----------------
test  10
GaussianNB's evaluation:
Accuracy:  0.9133333333333333
F1-score:  0.9115646258503401
AUC ROC :  0.9160714285714286
-----------------
SVC's evaluation:
Accuracy:  0.9533333333333334
F1-score:  0.9510489510489512
AUC ROC :  0.9544642857142858
-----------------
RandomForestClassifier's evaluation:
Accuracy:  0.9666666666666667
F1-score:  0.9640287769784173
AUC ROC :  0.9660714285714287
-----------------

将10次测试(10折交叉验证)的评估结果绘制成折线图,可以更直观地比较各项指标:

import matplotlib.pyplot as plt

# Per-fold scores copied by hand from the cross-validation output above,
# one list entry per test (fold) 1..10.
SCORES = {
    "GaussianNB": {
        "Accuracy": [0.94, 0.9333, 0.9333, 0.9466, 0.9333, 0.92, 0.96, 0.9467, 0.9467, 0.9133],
        "F1-score": [0.9419, 0.9411, 0.9324, 0.9480, 0.9390, 0.925, 0.9552, 0.9487, 0.9500, 0.9115],
        "AUC ROC": [0.9406, 0.9289, 0.9366, 0.9472, 0.9312, 0.9199, 0.9610, 0.9466, 0.9455, 0.9160],
    },
    "SVC": {
        "Accuracy": [0.9466, 0.9533, 0.9533, 0.96, 0.9666, 0.96, 0.9733, 0.9933, 0.9533, 0.9533],
        "F1-score": [0.9480, 0.9590, 0.9517, 0.9605, 0.9696, 0.9615, 0.9701, 0.9935, 0.9565, 0.9510],
        "AUC ROC": [0.9471, 0.9485, 0.9553, 0.9603, 0.9642, 0.9600, 0.9745, 0.9935, 0.9519, 0.9544],
    },
    "RandomForestClassifier": {
        "Accuracy": [0.96, 0.9533, 0.9666, 0.98, 0.9666, 0.9866, 0.9866, 0.98, 0.98, 0.9666],
        "F1-score": [0.9594, 0.9585, 0.9650, 0.9798, 0.9685, 0.9868, 0.9846, 0.9806, 0.9808, 0.9640],
        "AUC ROC": [0.9599, 0.9497, 0.9678, 0.9800, 0.9669, 0.9866, 0.9848, 0.9802, 0.9797, 0.9660],
    },
}

# Drawing order and line colour of each metric.
METRIC_COLORS = (
    ("Accuracy", "red"),
    ("F1-score", "blue"),
    ("AUC ROC", "green"),
)


def plot_evaluation(classifier_name, scores):
    """Draw one line per metric for ``classifier_name`` on the current figure.

    ``scores`` maps each metric name in ``METRIC_COLORS`` to its list of
    per-test values. The caller is responsible for calling ``plt.show()``.
    """
    plt.title("Evaluation of " + classifier_name, fontsize=14)
    plt.xlabel("test", fontsize=14)
    plt.ylabel("evaluation", fontsize=14)

    for metric_name, color in METRIC_COLORS:
        values = scores[metric_name]
        x = range(1, len(values) + 1)
        # Plain-text labels: inside "$...$" mathtext drops the space in
        # "AUC ROC" and renders the hyphen of "F1-score" as a minus sign.
        plt.plot(x, values, label=metric_name, color=color)

    plt.legend()


# Change the key to "GaussianNB" or "SVC" to plot the other classifiers.
classifier = "RandomForestClassifier"
plot_evaluation(classifier, SCORES[classifier])
plt.show()

GaussianNB:

sklearn练习_第2张图片


SVC:

sklearn练习_第3张图片

RandomForestClassifier:

sklearn练习_第4张图片


你可能感兴趣的:(sklearn练习)