其知识详见:http://blog.csdn.net/batuwuhanpei/article/details/51910349
import numpy as np
from sklearn.naive_bayes import GaussianNB
from func_plot_classifier import plot_classifier
# Load a comma-separated numeric dataset: on every row, all columns but the
# last are features and the last column is the class label.
input_file = 'data_multivar.txt'
x = []
y = []
with open(input_file, 'r') as f:
    # Iterate the file object directly rather than materialising every line
    # with readlines().
    for line in f:
        line = line.strip()
        if not line:
            # A blank line (e.g. a trailing newline at end of file) would make
            # float('') raise ValueError, so skip it.
            continue
        data = [float(i) for i in line.split(',')]
        x.append(data[:-1])
        y.append(data[-1])
# Convert the accumulated rows to NumPy arrays.
x = np.array(x)
y = np.array(y)
# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
# 0.20; its helpers now live in sklearn.model_selection.  Bind that module to
# the old name so every `cross_validation.<helper>` call in this script keeps
# working, and fall back to the legacy module on very old installations.
try:
    from sklearn import model_selection as cross_validation
except ImportError:
    from sklearn import cross_validation

# Hold out 25% of the samples for testing; the fixed random_state makes the
# split reproducible between runs.
x_train, x_test, y_train, y_test = cross_validation.train_test_split(
    x, y, test_size=0.25, random_state=5)

# Fit a Gaussian naive Bayes model on the training split and predict the
# labels of the held-out split.
gaussiannb_classifier = GaussianNB()
gaussiannb_classifier.fit(x_train, y_train)
y_predict = gaussiannb_classifier.predict(x_test)

# Percentage of test samples whose predicted label equals the true label.
accuracy = 100 * (y_predict == y_test).sum() / x_test.shape[0]
print("Accuracy of the GuassianNb Classifier: ", round(accuracy, 2), '%')
代码运行结果:
Accuracy of the GuassianNb Classifier: 98.0 %
# Hand the fitted classifier and the held-out test split to the project helper
# plot_classifier (imported from func_plot_classifier).
# NOTE(review): presumably this draws the decision regions with the test
# points overlaid -- confirm against the helper's implementation.
plot_classifier(gaussiannb_classifier,x_test,y_test)
交叉验证
有100个样本,其中83个实际及格。分类器判定73个为及格,但其中只有65个真正及格。
精度(precision)= 65/73
召回率(recall)= 65/83
F1得分(F1 Score)= 2*精度*召回率/(精度+召回率)
精度和召回率通常此消彼长:提高其中一个往往会降低另一个,难以同时达到最高,因此用F1得分综合衡量两者。
# Evaluate the classifier with k-fold cross-validation on the full dataset and
# report the mean of each metric as a percentage.  The "*_weighted" scorers
# average the per-class metric weighted by class support.
num_validation = 5

accuracy = cross_validation.cross_val_score(
    gaussiannb_classifier, x, y, scoring='accuracy', cv=num_validation)
print("Accuracy = ", round(100 * accuracy.mean(), 5), '%')

# Each result is printed under its own metric name; previously all four lines
# were labelled "Accuracy", which made the output misleading.
precision = cross_validation.cross_val_score(
    gaussiannb_classifier, x, y, scoring='precision_weighted',
    cv=num_validation)
print("Precision = ", round(100 * precision.mean(), 5), '%')

recall = cross_validation.cross_val_score(
    gaussiannb_classifier, x, y, scoring='recall_weighted',
    cv=num_validation)
print("Recall = ", round(100 * recall.mean(), 5), '%')

F1 = cross_validation.cross_val_score(
    gaussiannb_classifier, x, y, scoring='f1_weighted', cv=num_validation)
print("F1 = ", round(100 * F1.mean(), 5), '%')
代码运行结果(四行依次为:准确率、加权精度、加权召回率、加权F1得分):
Accuracy = 99.5 %
Accuracy = 99.52381 %
Accuracy = 99.5 %
Accuracy = 99.49969 %
from sklearn.metrics import classification_report
# Human-readable label for each class, passed as target_names to the report.
names = [
    'Class 0',
    'Class 1',
    'Class 2',
    'Class 3',
]
# Build the per-class precision / recall / F1 / support table for the
# held-out predictions, then print it.
report = classification_report(y_test, y_predict, target_names=names)
print(report)
代码运行结果:
precision recall f1-score support
Class 0 0.96 1.00 0.98 23
Class 1 1.00 0.95 0.98 21
Class 2 0.97 1.00 0.99 34
Class 3 1.00 0.95 0.98 22
avg / total 0.98 0.98 0.98 100