# 大数据入门——Iris数据集类别预测(K近邻分类器:KNN)

# Load the Iris dataset and print its details.
from sklearn.datasets import load_iris

iris = load_iris()
# One call emits the shape of the data array and then the dataset's DESCR
# text, each on its own line.
print(iris.data.shape, iris.DESCR, sep='\n')


# Randomly split the dataset into training and test subsets.
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed
# in 0.20; `train_test_split` now lives in `sklearn.model_selection`. The
# fallback keeps the script working on installs older than 0.18.
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # scikit-learn < 0.18
    from sklearn.cross_validation import train_test_split

# Hold out 25% of the samples for testing; the fixed random_state makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.25, random_state=33
)


# Predict classes with a K-nearest-neighbours classifier.
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training features only, then apply that same fitted
# scaler to both the training and the test features.
ss = StandardScaler().fit(X_train)
X_train = ss.transform(X_train)
X_test = ss.transform(X_test)

# fit() returns the estimator itself, so construction and training are chained.
knc = KNeighborsClassifier().fit(X_train, y_train)
y_predict = knc.predict(X_test)


# Evaluate the predictive performance of the K-nearest-neighbours classifier.
from sklearn.metrics import classification_report

# Score the fitted classifier on the held-out test split.
print('The accuracy of K-Nearest Neighbor Classifier is', knc.score(X=X_test, y=y_test))

# Text report comparing the test labels with the predictions, with rows
# labelled by the dataset's target names.
print(classification_report(y_true=y_test, y_pred=y_predict, target_names=iris.target_names))

# 你可能感兴趣的:(大数据竞赛,大数据,KNN,K近邻,Iris数据集)