sklearn knn算法用于鸢尾花分类

#knn 算法用于鸢尾花
import numpy as np
#准确率

from sklearn import datasets #自带数据集
from sklearn.metrics import accuracy_score
# knn算法
from sklearn.neighbors import KNeighborsClassifier
#训练集和测试集区分                                  交叉验证
from sklearn.model_selection import train_test_split, cross_val_score


import pandas as pd
import matplotlib.pyplot as plt



def predict(k: int = 5, test_size: float = 1 / 3, random_state: int = 40) -> float:
    """Fit a k-nearest-neighbours classifier on iris and report hold-out accuracy.

    The iris dataset bundled with scikit-learn is split into a training and
    a test portion, the classifier is fitted on the training portion, and
    its accuracy on the test portion is printed and returned.

    Args:
        k: Number of neighbours passed to ``KNeighborsClassifier``.
        test_size: Passed to ``train_test_split`` as ``test_size``.
        random_state: Seed passed to ``train_test_split`` so the split is
            reproducible between runs.

    Returns:
        The value computed by ``accuracy_score`` on the test portion.
    """
    iris = datasets.load_iris()
    features = iris.data    # measurements, one row per sample
    labels = iris.target    # class label for each row

    x_train, x_test, y_train, y_test = train_test_split(
        features, labels, test_size=test_size, random_state=random_state
    )

    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(x_train, y_train)

    # Score on samples the model was not fitted on.
    accuracy = accuracy_score(y_test, knn.predict(x_test))
    print(accuracy)
    return accuracy

#交叉验证
def cross_validation(max_k: int = 29, cv: int = 4) -> int:
    """Pick the number of neighbours for KNN on iris by cross-validation.

    For every k from 1 to ``max_k`` a ``KNeighborsClassifier`` is scored with
    ``cross_val_score`` (accuracy) on the training portion of the iris split,
    and the mean score per k is recorded. The k with the lowest
    misclassification error is printed and returned, and mean accuracy versus
    k is drawn on the current matplotlib figure. This function does not call
    ``plt.show()``; displaying or saving the figure is left to the caller.

    Args:
        max_k: Largest neighbour count to try (inclusive).
        cv: Passed to ``cross_val_score`` as ``cv``.

    Returns:
        The neighbour count with the lowest mean misclassification error;
        on ties, the smallest such k.

    Raises:
        ValueError: If ``max_k`` is smaller than 1.
    """
    if max_k < 1:
        raise ValueError("max_k must be at least 1, got %r" % (max_k,))

    iris = datasets.load_iris()
    # Only the training portion is used; the test portion is discarded here.
    x_train, _, y_train, _ = train_test_split(
        iris.data, iris.target, test_size=1 / 3, random_state=40
    )

    k_values = list(range(1, max_k + 1))
    mean_scores = []
    for k in k_values:
        knn = KNeighborsClassifier(n_neighbors=k)
        fold_scores = cross_val_score(
            knn, x_train, y_train, cv=cv, scoring='accuracy'
        )
        mean_scores.append(fold_scores.mean())

    # Misclassification error = 1 - accuracy. list.index returns the first
    # match, so ties are resolved in favour of the smallest k.
    errors = [1 - score for score in mean_scores]
    optimal_k = k_values[errors.index(min(errors))]
    print("The optimal number of neighbors is %d" % optimal_k)

    plt.plot(k_values, mean_scores)
    plt.xlabel('Number of Neighbors K')
    plt.ylabel('correct classification rate')
    return optimal_k

# Run the cross-validated sweep over k first, then fit and score the
# classifier on the hold-out split.
cross_validation()
predict()

图1:sklearn knn算法用于鸢尾花分类(第1张图片)

你可能感兴趣的:(大数据,机器学习,python,数据挖掘,深度学习,算法)