# Load the Iris dataset and preview its first five feature rows.
# NOTE(review): load_iris is presumably sklearn.datasets.load_iris; the import
# is not visible in this part of the file.
iris_data = load_iris()
iris_data.data[:5]
array([[5.1, 3.5, 1.4, 0.2],
[4.9, 3. , 1.4, 0.2],
[4.7, 3.2, 1.3, 0.2],
[4.6, 3.1, 1.5, 0.2],
[5. , 3.6, 1.4, 0.2]])
# Feature (column) names
iris_data["feature_names"]
['sepal length (cm)',
'sepal width (cm)',
'petal length (cm)',
'petal width (cm)']
# Class labels: print the label names, then count how many samples carry each
# encoded label value.
print(iris_data.target_names)
label_frame = pd.DataFrame(iris_data.target)
label_frame.value_counts()
['setosa' 'versicolor' 'virginica']
0 50
1 50
2 50
dtype: int64
# Target vector of encoded class labels.
y = iris_data.target
# Feature matrix as a DataFrame whose columns carry the feature names.
X = pd.DataFrame(data=iris_data.data, columns=iris_data.feature_names)
# Summary statistics for every feature column.
X.describe()
sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | |
---|---|---|---|---|
count | 150.000000 | 150.000000 | 150.000000 | 150.000000 |
mean | 5.843333 | 3.057333 | 3.758000 | 1.199333 |
std | 0.828066 | 0.435866 | 1.765298 | 0.762238 |
min | 4.300000 | 2.000000 | 1.000000 | 0.100000 |
25% | 5.100000 | 2.800000 | 1.600000 | 0.300000 |
50% | 5.800000 | 3.000000 | 4.350000 | 1.300000 |
75% | 6.400000 | 3.300000 | 5.100000 | 1.800000 |
max | 7.900000 | 4.400000 | 6.900000 | 2.500000 |
# Annotated heatmap of the pairwise feature correlations, drawn on a small
# 3x3-inch figure.
feature_corr = X.corr()
plt.figure(figsize=(3, 3))
sns.heatmap(feature_corr, annot=True)
# Scatter-plot matrix of every pair of features.
# pairplot is a figure-level function: it builds its own figure, so a preceding
# plt.figure(figsize=...) call only leaves an empty extra figure behind and its
# size is never applied. To resize the grid, pass `height` / `aspect` to
# pairplot instead.
sns.pairplot(X)
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=47,
)
# Shapes in the order: train features, train labels, test features, test labels.
tuple(part.shape for part in (x_train, y_train, x_test, y_test))
((120, 4), (120,), (30, 4), (30,))
# 归一化/标准化 — Feature scaling: standardize each feature to zero mean and unit variance (StandardScaler)
# Learn the scaling statistics from the training split only, then apply that
# same transformation to both splits so the test data never influences the fit.
transfer = StandardScaler()
transfer.fit(x_train)
x_train = transfer.transform(x_train)
x_test = transfer.transform(x_test)
# Scaling must leave the shapes unchanged.
x_train.shape, x_test.shape
((120, 4), (30, 4))
# Build a 5-nearest-neighbours classifier and train it on the training split.
# fit() returns the estimator itself, so the chained call binds the fitted model.
knn_model = KNeighborsClassifier(n_neighbors=5).fit(x_train, y_train)
knn_model
KNeighborsClassifier()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
KNeighborsClassifier()
# Predict the test split, then show element-wise which predictions match the
# true labels.
y_pred = knn_model.predict(x_test)
is_correct = y_pred == y_test
is_correct
array([ True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, False, True, True, True,
True, True, True])
# Mean accuracy of the fitted classifier on the training split.
knn_model.score(X=x_train, y=y_train)
0.9666666666666667
# Mean accuracy of the fitted classifier on the held-out test split.
knn_model.score(X=x_test, y=y_test)
0.9666666666666667