# Experiment code (实验代码)
from sklearn.datasets import load_iris
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import matplotlib.pyplot as plt
# Load the iris measurements into a DataFrame whose columns are the feature names.
iris = load_iris()
iris_dataset = pd.DataFrame(iris['data'], columns=iris['feature_names'])

# Candidate cluster counts; shared by the scan loop and the x-axis of the plot
# so the two can never drift apart.
cluster_counts = range(2, 11)

silhouette_scores = []
# Start below any reachable silhouette value (the coefficient lies in [-1, 1])
# so the first candidate is always accepted and best_clusterCount can never be
# left at 0, which would make the later KMeans(n_clusters=0) call fail.
max_score = float("-inf")
best_clusterCount = 0
for cluster_count in cluster_counts:
    Kmeans_estimator = KMeans(
        n_clusters=cluster_count,
        init="k-means++",
        max_iter=10000,
        random_state=0,
    )
    # Fit and label the same data in one call instead of fit() + predict().
    Kmeans_result = Kmeans_estimator.fit_predict(iris_dataset)
    judge_score = silhouette_score(iris_dataset, Kmeans_result)
    silhouette_scores.append(judge_score)
    # Strict '>' keeps the smallest cluster count when scores tie.
    if judge_score > max_score:
        max_score = judge_score
        best_clusterCount = cluster_count

# Figure 1: silhouette coefficient as a function of the number of clusters.
plt.figure(1)
plt.xlabel("聚类个数", fontproperties="Simhei")
plt.ylabel("轮廓系数", fontproperties="Simhei")
plt.axis([1, 11, 0, 1])
plt.grid(True)
plt.title("轮廓系数折线图", fontproperties="Simhei")
plt.plot(cluster_counts, silhouette_scores, 'r-o')
plt.show()
print("通过轮廓系数比较得出的最佳聚类个数是:",best_clusterCount,",对应的最大轮廓系数为:",max_score)
# Refit KMeans with the cluster count chosen by the silhouette scan.
# fit() returns the estimator itself, so construction and fitting are chained.
Kmeans_estimator = KMeans(
    n_clusters=best_clusterCount,
    init="k-means++",
    max_iter=10000,
    random_state=0,
).fit(iris_dataset)
labels = Kmeans_estimator.labels_
Kmeans_result = Kmeans_estimator.predict(iris_dataset)

# Attach the predicted cluster index to every sample. This happens before the
# split below, so both subsets carry the 'cluster_result' column.
iris_dataset["cluster_result"] = Kmeans_result

# Row subsets for the clusters labelled 0 and 1 (boolean-mask selection).
iris_dataset1 = iris_dataset.loc[labels == 0]
iris_dataset2 = iris_dataset.loc[labels == 1]
# Scatter plots of the two cluster subsets, one figure per feature pair.
# Each entry is (figure number, x-axis column, y-axis column, title).
# The last plot uses figure 5: the original reused figure number 4, and
# plt.figure() with an existing number re-activates that figure instead of
# creating a new one, so the plot could be drawn on top of the previous one.
# Only the rows in iris_dataset1 and iris_dataset2 are drawn.
scatter_figures = [
    (2, "sepal length (cm)", "sepal width (cm)",
     "sepal length - sepal width Figure"),
    (3, "petal length (cm)", "petal width (cm)",
     "petal length - petal width Figure"),
    (4, "sepal length (cm)", "petal length (cm)",
     "sepal length - petal length Figure"),
    (5, "sepal width (cm)", "petal width (cm)",
     "sepal width - petal width Figure"),
]
for figure_number, x_column, y_column, figure_title in scatter_figures:
    plt.figure(figure_number)
    plt.xlabel(x_column)
    plt.ylabel(y_column)
    plt.grid(True)
    plt.title(figure_title)
    # Red markers for the first subset, blue for the second; the returned
    # line handles feed the legend.
    line1, = plt.plot(iris_dataset1[x_column], iris_dataset1[y_column], "r o")
    line2, = plt.plot(iris_dataset2[x_column], iris_dataset2[y_column], "b o")
    plt.legend([line1, line2], ["Cluster_1", "Cluster_2"])
    plt.show()
# Results (结果展示)