# Agglomerative (bottom-up hierarchical) clusterer that produces `group_size`
# clusters using the Ward linkage criterion.
cls = AgglomerativeClustering(
    n_clusters=group_size,
    linkage='ward',
)
linkage 参数说明:
linkage : {"ward", "complete", "average", "single"}, optional (default="ward")
Which linkage criterion to use. The linkage criterion determines which
distance to use between sets of observations. The algorithm will merge
the pair of clusters that minimizes this criterion.
- ward minimizes the variance of the clusters being merged.
- average uses the average of the distances of each observation of
the two sets.
- complete or maximum linkage uses the maximum distances between
all observations of the two sets.
- single uses the minimum of the distances between all observations
of the two sets.
import numpy as np
from sklearn.cluster import AgglomerativeClustering
import random
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage
# Generate 10 random 2-D points; each coordinate is drawn uniformly from 0..10.
point_random = [
    [random.uniform(0, 10), random.uniform(0, 10)]
    for _ in range(10)
]

# Number of clusters to group the points into.
group_size = 3
cls = AgglomerativeClustering(n_clusters=group_size, linkage='ward')
cluster_group = cls.fit(np.array(point_random))

# One colour per cluster label; needs at least `group_size` entries, otherwise
# the label lookup below raises IndexError.
cnames = ['black', 'blue', 'red']

# Show the result with matplotlib: all points are drawn in a single scatter
# call, each coloured by the cluster label it was assigned.
points = np.array(point_random)
point_colors = [cnames[gp_id] for gp_id in cluster_group.labels_]
plt.scatter(points[:, 0], points[:, 1], s=5, c=point_colors, alpha=1)
plt.show()
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import AgglomerativeClustering
from scipy.cluster.hierarchy import dendrogram, linkage
# Sample a 10 x 2 array of random integers (low=0, high=10) as toy observations.
data = np.random.randint(0, 10, size=(10, 2))

# Build the hierarchical-clustering linkage matrix. 'single' and 'euclidean'
# are scipy's defaults for `linkage`; they are spelled out here for clarity.
Z = linkage(data, method='single', metric='euclidean')

# Draw the merge tree described by Z.
dendrogram(Z)
id | sex | age | annual_income | annual_expenditure |
---|---|---|---|---|
1 | Male | 19 | 15 | 13 |
2 | Male | 21 | 15 | 81 |
3 | Female | 20 | 16 | 13 |
#%%
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy.cluster.hierarchy as shc
# Load the customer table from the working directory. A forward-slash relative
# path is used so the script runs on Windows, Linux and macOS alike.
customer_data = pd.read_csv('./shopping_data.csv')

# Take columns 3 and 4 (by position) as the clustering features: income and
# expenditure.
data = customer_data.iloc[:, 3:5].values

plt.figure(figsize=(10, 7))
plt.title("Customer Dendograms")
# Compute the Ward-linkage hierarchy of the two features and draw it as a
# dendrogram on the current figure.
dend = shc.dendrogram(shc.linkage(data, method='ward'))
plt.savefig("./收支-01.jpg")
plt.show()
#%%
from sklearn.cluster import AgglomerativeClustering
# Group the data into 5 clusters with Ward linkage. The distance metric is left
# at its default (Euclidean, the only metric Ward linkage supports) rather than
# passed via `affinity`, which recent scikit-learn releases no longer accept.
cluster = AgglomerativeClustering(n_clusters=5, linkage='ward')

# fit_predict fits the model and returns one cluster label per row of `data`.
labels = cluster.fit_predict(data)

# Scatter the two feature columns against each other, coloured by cluster.
plt.figure(figsize=(10, 7))
plt.scatter(data[:, 0], data[:, 1], c=labels, cmap='rainbow')
plt.savefig("./收支-02.jpg")
plt.show()