k-means++代码示例

文章目录

    • 模拟解决问题
      • 代码
      • 全部样本数据Mall_Customers.csv:
      • 运行结果:

模拟解决问题

利用 K-Means++ 根据年收入和消费评分对商场会员进行聚类,例如区分出收入高且舍得花钱的人、收入高但不舍得花钱的人等。

代码

# K-Means Clustering

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Load the customer table and pull out the two feature columns for clustering.
dataset = pd.read_csv('Mall_Customers.csv')
# Columns are picked by position (3 and 4); in the CSV listed with this script
# those are 'Annual Income (k$)' and 'Spending Score (1-100)'.
X = dataset.iloc[:, [3, 4]].to_numpy()
# No target vector `y` is built: the template line for it stays disabled.

# Splitting the dataset into the Training set and Test set
# Disabled template step: the code below sits inside a bare string literal, which
# Python evaluates and discards, so nothing here runs. It also refers to `y`,
# which the active code in this script never assigns.
# NOTE(review): `sklearn.cross_validation` was removed from scikit-learn in favour
# of `sklearn.model_selection` -- confirm the import path before re-enabling.
"""from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)"""

# Feature Scaling
# Disabled template step as well (same bare-string technique). It operates on
# X_train / X_test / y_train, which are only created by the disabled split step.
"""from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
sc_y = StandardScaler()
y_train = sc_y.fit_transform(y_train)"""

# Elbow method: fit K-Means for k = 1..10 and record each model's within-cluster
# sum of squares (WCSS), which scikit-learn exposes as `inertia_`.
from sklearn.cluster import KMeans
wcss = []
for i in range(1, 11):
    # n_init is pinned to 10 restarts (scikit-learn's historical default) so the
    # curve does not depend on the installed version: newer releases default to
    # n_init='auto', which runs k-means++ only once, and intermediate releases
    # emit a FutureWarning when n_init is left unset.
    kmeans = KMeans(n_clusters=i, init='k-means++', n_init=10, random_state=42)
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)

# Plot WCSS against k; the point where the curve flattens (the "elbow") suggests
# a reasonable number of clusters.
plt.plot(range(1, 11), wcss)
plt.title('The Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.show()

# Fit the final model with 5 clusters and assign a cluster index to every row of X.
# (5 is presumably the value read off the elbow plot -- confirm against the curve.)
# Plain K-Means with random seeding can settle in a poor local optimum (the
# "random initialization trap"). Two safeguards are applied here: k-means++
# seeding, and n_init restarts, from which scikit-learn keeps the run with the
# lowest inertia (WCSS). n_init is passed explicitly because its default differs
# between scikit-learn versions (10 historically, 'auto' in newer releases).
kmeans = KMeans(n_clusters=5, init='k-means++', n_init=10, random_state=42)
y_kmeans = kmeans.fit_predict(X)

# Plot the clustering result: one coloured series per cluster, centroids on top.
# NOTE(review): K-Means cluster indices are arbitrary, so the segment names below
# are tied to the index order of one particular fit -- verify against the plot.
cluster_styles = (
    ('red', 'Normal'),
    ('blue', 'Potential'),
    ('green', 'Sensitive'),
    ('cyan', 'Superior'),
    ('magenta', 'Target'),
)
for cluster_id, (colour, segment) in enumerate(cluster_styles):
    # Rows of X whose predicted label equals this cluster index.
    members = X[y_kmeans == cluster_id]
    plt.scatter(members[:, 0], members[:, 1], s=100, c=colour, label=segment)

# Overlay the fitted cluster centres as larger yellow markers.
centroids = kmeans.cluster_centers_
plt.scatter(centroids[:, 0], centroids[:, 1], s=300, c='yellow', label='Centroids')

plt.title('Clusters of customers')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.legend()
plt.show()

全部样本数据Mall_Customers.csv:

CustomerID,Genre,Age,Annual Income (k$),Spending Score (1-100)
0001,Male,19,15,39
0002,Male,21,15,81
0003,Female,20,16,6
0004,Female,23,16,77
0005,Female,31,17,40
0006,Female,22,17,76
0007,Female,35,18,6
0008,Female,23,18,94
0009,Male,64,19,3
0010,Female,30,19,72
0011,Male,67,19,14
0012,Female,35,19,99
0013,Female,58,20,15
0014,Female,24,20,77
0015,Male,37,20,13
0016,Male,22,20,79
0017,Female,35,21,35
0018,Male,20,21,66
0019,Male,52,23,29
0020,Female,35,23,98
0021,Male,35,24,35
0022,Male,25,24,73
0023,Female,46,25,5
0024,Male,31,25,73
0025,Female,54,28,14
0026,Male,29,28,82
0027,Female,45,28,32
0028,Male,35,28,61
0029,Female,40,29,31
0030,Female,23,29,87
0031,Male,60,30,4
0032,Female,21,30,73
0033,Male,53,33,4
0034,Male,18,33,92
0035,Female,49,33,14
0036,Female,21,33,81
0037,Female,42,34,17
0038,Female,30,34,73
0039,Female,36,37,26
0040,Female,20,37,75
0041,Female,65,38,35
0042,Male,24,38,92
0043,Male,48,39,36
0044,Female,31,39,61
0045,Female,49,39,28
0046,Female,24,39,65
0047,Female,50,40,55
0048,Female,27,40,47
0049,Female,29,40,42
0050,Female,31,40,42
0051,Female,49,42,52
0052,Male,33,42,60
0053,Female,31,43,54
0054,Male,59,43,60
0055,Female,50,43,45
0056,Male,47,43,41
0057,Female,51,44,50
0058,Male,69,44,46
0059,Female,27,46,51
0060,Male,53,46,46
0061,Male,70,46,56
0062,Male,19,46,55
0063,Female,67,47,52
0064,Female,54,47,59
0065,Male,63,48,51
0066,Male,18,48,59
0067,Female,43,48,50
0068,Female,68,48,48
0069,Male,19,48,59
0070,Female,32,48,47
0071,Male,70,49,55
0072,Female,47,49,42
0073,Female,60,50,49
0074,Female,60,50,56
0075,Male,59,54,47
0076,Male,26,54,54
0077,Female,45,54,53
0078,Male,40,54,48
0079,Female,23,54,52
0080,Female,49,54,42
0081,Male,57,54,51
0082,Male,38,54,55
0083,Male,67,54,41
0084,Female,46,54,44
0085,Female,21,54,57
0086,Male,48,54,46
0087,Female,55,57,58
0088,Female,22,57,55
0089,Female,34,58,60
0090,Female,50,58,46
0091,Female,68,59,55
0092,Male,18,59,41
0093,Male,48,60,49
0094,Female,40,60,40
0095,Female,32,60,42
0096,Male,24,60,52
0097,Female,47,60,47
0098,Female,27,60,50
0099,Male,48,61,42
0100,Male,20,61,49
0101,Female,23,62,41
0102,Female,49,62,48
0103,Male,67,62,59
0104,Male,26,62,55
0105,Male,49,62,56
0106,Female,21,62,42
0107,Female,66,63,50
0108,Male,54,63,46
0109,Male,68,63,43
0110,Male,66,63,48
0111,Male,65,63,52
0112,Female,19,63,54
0113,Female,38,64,42
0114,Male,19,64,46
0115,Female,18,65,48
0116,Female,19,65,50
0117,Female,63,65,43
0118,Female,49,65,59
0119,Female,51,67,43
0120,Female,50,67,57
0121,Male,27,67,56
0122,Female,38,67,40
0123,Female,40,69,58
0124,Male,39,69,91
0125,Female,23,70,29
0126,Female,31,70,77
0127,Male,43,71,35
0128,Male,40,71,95
0129,Male,59,71,11
0130,Male,38,71,75
0131,Male,47,71,9
0132,Male,39,71,75
0133,Female,25,72,34
0134,Female,31,72,71
0135,Male,20,73,5
0136,Female,29,73,88
0137,Female,44,73,7
0138,Male,32,73,73
0139,Male,19,74,10
0140,Female,35,74,72
0141,Female,57,75,5
0142,Male,32,75,93
0143,Female,28,76,40
0144,Female,32,76,87
0145,Male,25,77,12
0146,Male,28,77,97
0147,Male,48,77,36
0148,Female,32,77,74
0149,Female,34,78,22
0150,Male,34,78,90
0151,Male,43,78,17
0152,Male,39,78,88
0153,Female,44,78,20
0154,Female,38,78,76
0155,Female,47,78,16
0156,Female,27,78,89
0157,Male,37,78,1
0158,Female,30,78,78
0159,Male,34,78,1
0160,Female,30,78,73
0161,Female,56,79,35
0162,Female,29,79,83
0163,Male,19,81,5
0164,Female,31,81,93
0165,Male,50,85,26
0166,Female,36,85,75
0167,Male,42,86,20
0168,Female,33,86,95
0169,Female,36,87,27
0170,Male,32,87,63
0171,Male,40,87,13
0172,Male,28,87,75
0173,Male,36,87,10
0174,Male,36,87,92
0175,Female,52,88,13
0176,Female,30,88,86
0177,Male,58,88,15
0178,Male,27,88,69
0179,Male,59,93,14
0180,Male,35,93,90
0181,Female,37,97,32
0182,Female,32,97,86
0183,Male,46,98,15
0184,Female,29,98,88
0185,Female,41,99,39
0186,Male,30,99,97
0187,Female,54,101,24
0188,Male,28,101,68
0189,Female,41,103,17
0190,Female,36,103,85
0191,Female,34,103,23
0192,Female,32,103,69
0193,Male,33,113,8
0194,Female,38,113,91
0195,Female,47,120,16
0196,Female,35,120,79
0197,Female,45,126,28
0198,Male,32,126,74
0199,Male,32,137,18
0200,Male,30,137,83

运行结果:

k-means++代码示例_第1张图片

你可能感兴趣的:(机器学习,k-means,机器学习,数据挖掘)