使用高斯混合模型进行聚类

如下图所示,共有 600 个数据点(两个高斯分量各 300 个):
使用高斯混合模型进行聚类_第1张图片

生成模拟数据

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture

# Number of points drawn for each of the two mixture components.
n_samples = 300

# Seed NumPy's global generator so the two draws below are repeatable.
np.random.seed(0)

# Component 1: standard-normal cloud translated so it is centred on (4, 4).
shifted_gaussian = np.array([4, 4]) + np.random.randn(n_samples, 2)

# Component 2: zero-centred standard-normal cloud pushed through the linear
# map C, which shears and stretches it.
C = np.array([[0.0, -0.7],
              [3.5, 0.7]])
stretched_gaussian = np.random.randn(n_samples, 2).dot(C)

# Training set: rows of component 1 first, followed by rows of component 2.
X_train = np.concatenate((shifted_gaussian, stretched_gaussian), axis=0)

建模

# Fit a two-component mixture with covariance_type='full'. fit() returns the
# estimator itself, so construction and fitting are chained.
clf = GaussianMixture(n_components=2, covariance_type='full').fit(X_train)

# Hard cluster assignment: the component index predicted for each row.
y_pred = clf.predict(X_train)
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1]

可视化

# Put the two features and the predicted component index in one table.
df = pd.DataFrame(X_train, columns=['x1', 'x2']).assign(label=y_pred)

# One sub-table per predicted component.
df1 = df.loc[df['label'] == 0]
df2 = df.loc[df['label'] == 1]

# Square canvas with a fixed [-10, 10] window on both axes.
fig, ax = plt.subplots(figsize=(6, 6))
for subset, colour in ((df1, 'r'), (df2, 'g')):
    ax.scatter(subset['x1'], subset['x2'], s=5, c=colour)
ax.set_xlim((-10, 10))
ax.set_ylim((-10, 10))
ax.set_xlabel('x1')
ax.set_ylabel('x2')
ax.grid()
plt.show()

使用高斯混合模型进行聚类_第2张图片

参数 $\alpha$(权重)、$\mu$(均值)、$\sigma$(协方差)

# Fitted mixture parameters, one per line group, in the order alpha, mu, sigma:
#   weights_     -> alpha, shape=(n_components,)
#   means_       -> mu,    shape=(n_components, n_features)
#   covariances_ -> sigma, shape=(n_components, n_features, n_features)
print(clf.weights_, clf.means_, clf.covariances_, sep='\n')
[0.49417311 0.50582689]

[[ 3.90501859  4.00249106]
 [-0.0801113  -0.05029337]]

[[[ 1.02898789  0.02561562]
  [ 0.02561562  0.93481585]]

 [[11.46811141  2.32174864]
  [ 2.32174864  0.90717932]]]

你可能感兴趣的:(使用高斯混合模型进行聚类)