mclust学习总结

mclust (example 1)

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.datasets import make_blobs
# Draw 300 points around 4 blob centers; y holds the generating blob index.
X, y = make_blobs(
    n_samples=300,
    n_features=2,  # written out explicitly; 2 is also the default
    centers=4,
    cluster_std=0.60,
    random_state=0,
)
# Plot the raw points (first feature against second) before any clustering.
plt.scatter(x=X[:, 0], y=X[:, 1])
plt.show()

def mclust(features, num_cluster, modelNames='EEE', random_seed=2020):
    """Cluster ``features`` by calling ``Mclust`` from the R package mclust.

    The work is delegated to R through rpy2: the R library ``mclust`` is
    loaded, rpy2's numpy conversion is activated, and ``Mclust`` is called
    with ``features``, ``num_cluster`` and ``modelNames`` as its first three
    positional arguments. The parameters therefore have the same meaning as
    in the R package.

    Args:
        features: Data handed to ``Mclust`` after conversion with
            ``numpy2rpy``. Callers in this file pass the
            ``(n_samples, n_features)`` array returned by ``make_blobs``.
        num_cluster: Second positional argument of ``Mclust``.
        modelNames: Third positional argument of ``Mclust``.
        random_seed: Seed applied to both NumPy (``np.random.seed``) and
            R (``set.seed``) before fitting.

    Returns:
        The second-to-last component of the ``Mclust`` result as an integer
        numpy array.
    """
    np.random.seed(random_seed)

    # rpy2 is imported lazily so that it is only needed when clustering runs.
    import rpy2.robjects as robjects
    import rpy2.robjects.numpy2ri

    robjects.r.library("mclust")
    rpy2.robjects.numpy2ri.activate()

    # Seed R's generator as well, then fit the model.
    robjects.r['set.seed'](random_seed)
    fit = robjects.r['Mclust'](
        rpy2.robjects.numpy2ri.numpy2rpy(features),
        num_cluster,
        modelNames,
    )

    # NOTE(review): index -2 is presumably the `classification` component of
    # the Mclust object -- confirm against the installed mclust version.
    assignments = np.array(fit[-2])
    return assignments.astype(int)
                  
# Cluster the 2-D blobs into 4 groups with mclust.
label_mclust = mclust(X, num_cluster=4)

from sklearn.metrics import adjusted_rand_score
from sklearn.metrics import normalized_mutual_info_score
# Compare the predicted labels with the blob index each point was drawn from.
print("ARI = {}".format(adjusted_rand_score(label_mclust, y)))
print("NMI = {}".format(normalized_mutual_info_score(label_mclust, y)))

# Plot every predicted cluster as its own scatter series.
pred_y = label_mclust.copy()
fig = plt.figure()
for label in np.unique(pred_y):
    plt.scatter(X[label == pred_y, 0], X[label == pred_y, 1], label=label)

# The per-series labels only become visible once a legend is drawn; without
# this call the `label=` arguments above have no effect on the figure.
plt.legend()
plt.show()

结果如下
mclust学习总结_第1张图片
mclust学习总结_第2张图片

mclust (example 2)

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.datasets import make_blobs

def mclust(features, num_cluster, modelNames='EEE', random_seed=2020):
    """Cluster ``features`` by calling ``Mclust`` from the R package mclust.

    The work is delegated to R through rpy2: the R library ``mclust`` is
    loaded, rpy2's numpy conversion is activated, and ``Mclust`` is called
    with ``features``, ``num_cluster`` and ``modelNames`` as its first three
    positional arguments. The parameters therefore have the same meaning as
    in the R package.

    Args:
        features: Data handed to ``Mclust`` after conversion with
            ``numpy2rpy``. Callers in this file pass the
            ``(n_samples, n_features)`` array returned by ``make_blobs``.
        num_cluster: Second positional argument of ``Mclust``.
        modelNames: Third positional argument of ``Mclust``.
        random_seed: Seed applied to both NumPy (``np.random.seed``) and
            R (``set.seed``) before fitting.

    Returns:
        The second-to-last component of the ``Mclust`` result as an integer
        numpy array.
    """
    np.random.seed(random_seed)

    # rpy2 is imported lazily so that it is only needed when clustering runs.
    import rpy2.robjects as robjects
    import rpy2.robjects.numpy2ri

    robjects.r.library("mclust")
    rpy2.robjects.numpy2ri.activate()

    # Seed R's generator as well, then fit the model.
    robjects.r['set.seed'](random_seed)
    fit = robjects.r['Mclust'](
        rpy2.robjects.numpy2ri.numpy2rpy(features),
        num_cluster,
        modelNames,
    )

    # NOTE(review): index -2 is presumably the `classification` component of
    # the Mclust object -- confirm against the installed mclust version.
    assignments = np.array(fit[-2])
    return assignments.astype(int)
                  
# Draw 1000 points in 50 dimensions around 5 blob centers.
X, y = make_blobs(
    n_samples=1000,
    n_features=50,
    centers=5,
    random_state=0,
)
print(X.shape)

# Cluster the high-dimensional blobs into 5 groups with mclust.
label_mclust = mclust(X, num_cluster=5)

from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score

# Compare the predicted labels with the blob index each point was drawn from.
print(f"ARI = {adjusted_rand_score(label_mclust, y)}")
print(f"NMI = {normalized_mutual_info_score(label_mclust, y)}")

结果如下
[图片:示例 2 的运行结果]

你可能感兴趣的:(学习,python,开发语言)