# DBSCAN clustering example
#
# Reference articles:
# https://blog.csdn.net/zdy0_2004/article/details/72953531
# https://blog.csdn.net/lishuhuakai/article/details/53980517

import numpy as np
import pandas as pd
from sklearn.cluster import DBSCAN
import matplotlib.pyplot as plt

# Load columns 0 and 2 (by position) of the input CSV. The path is a raw
# string so the backslashes are never interpreted as escape sequences
# ('\d' and '\c' are invalid escapes and trigger a warning otherwise).
df = pd.read_csv(r'E:\date\clustering.csv', usecols=[0, 2])

# For every distinct 'sjfldm' value, collect the 'sjbh' values of its rows
# into one list; each list is one sample to be clustered.
d = df.groupby('sjfldm')['sjbh'].apply(list)
x = np.array(d)


def jaccard(p, q):
    """Return the Jaccard distance between two collections.

    Both arguments are treated as sets, so repeated items count once and the
    result is symmetric and always lies in ``[0.0, 1.0]``.

    Args:
        p: Iterable of hashable items.
        q: Iterable of hashable items.

    Returns:
        ``1 - |p & q| / |p | q|`` as a float. Two empty collections are
        considered identical and yield ``0.0``.
    """
    left, right = set(p), set(q)
    union_size = len(left | right)
    if union_size == 0:
        # Both inputs are empty: avoid 0/0 and report "no distance".
        return 0.0
    return 1.0 - len(left & right) / union_size


# Build the pairwise Jaccard-distance matrix over the samples in x.
# The diagonal stays 0 (a sample has no distance to itself). Each unordered
# pair is computed once and mirrored, which guarantees the symmetric matrix
# that a precomputed-distance clusterer expects.
n_samples = len(x)
J = [[0.0] * n_samples for _ in range(n_samples)]
for i in range(n_samples):
    for j in range(i + 1, n_samples):
        J[i][j] = J[j][i] = jaccard(x[i], x[j])
S = np.array(J)
print(S)

# Run DBSCAN directly on the precomputed distance matrix S.
db = DBSCAN(metric='precomputed', min_samples=2, eps=0.9)
db.fit(S)
labels = db.labels_

# Boolean mask, one entry per sample, set to True for the core samples.
core_sample_mask1 = np.zeros(labels.shape, dtype=bool)
core_sample_mask1[db.core_sample_indices_] = True

# Number of clusters = distinct labels, not counting -1 (the label
# scikit-learn assigns to noise points) when it occurs.
distinct_labels = set(labels.tolist())
distinct_labels.discard(-1)
n_clusters_ = len(distinct_labels)
print(n_clusters_)

# unique_label = set(labels)
# Colors: colors = [plt.get_cmap('Spectral')(each) for each in np.linspace(0, 1, len(unique_label))]

# You may also be interested in: (DBSCAN)