python实现学生成绩聚类分析_聚类分析的python实现

K-Means算法

import numpy as np

import pandas as pd

from sklearn.cluster import KMeans

# Load the samples to be clustered.
data = pd.read_csv('multi_vol.csv')

# Transpose so the samples are laid out the way the clusterer expects.
data1 = data.T

# Build a 4-cluster K-Means estimator and fit it; fit() returns the
# estimator itself, so construction and fitting can be chained.
estimator = KMeans(n_clusters=4).fit(data1)

# Read the results off the fitted estimator.
centroids = estimator.cluster_centers_  # final cluster centers
label_pred = estimator.labels_  # final cluster label of each sample
inertia = estimator.inertia_  # inertia reported by the fitted estimator

学习向量量化LVQ

#迭代轮数

# LVQ training: on every round, take one random sample, find the prototype
# vector closest to it, then pull that prototype toward the sample when their
# labels match or push it away when they differ.
# Uses names defined outside this excerpt: loops (number of rounds),
# x (samples), y (sample labels), q (prototype vectors),
# q_label (prototype labels) and eta (learning rate).
for i in range(loops):
    # Pick a random sample. The upper bound follows the data set size rather
    # than a hard-coded 30, so every sample can be drawn and no index can
    # fall outside x.
    index = np.random.randint(0, len(x))

    # Search for the prototype vector nearest to the sample (squared
    # Euclidean distance), starting with prototype 0 as the best candidate.
    min_dist = sum((q[0] - x[index])**2)
    q_index = 0
    for j in range(1, len(q)):
        dist = sum((q[j] - x[index])**2)
        if dist < min_dist:
            min_dist = dist
            q_index = j

    if q_label[q_index] == y[index]:
        # Sample label and prototype label are the same: move the prototype
        # toward the sample, scaled by the learning rate eta.
        q[q_index] += eta*(x[index]-q[q_index])
    else:
        # Sample label and prototype label differ: move the prototype away
        # from the sample.
        q[q_index] -= eta*(x[index]-q[q_index])

# Plot the result: samples labelled 0 as red circles, all other samples as
# black circles, and each prototype vector as a blue star.
for i, sample in enumerate(x):
    if y[i] == 0:
        plt.plot(sample[0], sample[1], 'or')
    else:
        plt.plot(sample[0], sample[1], 'o', color='black')

for prototype in q:
    plt.plot(prototype[0], prototype[1], marker='*', color='blue')

高斯混合聚类 GMM

from sklearn import mixture

def test_GMM(dataMat, components=3, iter=100, cov_type="full"):
    """Cluster ``dataMat`` with a Gaussian mixture model.

    Args:
        dataMat: Samples to cluster; passed unchanged to ``fit`` and
            ``predict``.
        components: Number of mixture components (``n_components``).
        iter: Iteration limit, forwarded as ``max_iter``.
        cov_type: Covariance type, forwarded as ``covariance_type``.

    Returns:
        A tuple ``(means, predicted_labels)``: the fitted component means
        and the label predicted for every sample in ``dataMat``.
    """
    # Forward the ``components`` argument; the body previously read an
    # undefined name ``n_components`` and failed with NameError on every call.
    clst = mixture.GaussianMixture(
        n_components=components,
        max_iter=iter,
        covariance_type=cov_type,
    )
    clst.fit(dataMat)
    predicted_labels = clst.predict(dataMat)
    return clst.means_, predicted_labels  # clst.means_ holds the component means

层次聚类

import numpy

import pandas

from sklearn import datasets

import scipy.cluster.hierarchy as hcluster

# Load the iris data set and keep its feature matrix and known class labels.
iris = datasets.load_iris()
data, target = iris.data, iris.target

# Build the hierarchical linkage with the 'centroid' method and draw the
# first, full dendrogram.
linkage = hcluster.linkage(data, method='centroid')
hcluster.dendrogram(linkage, leaf_font_size=10.)

# Draw a second, truncated dendrogram ('lastp' mode with p=12).
hcluster.dendrogram(
    linkage,
    truncate_mode='lastp',
    p=12,
    leaf_font_size=12.,
)

# Cut the tree into flat clusters using the 'maxclust' criterion with 3.
p = hcluster.fcluster(linkage, 3, criterion='maxclust')

# Tabulate the cluster assignments (columns 'p') against the known labels
# (rows 't').
ct = pandas.DataFrame({'p': p, 't': target}).pivot_table(
    index=['t'],
    columns=['p'],
    aggfunc=[numpy.size],
)

密度聚类 DBSCAN

import pandas

import matplotlib.pyplot as plt

from sklearn.cluster import DBSCAN

# Load the data to cluster.
data = pandas.read_csv("%%%%.csv")

# DBSCAN parameters: neighbourhood radius and minimum number of points.
eps = 0.2
MinPts = 5

# Pass both parameters by keyword: current scikit-learn releases make
# everything after eps keyword-only, so DBSCAN(eps, MinPts) raises TypeError.
model = DBSCAN(eps=eps, min_samples=MinPts)

# fit_predict() fits the model and returns the label of every row, so a
# separate fit() call beforehand would only cluster the same data twice.
data['type'] = model.fit_predict(data)

# Scatter the points, coloured by their assigned cluster label.
plt.scatter(data['x'], data['y'], c=data['type'])

参考引用:

LVQ:https://blog.csdn.net/weixin_35732969/article/details/81141005

GMM:https://blog.csdn.net/FAICULTY/article/details/79343640

层次聚类:https://www.jianshu.com/p/b5e97f8d420b

密度聚类:https://www.jianshu.com/p/c2415196cc34

你可能感兴趣的:(python实现学生成绩聚类分析_聚类分析的python实现)