# TensorFlow 实现 k-means(k-means clustering implemented with TensorFlow)

import tensorflow as tf
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


# Synthetic data set: every point is drawn, with probability 0.5 each, from
# one of two 2-D Gaussian blobs.
num_points = 2000
conjunto_points = []
for _ in range(num_points):
    if np.random.random() < 0.5:
        # Blob centred on (0, 0) with standard deviation 0.9 on both axes.
        conjunto_points.append([np.random.normal(0.0, 0.9), np.random.normal(0.0, 0.9)])
    else:
        # Blob centred on (3, 1) with standard deviation 0.5 on both axes.
        conjunto_points.append([np.random.normal(3.0, 0.5), np.random.normal(1.0, 0.5)])

# Scatter plot of the raw, unlabelled points.
df = pd.DataFrame(conjunto_points, columns=["x1", "y1"])
# x/y are passed by keyword: recent seaborn releases accept only `data`
# positionally, while keywords work on old and new versions alike.
sns.lmplot(x="x1", y="y1", data=df, fit_reg=False)
plt.show()


# ---- Build the k-means graph (TensorFlow 1.x graph/session API) ----
sess = tf.Session()
# Summarise NumPy arrays with more than 5 elements when they are printed.
np.set_printoptions(threshold=5)
vectors = tf.constant(conjunto_points)  # (num_points, 2)
k = 4

# Pick k random data points as the starting centroids.  The shuffle is
# evaluated exactly once so that the values printed below are the very same
# values the variable is initialised with (evaluating `random_shuffle` a
# second time would yield a different, unrelated draw).
initial_centroides = sess.run(tf.slice(tf.random_shuffle(vectors), [0, 0], [k, -1]))
centroides = tf.Variable(initial_centroides)  # (k, 2)
print("centroides = ", initial_centroides)

# Broadcast points against centroids: (1, N, 2) - (k, 1, 2) -> (k, N, 2).
expended_vector = tf.expand_dims(vectors, 0)
expended_centroides = tf.expand_dims(centroides, 1)
# Squared Euclidean distance from every centroid to every point: (k, N).
reduce_sum = tf.reduce_sum(tf.square(tf.subtract(expended_vector, expended_centroides)), 2)
# Index of the closest centroid for each point: (N,).
assignments = tf.argmin(reduce_sum, 0)

# New centroid of cluster c = mean of the points currently assigned to c.
# For each c: where -> (n_c, 1), reshape -> (1, n_c), gather -> (1, n_c, 2),
# mean over axis 1 -> (1, 2); concatenating over c gives (k, 2).
# NOTE(review): a cluster that ends up with no points would presumably
# produce a NaN centroid here — confirm if that case matters.
means = tf.concat(
    [
        tf.reduce_mean(
            tf.gather(vectors, tf.reshape(tf.where(tf.equal(assignments, c)), [1, -1])),
            axis=[1],
        )
        for c in range(k)
    ],
    0,
)
# Running this op performs one k-means step and returns the new centroids.
update_centroides = tf.assign(centroides, means)

# Initialise all graph variables (the centroids) before the first update.
init = tf.global_variables_initializer()
sess.run(init)

# Run a fixed number of k-means steps, printing the centroids after each one.
for i in range(100):
    # The assign op returns the freshly written centroid values, so a second
    # session call just to read the variable back is unnecessary.
    pcentroides = sess.run(update_centroides)
    print(pcentroides)

# Fetch the final cluster index of every point, then release the session:
# nothing below needs TensorFlow any more.
assignment_values = sess.run(assignments)
sess.close()

# One row per point: its coordinates plus the cluster it was assigned to.
data = {
    "x": [point[0] for point in conjunto_points],
    "y": [point[1] for point in conjunto_points],
    "cluster": list(assignment_values),
}
df = pd.DataFrame(data)

# Scatter plot coloured by cluster.  x/y are passed by keyword (recent
# seaborn accepts only `data` positionally) and the figure size uses
# `height`, the name that replaced `size` in seaborn 0.9.
sns.lmplot(x="x", y="y", data=df, fit_reg=False, height=6, hue="cluster", legend=False)
plt.show()
# 图 1:tensorflow 实现 kmeans(kmeans1.png)
# 图 2:tensorflow 实现 kmeans(kmeans2.png)
#
# 你可能感兴趣的:(tensorflow 实现kmeans)