import pandas as pd
import numpy as np
# Synthetic data set: four independent feature columns (A-D), each holding
# 1000 samples drawn from a normal distribution with mean 0 and std 1.
# The columns are generated in the order A, B, C, D.
df = pd.DataFrame(
    {
        column: np.random.normal(loc=0, scale=1, size=1000)
        for column in ("A", "B", "C", "D")
    }
)
df.head()
输出:
A B C D
0 -1.197896 2.012483 -3.483592 -1.871044
1 -0.107903 -0.176872 -0.530986 0.308957
2 0.347445 -0.600894 0.434848 -0.496437
3 -1.303541 0.194091 -0.699378 -0.929843
4 1.144723 -0.310182 0.405988 1.393773
from sklearn.cluster import KMeans

# Partition the rows of `df` into four clusters.
kmeans = KMeans(n_clusters=4)
kmeans.fit(df)
k_fit = kmeans  # fit() returns the estimator itself, so both names refer to it

# Keep the per-row cluster index and attach it to the frame as a new column.
predictions = kmeans.labels_
df["Clusters"] = predictions
df.head()
输出:
A B C D Clusters
0 -1.197896 2.012483 -3.483592 -1.871044 2
1 -0.107903 -0.176872 -0.530986 0.308957 1
2 0.347445 -0.600894 0.434848 -0.496437 0
3 -1.303541 0.194091 -0.699378 -0.929843 1
4 1.144723 -0.310182 0.405988 1.393773 3
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import seaborn as sns

# Embed the feature columns only. `df` already carries the KMeans labels in
# 'Clusters'; passing that column to t-SNE would let the labels themselves
# shape the embedding that is supposed to visualize them.
# errors="ignore" keeps this cell usable when the column is absent.
features = df.drop(columns=["Clusters"], errors="ignore")
Xtsne = TSNE(n_components=2).fit_transform(features)

# One row per sample: the 2-D coordinates plus the cluster label used for hue.
dftsne = pd.DataFrame(data=Xtsne, columns=['x1', 'x2'])
dftsne['cluster'] = predictions

plt.figure(figsize=(10, 10))
sns.scatterplot(
    data=dftsne,
    x='x1',
    y='x2',
    hue='cluster',
    legend="full",
    palette='tab10',
    alpha=0.8,
)
plt.title('Clusters Visualized on TSNE 2D')
plt.show()
import pandas as pd
import numpy as np
# Fresh synthetic data set for the 3-D example: four independent feature
# columns (A-D), each holding 1000 samples drawn from a normal distribution
# with mean 0 and std 1. The columns are generated in the order A, B, C, D.
df = pd.DataFrame(
    {
        column: np.random.normal(loc=0, scale=1, size=1000)
        for column in ("A", "B", "C", "D")
    }
)
df.head()
输出:
A B C D
0 0.313137 0.834052 1.089202 -0.761206
1 -0.245337 -0.395838 0.772860 0.976120
2 -1.163416 1.389154 0.643586 0.975716
3 0.162503 -0.413536 -1.052316 2.248309
4 -0.126101 0.196020 -0.634920 0.633622
from sklearn.cluster import KMeans

# Partition the rows of `df` into four clusters.
kmeans = KMeans(n_clusters=4)
kmeans.fit(df)
k_fit = kmeans  # fit() returns the estimator itself, so both names refer to it

# Keep the per-row cluster index and attach it to the frame as a new column.
predictions = kmeans.labels_
df["Clusters"] = predictions
df.head()
输出:
from sklearn.cluster import KMeans

# Cluster on the feature columns only. When this cell runs, `df` may already
# hold a 'Clusters' column from an earlier fit; leaving it in would feed the
# previous labels back into KMeans as an extra feature.
# errors="ignore" makes the cell work whether or not the column exists yet.
features = df.drop(columns=["Clusters"], errors="ignore")

kmeans = KMeans(n_clusters=4)
k_fit = kmeans.fit(features)

# Store (or overwrite) the per-row cluster index on the frame.
predictions = k_fit.labels_
df['Clusters'] = predictions
df.head()
from sklearn.manifold import TSNE
# Kept from the original: older matplotlib releases need this import to
# register the '3d' projection used below.
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401
import matplotlib.pyplot as plt

# Embed the feature columns only. `df` already carries the KMeans labels in
# 'Clusters'; passing that column to t-SNE would let the labels themselves
# shape the embedding that is supposed to visualize them.
# errors="ignore" keeps this cell usable when the column is absent.
features = df.drop(columns=["Clusters"], errors="ignore")
Xtsne = TSNE(n_components=3).fit_transform(features)

# One row per sample: the 3-D coordinates plus the cluster label.
dftsne = pd.DataFrame(data=Xtsne, columns=['x1', 'x2', 'x3'])
dftsne['cluster'] = predictions

fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection='3d')
x = dftsne['x1']
y = dftsne['x2']
z = dftsne['x3']

# Colour each point by its cluster index.
ax.scatter(x, y, z, c=predictions, cmap="jet", marker="o")
ax.set_title("KMeans (N = 4)")
ax.set_xlabel('x1')
ax.set_ylabel('x2')
ax.set_zlabel('x3')
plt.show()