自组织映射(Self-organizing map)Python实现。仅供学习。
#!/usr/bin/env python3
"""
Self-organizing map
"""
from math import exp
import toolz
import numpy as np
import numpy.linalg as LA
from sklearn.base import ClusterMixin
from sklearn.preprocessing import MinMaxScaler
# Module-level scaler instance shared by `Layer.fit` and the `__main__` demo;
# every `fit_transform` call on it refits it to the data passed in.
scaler = MinMaxScaler()
class Node:
    """A single unit of the self-organizing map.

    Attributes:
        location (np.ndarray | None): position of the node on the map; only
            used by `near` to decide whether two nodes are neighbours.
        weight (np.ndarray): weight vector of the node, in the data space.
    """

    def __init__(self, weight, location=None):
        """Create a node.

        Args:
            weight: array-like weight vector; stored as a float array.
            location: optional array-like map position; `None` means the
                node has no position and is never a neighbour of anything.
        """
        # `update` modifies the weight in place with `+=`; that needs a float
        # ndarray (a list or an integer array would raise).
        self.weight = np.asarray(weight, dtype=float)
        self.location = (
            None if location is None else np.asarray(location, dtype=float)
        )

    def normalize(self):
        """Return the weight scaled to unit Euclidean norm.

        The node itself is not modified. A zero weight vector yields NaNs
        because of the division by a zero norm.
        """
        return self.weight / LA.norm(self.weight)

    def output(self, x):
        """Return the Euclidean distance between `x` and the weight.

        Smaller values mean the node matches `x` better.
        """
        return LA.norm(x - self.weight)

    def near(self, other, d=0.2):
        """Tell whether `other` lies within distance `d` on the map.

        Neighbourhood is judged by `location`, not by `weight`.

        Returns:
            bool: `True` when both nodes have a location and the Euclidean
            distance between the locations is strictly below `d`;
            `False` otherwise.
        """
        if self.location is None or other.location is None:
            return False
        return bool(LA.norm(self.location - other.location) < d)

    def update(self, x, eta=0.1):
        """Move the weight towards `x` in place.

        w += eta * (x - w)
        """
        self.weight += eta * (x - self.weight)

    @staticmethod
    def random(n=2):
        """Create a node with a random unit-norm weight and random location.

        Args:
            n (int): dimension of the weight vector.

        Returns:
            Node: weight drawn uniformly from [0, 1)^n and then normalised,
            location drawn uniformly from [0, 1)^2.
        """
        weight = np.random.random(n)
        location = np.random.random(2)
        node = Node(weight, location)
        # `normalize` returns a new array instead of mutating the node, so
        # the result has to be stored. Skip the all-zero (or empty) vector,
        # which cannot be normalised.
        if LA.norm(node.weight) > 0:
            node.weight = node.normalize()
        return node

    def plot(self, axes, i1=0, i2=1, *args, **kwargs):
        """Plot components `i1` and `i2` of the weight on `axes`.

        `axes` only needs a `plot` method (e.g. a matplotlib Axes); extra
        positional and keyword arguments are forwarded to it.
        """
        x1, x2 = self.weight[i1], self.weight[i2]
        axes.plot(x1, x2, *args, **kwargs)
class Layer(ClusterMixin):
    """Layer of a SOM: a collection of nodes.

    Attributes:
        nodes (list): the `Node` objects of the map.
        labels_ (np.ndarray): index of the best matching unit of every
            training sample; available after `fit`.
    """

    def __init__(self, nodes):
        """Store `nodes` (any iterable of `Node`) as a list."""
        self.nodes = list(nodes)

    @staticmethod
    def random(n_nodes=100, *args, **kwargs):
        """Build a layer of `n_nodes` random nodes.

        Extra arguments are forwarded to `Node.random`.
        """
        return Layer([Node.random(*args, **kwargs) for _ in range(n_nodes)])

    def output(self, x):
        """Return the list of every node's output (distance to `x`)."""
        return [node.output(x) for node in self.nodes]

    def champer(self, x):
        """Return the best matching unit (BMU) for `x`."""
        return self.nodes[self.predict(x)]

    def predict(self, x):
        """Return the index of the best matching unit (BMU) for `x`.

        `x` is compared with the node weights as given; no scaling is
        applied here.
        """
        return np.argmin(self.output(x))

    def update(self, x, eta=0.5, d=0.5):
        """Pull the BMU and its map neighbours towards `x`.

        Args:
            x: one sample.
            eta (float): learning rate passed to `Node.update`.
            d (float): neighbourhood radius passed to `Node.near`.
        """
        c = self.champer(x)
        for node in self.nodes:
            # The BMU itself must always learn, even when nodes carry no
            # location or the radius has shrunk to zero (`near` is then
            # False for every node).
            if node is c or node.near(c, d):
                node.update(x, eta)

    def plot(self, axes, i1=0, i2=1, *args, **kwargs):
        """Scatter components `i1` and `i2` of all node weights on `axes`.

        Extra arguments are forwarded to `axes.scatter`.
        """
        x1 = [node.weight[i1] for node in self.nodes]
        x2 = [node.weight[i2] for node in self.nodes]
        axes.scatter(x1, x2, *args, **kwargs)

    def fit(self, data, eta=0.2, d=0.2, max_iter=100):
        """Train the map on `data`.

        The data is first passed through the module-level `scaler`
        (`fit_transform`, which refits that shared scaler). Learning rate
        and neighbourhood radius both decay as `exp(-t / 10)` with the epoch
        index `t`.

        Args:
            data: 2-D array-like, one sample per row.
            eta (float): initial learning rate.
            d (float): initial neighbourhood radius.
            max_iter (int): number of passes over the data.

        Returns:
            Layer: `self`, with `labels_` set to the BMU index of every
            (scaled) training sample, so that the inherited `fit_predict`
            can be used.
        """
        data = scaler.fit_transform(data)
        for t in range(max_iter):
            decay = exp(-t / 10)  # same for every sample of this epoch
            for x in data:
                self.update(x, eta=eta * decay, d=d * decay)
        self.labels_ = np.array([self.predict(x) for x in data])
        return self
if __name__ == '__main__':
    # Demo: train a SOM on two columns of a CSV file and plot the node grid
    # before (green) and after (red) training.
    try:
        import pandas as pd
        df = pd.read_csv('heart.csv')  # input your data
    except Exception as e:
        print(e)
        # Chain the cause so the real reason stays visible in the traceback.
        raise Exception('Please input your data!') from e

    def _grid(size=(5, 5), *args, **kwargs):
        """Build a regular grid of nodes as a list of rows.

        For `size=(r, c)` the indices run over 1..r-1 and 1..c-1, so the
        grid holds (r-1) x (c-1) nodes at the positions (k/r, j/c). Each
        node starts with its weight equal to its location. Extra arguments
        are accepted but unused.
        """
        r, c = size
        # Weight and location are separate arrays on purpose: the weight is
        # updated in place during training, the location must stay fixed.
        return [
            [
                Node(weight=np.array((k / r, j / c)),
                     location=np.array((k / r, j / c)))
                for j in range(1, c)
            ]
            for k in range(1, r)
        ]

    def _plot_grid(axes, grid, fmt):
        """Draw the grid edges (rows, then columns) through the node weights.

        Returns the first line drawn so it can serve as a legend handle.
        """
        first = None
        for chain in list(grid) + list(zip(*grid)):
            xs = [node.weight[0] for node in chain]
            ys = [node.weight[1] for node in chain]
            (line,) = axes.plot(xs, ys, fmt)
            if first is None:
                first = line
        return first

    df = df[['trestbps', 'chol']]
    N, p = df.shape
    X = df.values.astype('float')

    import matplotlib.pyplot as plt
    fig = plt.figure()
    ax = fig.add_subplot(111)
    X_ = scaler.fit_transform(X)
    (data_handle,) = ax.plot(X_[:, 0], X_[:, 1], 'o')

    g = _grid(size=(5, 5), dim=p)
    initial_handle = _plot_grid(ax, g, 'g--')

    layer = Layer(nodes=toolz.concat(g))
    layer.plot(ax, marker='s', color='g', alpha=0.2)
    # NOTE(review): `Layer.fit` refits the shared scaler on the first half of
    # the rows only, while the data plotted above was scaled using all rows;
    # confirm whether this difference in scaling is intended.
    layer.fit(X[:N//2, :], max_iter=50)
    layer.plot(ax, marker='+', color='r')

    # The Node objects in `g` are the ones held by the layer, so the grid
    # now reflects the trained weights.
    terminal_handle = _plot_grid(ax, g, 'r')

    ax.set_title('Demo of SOM')
    # Pass explicit handles: with labels only, matplotlib pairs them with the
    # first artists on the axes, which labelled a green line 'Terminal nodes'.
    ax.legend((data_handle, initial_handle, terminal_handle),
              ('Data', 'Initial nodes', 'Terminal nodes'))
    plt.show()