自组织映射Python实现

自组织映射(Self-organizing map)Python实现。仅供学习。

#!/usr/bin/env python3

"""
Self-organizing map
"""

from math import exp

import toolz

import numpy as np
import numpy.linalg as LA

from sklearn.base import ClusterMixin
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()


class Node:
    """A single SOM unit: a weight vector in data space plus a map location.

    Attributes:
        weight (np.ndarray): weight of the node, in the data space. Stored
            as a float array and modified in place by `update`.
        location (np.ndarray or None): location of the node on the map; it
            is only used by `near` to decide neighbourhood. None means the
            node has no position.
    """

    def __init__(self, weight, location=None):
        # A float dtype is required: `update` changes the weight in place and
        # NumPy refuses to cast a float result back into an integer array.
        # `np.asarray` does not copy an array that is already float, so a
        # caller-supplied float array is still shared with the node.
        self.weight = np.asarray(weight, dtype=float)
        self.location = (
            None if location is None else np.asarray(location, dtype=float)
        )

    def normalize(self):
        """Return the weight scaled to unit Euclidean norm.

        The node itself is not modified. A zero weight vector produces NaNs
        because the norm used as divisor is zero.
        """
        return self.weight / LA.norm(self.weight)

    def output(self, x):
        """Return the Euclidean distance between `x` and the weight.

        A smaller value means `x` is more similar to this node.
        """
        return LA.norm(x - self.weight)

    def near(self, other, d=0.2):
        """Tell whether `other` lies strictly within distance `d` on the map.

        Arguments:
            other (Node): the node to compare with.
            d (float): neighbourhood radius, measured between locations.

        Returns:
            bool: True if both nodes have a location and the Euclidean
            distance between the locations is less than `d`; False
            otherwise (including when either location is None).
        """
        if self.location is None or other.location is None:
            return False
        return bool(LA.norm(self.location - other.location) < d)

    def update(self, x, eta=0.1):
        """Move the weight towards `x` in place.

        w += eta * (x - w)

        Arguments:
            x (np.ndarray): input sample, same shape as the weight.
            eta (float): learning rate; 0 leaves the weight unchanged and
                1 sets it to `x`.
        """
        self.weight += eta * (x - self.weight)

    @staticmethod
    def random(n=2):
        """Create a node with a random weight and a random 2-D location.

        Arguments:
            n (int): dimension of the weight vector.

        Returns:
            Node: weight and location are drawn with `np.random.random`,
            i.e. uniformly from [0, 1).
        """
        weight = np.random.random(n)
        location = np.random.random(2)
        # The weight is returned as drawn. `normalize` only returns a new
        # array, so the bare `node.normalize()` call that used to sit here
        # had no effect and has been dropped.
        return Node(weight, location)

    def plot(self, axes, i1=0, i2=1, *args, **kwargs):
        """Plot two components of the weight on `axes`.

        Arguments:
            axes: object with a matplotlib-style `plot` method.
            i1 (int): index of the weight component used as x coordinate.
            i2 (int): index of the weight component used as y coordinate.
            *args, **kwargs: forwarded to `axes.plot`.
        """
        x1, x2 = self.weight[i1], self.weight[i2]
        axes.plot(x1, x2, *args, **kwargs)


class Layer(ClusterMixin):
    """
    Layer of SOM

    A collection of nodes that is trained so that the node weights follow
    the distribution of the data.

    Attributes:
        nodes (list): the nodes of the layer.
        labels_ (np.ndarray): index of the best matching unit for every
            training sample; set by `fit`.
    """

    def __init__(self, nodes):
        # Materialise the iterable so the nodes can be traversed repeatedly.
        self.nodes = list(nodes)

    @staticmethod
    def random(n_nodes=100, *args, **kwargs):
        """Create a layer of `n_nodes` random nodes.

        Extra arguments are forwarded to `Node.random`.
        """
        return Layer([Node.random(*args, **kwargs) for _ in range(n_nodes)])

    def output(self, x):
        """Return the list of outputs (`node.output(x)`) of all nodes."""
        return [node.output(x) for node in self.nodes]

    def champer(self, x):
        """champer node: best matching unit (BMU)
        """
        return self.nodes[self.predict(x)]

    def predict(self, x):
        """the index of best matching unit (BMU)

        `x` is a single sample and is used as given. `fit` scales the data
        before training, so `x` has to be in that scaled space for the
        result to be meaningful.
        """
        return np.argmin(self.output(x))

    def update(self, x, eta=0.5, d=0.5):
        """Update the best node for `x` and its neighbors.

        Arguments:
            x (np.ndarray): a single input sample.
            eta (float): learning rate passed to `Node.update`.
            d (float): neighbourhood radius passed to `Node.near`.
        """
        c = self.champer(x)
        for node in self.nodes:
            if node.near(c, d):
                node.update(x, eta)

    def plot(self, axes, i1=0, i2=1, *args, **kwargs):
        """Scatter two weight components of every node on `axes`.

        Arguments:
            axes: object with a matplotlib-style `scatter` method.
            i1 (int): index of the weight component used as x coordinate.
            i2 (int): index of the weight component used as y coordinate.
            *args, **kwargs: forwarded to `axes.scatter`.
        """
        x1 = [node.weight[i1] for node in self.nodes]
        x2 = [node.weight[i2] for node in self.nodes]
        axes.scatter(x1, x2, *args, **kwargs)

    def fit(self, data, eta=0.2, d=0.2, max_iter=100):
        """Train the layer on `data`.

        The data is first transformed with the module-level `scaler`
        (which is re-fitted on `data`). In epoch `t` both the learning rate
        and the neighbourhood radius are multiplied by exp(-t/10).

        Arguments:
            data: 2-D array-like, one sample per row.
            eta (float): initial learning rate.
            d (float): initial neighbourhood radius.
            max_iter (int): number of passes over the data.

        Returns:
            Layer: the layer itself, with `labels_` set.
        """
        data = scaler.fit_transform(data)
        for t in range(max_iter):
            decay = exp(-t/10)
            for x in data:
                self.update(x, eta=eta*decay, d=d*decay)
        # ClusterMixin.fit_predict returns `self.labels_` after calling
        # `fit`, so the attribute has to exist once training is done.
        self.labels_ = np.array([self.predict(x) for x in data])
        return self


if __name__ == '__main__':
    # Demo: train a SOM on two columns of a CSV file and plot the node mesh
    # before (green) and after (red) training.
    try:
        import pandas as pd
        df = pd.read_csv('heart.csv')  # input your data
    except Exception as e:
        # Show the underlying problem, then keep it attached as the cause.
        print(e)
        raise Exception('Please input your data!') from e

    def _grid(size=(5, 5), *args, **kwargs):
        """Build a regular grid of nodes as a list of rows.

        For size (r, c) the grid has r-1 rows and c-1 columns; the node at
        row k, column j (both counted from 1) gets the point (k/r, j/c) as
        weight and as location. Extra arguments are accepted and ignored.
        """
        r, c = size
        grid = []
        for k in range(1, r):
            # Weight and location are separate arrays on purpose: training
            # changes the weight in place and must not move the location.
            row = [
                Node(weight=np.array((k/r, j/c)),
                     location=np.array((k/r, j/c)))
                for j in range(1, c)
            ]
            grid.append(row)
        return grid

    def _plot_mesh(axes, grid, fmt):
        """Draw the rows, then the columns, of `grid` as lines through the
        first two weight components of the nodes."""
        for line in list(grid) + list(zip(*grid)):
            xs = [node.weight[0] for node in line]
            ys = [node.weight[1] for node in line]
            axes.plot(xs, ys, fmt)

    df = df[['trestbps', 'chol']]
    N, p = df.shape
    X = df.values.astype('float')

    import matplotlib.pyplot as plt
    fig = plt.figure()
    ax = fig.add_subplot(111)
    X_ = scaler.fit_transform(X)
    ax.plot(X_[:, 0], X_[:, 1], 'o', label='Data')
    g = _grid(size=(5, 5), dim=p)

    # mesh of the initial nodes
    _plot_mesh(ax, g, 'g--')

    layer = Layer(nodes=toolz.concat(g))
    layer.plot(ax, marker='s', color='g', alpha=0.2, label='Initial nodes')

    # train on the first half of the samples
    layer.fit(X[:N//2, :], max_iter=50)
    layer.plot(ax, marker='+', color='r', label='Terminal nodes')

    # mesh of the trained nodes (the nodes in `g` were updated in place)
    _plot_mesh(ax, g, 'r')

    ax.set_title('Demo of SOM')
    # Labels are attached to the artists above; a bare list of labels would
    # be matched to artists by order and end up on the mesh lines.
    ax.legend()
    plt.show()

自组织映射Python实现_第1张图片

你可能感兴趣的:(机器学习,Python源码,1024程序员节,python,机器学习,自组织映射)