自己实现机器学习算法

1、线性回归(以 x² 为特征,拟合 y = W·x² + b)

import matplotlib.pyplot as plt
import tensorflow as tf  # (需安装tensorflow2.0)
import numpy as np


# 搭建模型
class Model(object):
  """Two-parameter regression model that predicts ``W * x * x + b``.

  Swap the body of ``__call__`` for ``W * x + b`` to fit a straight line
  instead of a parabola.
  """

  def __init__(self):
    # Both trainable parameters start from a random uniform draw of shape [1].
    initial_weight = tf.random.uniform([1])
    self.W = tf.Variable(initial_weight)
    initial_bias = tf.random.uniform([1])
    self.b = tf.Variable(initial_bias)

  def __call__(self, x):
    """Return the model prediction for ``x``."""
    weighted_square = self.W * x * x
    return weighted_square + self.b


# 计算损失函数
def loss_fn(y, y_):
  """Return the mean squared error between targets ``y`` and predictions ``y_``."""
  residual = y_ - y
  squared_residual = tf.square(residual)
  return tf.reduce_mean(squared_residual)


# 训练模型
def fit(model, x, y, epochs=100, learning_rate=0.01):
  """Train ``model`` by gradient descent, pausing after every epoch.

  This is a generator: it yields (without a value) once per epoch so that the
  caller can redraw a plot between parameter updates. The collected histories
  are the generator's return value, so they are only reachable through
  ``StopIteration.value`` (or ``yield from``), not through a plain ``for``
  loop.

      input:
          model: callable on x, exposing the variables ``W`` and ``b``
          x: shape=(n_samples, 1)
          y: shape=(n_samples, 1)
          epochs: number of gradient-descent steps
          learning_rate: step size applied to each gradient
      output:
          w, b: parameter values recorded after each update
          l: loss values, each measured just before the matching update
  """

  # Per-step history of the parameters and the loss
  w, b, l = [], [], []
  for _ in range(epochs):
    # Only the forward pass has to be recorded; the gradient computation and
    # the parameter updates happen once the tape has stopped recording.
    with tf.GradientTape() as tape:
      y_ = model(x)
      loss = loss_fn(y, y_)

    dW, db = tape.gradient(loss, [model.W, model.b])
    model.W.assign_sub(learning_rate * dW)
    model.b.assign_sub(learning_rate * db)

    w.append(model.W.numpy()[0])
    b.append(model.b.numpy()[0])
    l.append(loss.numpy())

    # Hand control back to the caller so it can draw the current fit; a
    # non-interactive training loop would not need this.
    yield

  return w, b, l


if __name__ == '__main__':

  # Ground-truth parameters of the synthetic data set
  TRUE_W = 4.0
  TRUE_b = 2.0
  NUM_SAMPLES = 100

  # Noisy samples of y = TRUE_W * x^2 + TRUE_b
  sample_shape = (NUM_SAMPLES, 1)
  X = np.random.randn(*sample_shape)
  noise = np.random.randn(*sample_shape)
  Y = np.square(X) * TRUE_W + TRUE_b + noise

  model = Model()

  # Evenly spaced abscissas used to draw the fitted curve
  curve_x = np.linspace(X.min(), X.max(), 100)
  training_steps = fit(model, X, Y, epochs=50)

  # Redraw the scatter plot and the current fit after every training epoch
  for _ in training_steps:
    plt.cla()
    plt.scatter(X, Y)
    plt.plot(curve_x, model(curve_x), c='r')
    plt.draw()
    plt.pause(0.05)
  plt.pause(2)

2、k-means

import matplotlib.pyplot as plt
import numpy as np
import random


# 搭建模型
class Model(object):
  """K-means clustering based on Euclidean distance.

  Run ``fit`` to learn the cluster centers, then call the instance on data to
  get the index of the nearest center for every sample.
  """

  def __init__(self):
    # Cluster centers, shape=(k, n_features); stays None until fit() has run.
    self.center = None

  def __call__(self, data):
    """Assign every sample to its nearest cluster center.

        input:
            data: shape=(n_samples, n_features)
        output:
            z: shape=(n_samples,)
    """
    distance = self.EuclideanDistance(data, self.center)
    return distance.argmin(axis=1)

  def fit(self, data, k, max_iter=50, tol=None):
    """Learn ``k`` cluster centers, yielding the state after every iteration.

    This is a generator so that a caller can plot each step; the list of all
    steps is also the generator's return value (``StopIteration.value``).

        input:
            data: shape=(n_samples, n_features)
            k: number of clusters, 1 <= k <= n_samples
            max_iter: maximum number of update iterations
            tol: if not None, stop once no center coordinate moved by more
                 than tol in an iteration; None always runs max_iter steps
        output:
            yields [center, index, loss] per iteration, where
            center: shape=(k, n_features), index: shape=(n_samples,),
            loss: mean distance of the samples to their assigned center
        raises:
            ValueError: if k is not within 1..n_samples
    """
    data = np.asarray(data)
    n_samples = len(data)
    if not 1 <= k <= n_samples:
      raise ValueError(
          "k must be between 1 and n_samples (%d), got %r" % (n_samples, k))

    # Pick k distinct samples as the initial centers
    indices = random.sample(range(n_samples), k)
    self.center = np.copy(data[indices])

    identity = np.eye(k, dtype=np.float32)
    pipe_data = []

    for _ in range(max_iter):
      distance = self.EuclideanDistance(data, self.center)
      index = distance.argmin(axis=1)  # nearest center of every sample

      # One-hot membership matrix, shape=(n_samples, k)
      onehot = identity[index]
      counts = np.sum(onehot, axis=0)

      # Per-cluster coordinate sums via a matrix product:
      # (n_samples, k)^T * (n_samples, n_features) = (k, n_features)
      sums = np.matmul(onehot.T, data)
      means = sums / np.maximum(counts, 1)[:, np.newaxis]

      # A cluster without members keeps its previous center; dividing by its
      # zero count would turn the center into NaN.
      has_members = counts[:, np.newaxis] > 0
      new_center = np.where(has_members, means, self.center)

      loss = np.sum(onehot * distance) / n_samples

      shift = np.abs(new_center - self.center).max()
      self.center = new_center

      # Report this step: centers, sample labels and loss
      step = [self.center, index, loss]
      pipe_data.append(step)
      yield step

      if tol is not None and shift <= tol:
        break

    return pipe_data

  def EuclideanDistance(self, data, center):
    """Euclidean distance between every sample and every center.

        input:
            data: shape=(n_samples, n_features)
            center: shape=(k, n_features)
        output:
            z: shape=(n_samples, k)
    """
    # Broadcasting (n_samples, 1, n_features) against (k, n_features)
    diff = np.expand_dims(data, axis=1) - center
    return np.sqrt(np.sum(np.square(diff), axis=2))


# 画图函数
def display(data, center, index):
  """Redraw the current axes: samples colored by ``index``, centers as stars.

  Only the first two columns of ``data`` and ``center`` are plotted.
  """
  plt.cla()

  sample_x, sample_y = data[:, 0], data[:, 1]
  plt.scatter(sample_x, sample_y, c=index, alpha=0.8)

  center_x, center_y = center[:, 0], center[:, 1]
  plt.scatter(center_x, center_y, s=500, marker='*')

  plt.draw()
  plt.pause(0.05)


if __name__ == '__main__':

  # Synthetic data: standard-normal samples shifted by (-3, 6)
  n_samples = 3000
  n_features = 2
  offset = [-3, 6]
  data = np.random.randn(n_samples, n_features) + offset

  model = Model()

  # Print the loss and redraw the clustering after every training step
  for center, index, loss in model.fit(data, k=4):
    print(loss)
    display(data, center, index)
  plt.pause(3)

  # Prediction: label fresh samples with the trained centers
  test = np.random.randn(20, 2) + offset
  index = model(test)
  display(test, model.center, index)
  plt.pause(3)

你可能感兴趣的:(自己实现机器学习算法)