用 Python 实现逻辑回归(Logistic Regression)

本文采用牛顿法(Newton's Method)训练模型:每次迭代利用负对数似然的梯度与 Hessian 矩阵来更新权重。

代码

import numpy as np

class LogisticRegression(object):
    """Binary logistic regression classifier trained with Newton's method.

    Labels are expected to be 0/1. A bias column of ones is prepended to
    the feature matrix internally, so ``weight[0]`` is the intercept and
    ``weight[1:]`` are the per-feature coefficients.
    """

    def __init__(self, error: float = 0.7, max_epoch: int = 100):
        """
        :param error: float, if the Euclidean distance between the new
                      weight and the old weight is <= error, training
                      stops.
        :param max_epoch: int, maximum number of Newton iterations;
                          training stops once this many have run.
        """
        self.error = error
        self.max_epoch = max_epoch
        # Set by fit(); shape = (n_features + 1), bias term first.
        self.weight = None

    @staticmethod
    def sign(p):
        """Threshold probabilities into class labels.

        :param p: array-like of P(y=1 | x)
        :return: integer array of the same shape, 1 where p >= 0.5 else 0
        """
        # Plain vectorized comparison: unlike np.vectorize this also works
        # on empty input and does not call a Python lambda per element.
        return (np.asarray(p) >= 0.5).astype(int)

    def p_func(self, X_) -> np.ndarray:
        """Get P(y=1 | x)

        :param X_: shape = (n_samples, n_features + 1), bias column included
        :return: shape = (n_samples)
        :raises RuntimeError: if the model has not been fitted yet
        """
        if self.weight is None:
            raise RuntimeError(
                "LogisticRegression is not fitted yet; call fit() first.")
        z = self.weight @ np.asarray(X_).T
        # Numerically stable sigmoid: only exp(-|z|) <= 1 is ever evaluated,
        # so large logits cannot overflow to inf and produce nan.
        #   z >= 0: 1 / (1 + exp(-z))
        #   z <  0: exp(z) / (1 + exp(z))
        e = np.exp(-np.abs(z))
        return np.where(z >= 0, 1.0, e) / (1.0 + e)

    def diff(self, X_, y, p) -> np.ndarray:
        """Get the gradient of the negative log-likelihood

        :param X_: shape = (n_samples, n_features + 1)
        :param y: shape = (n_samples)
        :param p: shape = (n_samples) P(y=1 | x)
        :return: shape = (n_features + 1) first derivative
        """
        return (p - y) @ X_

    def hess_mat(self, X_, p) -> np.ndarray:
        """Get the Hessian matrix of the negative log-likelihood

        :param X_: shape = (n_samples, n_features + 1)
        :param p: shape = (n_samples) P(y=1 | x)
        :return: shape = (n_features + 1, n_features + 1) second derivative
        """
        X_ = np.asarray(X_)
        p = np.asarray(p)
        # H = X^T diag(p * (1 - p)) X, computed without materialising one
        # outer product per sample.
        w = p * (1.0 - p)
        return X_.T @ (X_ * w[:, np.newaxis])

    def newton_method(self, X_, y):
        """Newton's method to calculate the weight (stored in self.weight)

        :param X_: shape = (n_samples, n_features + 1)
        :param y: shape = (n_samples)
        :return: None
        """
        X_ = np.asarray(X_, dtype=float)
        y = np.asarray(y, dtype=float)
        # Start from zero weights (p = 0.5 everywhere). Starting from ones
        # can saturate the sigmoid on positive features, which makes the
        # Hessian nearly singular and the first Newton steps diverge.
        self.weight = np.zeros(X_.shape[1])

        for _ in range(self.max_epoch):
            p = self.p_func(X_)
            grad = self.diff(X_, y, p)
            hess = self.hess_mat(X_, p)
            try:
                # Solve H * step = grad instead of forming H^-1 explicitly.
                step = np.linalg.solve(hess, grad)
            except np.linalg.LinAlgError:
                # Singular Hessian (e.g. fully saturated probabilities or
                # collinear features): fall back to the minimum-norm
                # least-squares solution.
                step = np.linalg.lstsq(hess, grad, rcond=None)[0]

            # Apply the step before testing convergence so the final,
            # most accurate iterate is the one that is kept.
            self.weight = self.weight - step
            if np.linalg.norm(step) <= self.error:
                break

    def fit(self, X, y):
        """Fit the model to the training data.

        :param X: shape = (n_samples, n_features)
        :param y: shape = (n_samples), labels in {0, 1}
        :return: self
        :raises ValueError: if X and y have different numbers of samples
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                "X and y must contain the same number of samples: "
                "got {} and {}.".format(X.shape[0], y.shape[0]))
        # Prepend the bias column.
        X_ = np.c_[np.ones(X.shape[0]), X]
        self.newton_method(X_, y)
        return self

    def predict(self, X) -> np.ndarray:
        """Predict class labels for the given samples.

        :param X: shape = (n_samples, n_features)
        :return: shape = (n_samples), integer labels in {0, 1}
        :raises RuntimeError: if the model has not been fitted yet
        """
        X = np.asarray(X, dtype=float)
        X_ = np.c_[np.ones(X.shape[0]), X]
        return self.sign(self.p_func(X_))

测试代码

import matplotlib.pyplot as plt
import sklearn.datasets

def plot_decision_boundary(pred_func, X, y, title=None):
    """Plot a classifier's decision regions together with the samples.

    :param pred_func: predict function; called with an array of shape
                      (n_points, 2) and must return one label per point
    :param X: training samples; only the first two columns are plotted
    :param y: training labels, used to colour the scattered samples
    :param title: optional figure title; omitted when falsy
    :return: None
    """
    margin = .5      # padding added around the data range on every side
    spacing = 0.01   # distance between neighbouring grid points

    first, second = X[:, 0], X[:, 1]
    axis_x = np.arange(first.min() - margin, first.max() + margin, spacing)
    axis_y = np.arange(second.min() - margin, second.max() + margin, spacing)

    # Dense grid covering the padded bounding box of the samples.
    xx, yy = np.meshgrid(axis_x, axis_y)
    grid_points = np.c_[xx.ravel(), yy.ravel()]

    # Classify every grid point and fold the labels back into grid shape.
    Z = pred_func(grid_points).reshape(xx.shape)

    # Filled contours show the decision regions; samples go on top.
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.scatter(first, second, s=40, c=y, cmap=plt.cm.Spectral)
    if title:
        plt.title(title)
    plt.show()

效果

[图片:用 Python 实现逻辑回归(Logistic Regression),第 1 张图]
效果

更多机器学习代码,请访问 https://github.com/WiseDoge/plume

你可能感兴趣的:(用 Python 实现逻辑回归(Logistic Regression))