# Python implementation of a logistic regression (LR) classifier

# --*-- coding:utf-8 --*--
import numpy as np

class Logistic:
    """Binary logistic regression classifier trained by batch gradient descent.

    Samples are rows of the form ``[1.0, x1, x2, ...]`` where the leading
    ``1.0`` is the bias input, so the weight vector is ``(b, w1, w2, ...)``.
    """

    def loadDataSet(self, fileName='testSet.txt'):
        """Load samples and labels from a whitespace-separated text file.

        Each non-blank line must contain at least two numeric feature columns;
        the last column is parsed as the integer class label. Blank lines are
        skipped.

        Args:
            fileName: Path of the data file to read.

        Returns:
            A ``(dataMat, labelMat)`` tuple: ``dataMat`` is a list of
            ``[1.0, x1, x2]`` rows (bias input prepended) and ``labelMat`` is
            the list of integer labels, in file order.

        Raises:
            OSError: If the file cannot be opened.
            ValueError: If a column cannot be parsed as a number.
            IndexError: If a non-blank line has fewer than two columns.
        """
        dataMat = []
        labelMat = []
        # The context manager guarantees the file handle is closed.
        with open(fileName) as fr:
            for line in fr:
                lineArr = line.strip().split()
                if not lineArr:
                    # Tolerate blank / trailing empty lines.
                    continue
                # Sample layout: x = (1, x1, x2), the 1 multiplies the bias.
                dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
                labelMat.append(int(lineArr[-1]))

        return dataMat, labelMat

    def sigmoid(self, inX):
        """Return the element-wise logistic function ``1 / (1 + exp(-inX))``."""
        return 1.0 / (1 + np.exp(-inX))

    def _gradientDescent(self, dataSet, labels, alpha, maxIter):
        """Run ``maxIter`` batch gradient-descent steps; return (n, 1) weights."""
        features = np.asarray(dataSet, dtype=float)
        if features.ndim != 2:
            raise ValueError(
                'dataSet must be a non-empty 2-D collection of samples')
        # Column vector so that (h - targets) does not broadcast to (m, m).
        targets = np.asarray(labels, dtype=float).reshape(-1, 1)
        if targets.shape[0] != features.shape[0]:
            raise ValueError(
                'dataSet and labels must contain the same number of samples')

        # Weights start at one: w = (b, w1, w2, ...).
        weights = np.ones((features.shape[1], 1))
        for _ in range(maxIter):
            h = self.sigmoid(np.dot(features, weights))  # predictions (m, 1)
            error = h - targets  # prediction minus label, (m, 1)
            # Gradient of the negative log-likelihood is X^T (h - y).
            weights = weights - alpha * np.dot(features.T, error)

        return weights

    def train(self, dataSet, labels, alpha=0.01, maxIter=500):
        """Train the classifier and return the weights as an ``np.matrix``.

        Args:
            dataSet: 2-D sequence of samples, one row per sample, including
                the leading bias input.
            labels: Sequence of 0/1 class labels, one per sample.
            alpha: Gradient-descent step size.
            maxIter: Number of gradient-descent iterations.

        Returns:
            An ``np.matrix`` of shape ``(n, 1)`` holding the learned weights.

        Raises:
            ValueError: If ``dataSet`` is not 2-D or the number of labels does
                not match the number of samples.
        """
        return np.asmatrix(
            self._gradientDescent(dataSet, labels, alpha, maxIter))

    def nparraytrain(self, dataSet, labels, alpha=0.01, maxIter=500):
        """Train the classifier and return the weights as an ``np.ndarray``.

        Same algorithm as ``train``; only the return type differs.

        Args:
            dataSet: 2-D sequence of samples, one row per sample, including
                the leading bias input.
            labels: Sequence of 0/1 class labels, one per sample.
            alpha: Gradient-descent step size.
            maxIter: Number of gradient-descent iterations.

        Returns:
            An ``np.ndarray`` of shape ``(n, 1)`` holding the learned weights.

        Raises:
            ValueError: If ``dataSet`` is not 2-D or the number of labels does
                not match the number of samples.
        """
        return self._gradientDescent(dataSet, labels, alpha, maxIter)

    def classify(self, X, weights):
        """Predict the class of a single sample.

        Args:
            X: One sample (bias input included) as a flat sequence or array.
            weights: Weight vector as returned by ``train`` or
                ``nparraytrain`` (any shape holding ``n`` values).

        Returns:
            ``1.0`` if the predicted probability exceeds 0.5, else ``0.0``.
        """
        # Flatten both operands so the inner product is a scalar regardless
        # of whether weights is a 1-D array, an (n, 1) array or an np.matrix.
        sample = np.asarray(X, dtype=float).ravel()
        coeffs = np.asarray(weights, dtype=float).ravel()
        prob = self.sigmoid(float(np.dot(sample, coeffs)))
        if prob > 0.5:
            return 1.0
        else:
            return 0.0


if __name__ == '__main__':
    # Demo: fit the classifier on the default data file and show the weights.
    model = Logistic()
    samples, targets = model.loadDataSet()
    print(model.nparraytrain(samples, targets))

# You may also be interested in: Python implementation of a logistic regression (LR) classifier