# References:
# https://blog.csdn.net/songbinxu/article/details/79662665
# https://blog.csdn.net/google19890102/article/details/45532745
# https://blog.csdn.net/jediael_lu/article/details/77772565
# https://blog.csdn.net/asd136912/article/details/78318563
# -*- coding: utf-8 -*-
import pandas as pd
from math import exp
import numpy as np
from random import normalvariate
from sklearn import datasets
# Hard-coded local directory. NOTE(review): nothing in the code visible in
# this file reads `path` -- confirm whether it is still needed.
path = 'D:/ml_project/minist/'
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^-x) of a scalar *x*.

    The computation is split by sign so that ``exp`` is only ever called
    with a non-positive argument.  The naive form ``1 / (1 + exp(-x))``
    raises ``OverflowError`` for large negative ``x`` because ``exp(-x)``
    exceeds the float range; here such inputs simply underflow to 0.0.
    """
    if x >= 0:
        return 1.0 / (1.0 + exp(-x))
    # x < 0: exp(x) is in (0, 1), so neither the numerator nor the
    # denominator can overflow.
    z = exp(x)
    return z / (1.0 + z)
def stocGradAscent(dataMatrix, classLabels, k, iteration, alpha=0.01, verbose=True):
    """Train a second-order factorization machine with per-sample SGD.

    For one sample ``x`` the model output is::

        p = w_0 + x . w + 0.5 * sum_f [ (x . v[:, f])**2 - (x**2) . (v[:, f]**2) ]

    and each sample takes one gradient step on the logistic loss
    ``-log(sigmoid(y * p))``.  Despite the function name, the updates
    subtract the gradient (descent on the loss).

    Args:
        dataMatrix: 2-D samples-by-features data; an ``np.matrix`` or an
            ndarray is accepted (it is converted with ``np.asarray``).
        classLabels: indexable sequence of labels, one per row.  The loss
            used here assumes labels encoded as -1 / +1.
        k: number of latent factors (columns of ``v``).
        iteration: number of full passes over the data.
        alpha: learning rate (defaults to the previously hard-coded 0.01).
        verbose: when true (default), print the pass banner and the
            per-sample loss gradient exactly as before.

    Returns:
        Tuple ``(w_0, w, v)``: the bias, the ``(n, 1)`` linear weights and
        the ``(n, k)`` latent factor matrix.
    """
    X = np.asarray(dataMatrix, dtype=float)
    m, n = X.shape

    # Parameter initialisation.
    w_0 = 0.
    w = np.zeros((n, 1))  # n is the number of features
    # Draw every entry of v independently.  Scaling np.ones((n, k)) by a
    # single random number makes all k columns identical, and identical
    # columns receive identical updates, so the factors would never differ.
    v = np.array(
        [[normalvariate(0, 0.2) for _ in range(k)] for _ in range(n)],
        dtype=float,
    ).reshape(n, k)

    for it in range(iteration):
        if verbose:
            print("第{}次迭代...".format(it))
        for x in range(m):  # stochastic updates: one step per sample
            row = X[x]
            label = classLabels[x]

            inter_1 = np.dot(row, v)            # shape (k,): x . v[:, f]
            inter_2 = np.dot(row * row, v * v)  # shape (k,): x^2 . v[:, f]^2
            # Pairwise interaction term.  np.sum adds up all k factors; the
            # builtin sum() over a 1 x k matrix returns a 1 x k matrix, of
            # which only the first factor ended up in the prediction.
            interaction = np.sum(inter_1 * inter_1 - inter_2) / 2.
            p = w_0 + np.dot(row, w[:, 0]) + interaction  # model output

            # Derivative of -log(sigmoid(label * p)) w.r.t. (label * p).
            loss = sigmoid(label * p) - 1
            if verbose:
                print(loss)

            step = alpha * loss * label
            w_0 = w_0 - step

            # Only features that are non-zero in this sample are updated.
            active = np.nonzero(row)[0]
            if active.size:
                xs = row[active]
                w[active, 0] -= step * xs
                # inter_1 was computed before any update for this sample,
                # so every (i, f) entry sees the same pre-update value.
                v[active] -= step * (
                    xs[:, None] * inter_1[None, :]
                    - v[active] * xs[:, None] * xs[:, None]
                )
    return w_0, w, v
def getAccuracy(dataMatrix, classLabels, w_0, w, v):
    """Score samples with a trained factorization machine.

    NOTE: despite the name, the second return value is the
    misclassification rate (errors / samples), not the accuracy; training
    accuracy is ``1 - rate``.  The contract is kept for existing callers.

    Args:
        dataMatrix: 2-D samples-by-features data (``np.matrix`` or ndarray).
        classLabels: indexable sequence of labels, compared against
            ``1.0`` and ``-1.0``.
        w_0: bias term.
        w: linear weights with one entry per feature, e.g. shape ``(n, 1)``.
        v: ``(n, k)`` latent factor matrix.

    Returns:
        Tuple ``(result, error_rate)``: the list of per-sample
        ``sigmoid(p)`` scores and the fraction of samples whose score falls
        on the wrong side of 0.5 (0.0 when there are no samples).
    """
    X = np.asarray(dataMatrix, dtype=float)
    m, n = X.shape
    weights = np.asarray(w, dtype=float).reshape(-1)
    factors = np.asarray(v, dtype=float)
    factors_sq = factors * factors  # loop-invariant, computed once

    result = []
    error = 0
    for x in range(m):
        row = X[x]
        inter_1 = np.dot(row, factors)
        inter_2 = np.dot(row * row, factors_sq)
        # Pairwise interaction term.  np.sum adds up all k factors; the
        # builtin sum() over a 1 x k matrix returns a 1 x k matrix, of
        # which only the first factor ended up in the prediction.
        interaction = np.sum(inter_1 * inter_1 - inter_2) / 2.
        p = w_0 + np.dot(row, weights) + interaction  # model output
        pre = sigmoid(p)
        result.append(pre)

        label = classLabels[x]
        if (pre < 0.5 and label == 1.0) or (pre >= 0.5 and label == -1.0):
            error += 1

    if m == 0:
        # No samples: avoid dividing by zero.
        return result, 0.0
    return result, float(error) / m
if __name__ == '__main__':
    # Turn the diabetes regression target into a binary task:
    # target > 180 -> +1, otherwise -1 (the label encoding the trainer uses).
    train = datasets.load_diabetes()
    # np.mat was removed in NumPy 2.0; np.asmatrix is the supported spelling
    # and yields the same np.matrix object.
    train_X = np.asmatrix(train['data'])
    train_y = np.where(train['target'] > 180, 1.0, -1.0)

    w_0, w, v = stocGradAscent(train_X, train_y, 5, 200)

    # getAccuracy returns the misclassification rate, not the accuracy.
    result, error_rate = getAccuracy(train_X, train_y, w_0, w, v)
    print("training error rate: {:.4f}".format(error_rate))
    print("training accuracy: {:.4f}".format(1 - error_rate))