Non-linear Regression

Non-linear Regression Application (Gradient Descent)

The demo below fits a straight line with batch gradient descent; the same update rule is what also drives logistic regression and neural-network training.

import numpy as np
import random

# Batch gradient descent
def GradientDescent(x,y,theta,alpha,m,numIterations): # m denotes the number of examples here, not the number of features
    '''x: input instances, one row per example; y: target labels
    theta: the parameter vector θ to learn
    alpha: learning rate
    m: number of examples in the update rule, i.e. the row dimension of x
    numIterations: number of gradient-descent update steps to run
    '''
    xTrans = x.transpose() # transpose x once, for the gradient computation below
    for i in range(0,numIterations):
        hypothesis = np.dot(x,theta) # recomputed every iteration: theta changes each step, so the predictions change too
        loss = hypothesis - y # hypothesis is y_hat, so loss = y_hat - y (predicted minus actual)
        # Average cost per example: J(θ) = (1/2m)Σ(h(x)-y)².
        # The 2 in 2*m doesn't affect the minimizer, but it keeps the
        # gradient below free of a stray factor of 2.
        cost = np.sum(loss**2)/(2*m) # a simpler cost than the one in the lesson text, chosen for easy computation
        '''cost measures the quality of the fit; it should shrink with every gradient step'''
        print('Iteration: %d | cost: %f'%(i,cost))
        # average gradient per example: (1/m)·xᵀ(h(x)-y)
        gradient = np.dot(xTrans,loss)/m # divide by m to average the gradient over the examples
        # update rule: θ = θ - α·(1/m)Σ(h(x)-y)x
        theta = theta-alpha*gradient
    return theta
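
The driver code further down keeps alpha fixed, while its comment notes that schedules which start larger and decay often behave better. Here is a minimal sketch of such a variant, assuming a simple 1/t-style schedule (the decay form and the GradientDescentDecay name are illustrative, not from the lesson):

# Hypothetical variant with a decaying learning rate; the schedule is an assumption.
def GradientDescentDecay(x,y,theta,alpha0,m,numIterations):
    xTrans = x.transpose()
    for i in range(0,numIterations):
        alpha = alpha0/(1.0+i/float(numIterations)) # shrinks from alpha0 toward alpha0/2
        loss = np.dot(x,theta)-y
        theta = theta-alpha*np.dot(xTrans,loss)/m # same update rule, smaller steps over time
    return theta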

# A helper that generates synthetic data for testing the fit
def genData(numPoints,bias,variance):
    '''numPoints: number of examples (rows of the data matrix, one example per row)
    bias: constant offset added when generating y
    variance: amplitude of the uniform noise added to y (a noise scale, not a statistical variance)'''
    x = np.zeros(shape=(numPoints,2)) # matrix with numPoints rows and 2 columns
    y = np.zeros(shape=(numPoints))   # 1-D array of targets
    # basically a straight line: y ≈ i + bias + noise
    for i in range(0,numPoints):
        # bias feature: a constant 1 so that theta[0] acts as the intercept
        x[i][0] = 1
        x[i][1] = i
        # target variable
        y[i] = (i+bias)+random.uniform(0,1)*variance # random.uniform(0,1), like random.random(), draws from [0,1)
    return x,y
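
A quick illustrative peek at what genData produces (the demo variable names are my own):

x_demo,y_demo = genData(3,25,10)
print(x_demo) # [[1. 0.]
              #  [1. 1.]
              #  [1. 2.]] -- a constant bias column plus an index column
print(y_demo) # three values in [25,35), [26,36), [27,37), depending on the noise draw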

# generate 100 data points (rows) with a bias of 25 and a noise scale of 10
x,y = genData(100,25,10) # genData returns two values, unpacked here into x and y
# print(x)
# print(y)
m,n = np.shape(x) # number of rows of x into m, number of columns into n
a = np.shape(y)   # y is 1-D, so its shape is a one-element tuple with no column count
# print(m,n) # 100 2  (100 rows, 2 columns)
# print(a)   # (100,) -- a 1-D array has no column dimension

numIterations = 100000
alpha = 0.0005 # usually chosen in (0,1); better schedules start with a larger alpha and decay it, as sketched above
theta = np.ones(n) # initialize θ to [1. 1.]; the cost is convex, so any starting point converges, and ones is simply convenient
theta = GradientDescent(x,y,theta,alpha,m,numIterations)
print(theta) # roughly [30. 1.]: intercept ≈ bias + average noise (25 + 5), slope ≈ 1

# The learned theta can now be used to compute predictions for new instances.
# This same gradient-descent method is used in regression algorithms and in neural networks.
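
With the learned theta, a prediction for a new instance is just a dot product. The sketch below uses a hypothetical feature value of 150, and checks theta against NumPy's closed-form least-squares solution (np.linalg.lstsq), which the convex cost guarantees gradient descent should approximate:

# Predict for a hypothetical new instance with feature value 150:
x_new = np.array([1.0,150.0]) # [bias term, feature]
print(np.dot(x_new,theta))    # roughly 30 + 1*150 = 180

# Sanity check against the closed-form least-squares fit:
theta_ls,*_ = np.linalg.lstsq(x,y,rcond=None)
print(theta_ls)               # should closely match the gradient-descent theta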
