***Neural Networks (Implementing the BP Algorithm)***
1. Fundamentals
1.1 The neuron model
A neural network is a broadly parallel, interconnected network composed of simple adaptive units.
1.2 Perceptrons and multi-layer networks
Perceptron
A perceptron consists of only two layers of neurons, and only the output layer contains M-P neurons, i.e. functional neurons.
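As a quick illustration, a perceptron learns by nudging each weight with the rule w_i ← w_i + η(y − ŷ)x_i. Below is a minimal sketch of that rule (the function name, data and parameters are illustrative only and are not used by the BP implementation later):

```python
import numpy as np

def perceptron_train(X, y, eta=0.1, epochs=100):
    """Train a single-output perceptron with a step activation function."""
    w = np.zeros(X.shape[1])   # connection weights
    theta = 0.0                # threshold of the output neuron
    for _ in range(epochs):
        for xk, yk in zip(X, y):
            y_hat = 1.0 if np.dot(w, xk) - theta >= 0 else 0.0  # step activation
            w += eta * (yk - y_hat) * xk       # w_i <- w_i + eta*(y - y_hat)*x_i
            theta -= eta * (yk - y_hat)        # the threshold moves the opposite way
    return w, theta

# e.g. learning the linearly separable AND function
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=float)
y = np.array([0, 0, 0, 1], dtype=float)
print(perceptron_train(X, y))
```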
1.3 The BP algorithm (error Back Propagation), also known as the back-propagation algorithm
Pseudocode:
Input: training set
       learning rate
Process:
1. Randomly initialize all connection weights and thresholds of the network in the range (0,1)
2. repeat
3.   for each training sample (Xk, Yk) do
4.     Compute the output for the current sample from the current parameters and the forward-pass formulas
5.     Compute the gradient term of each output-layer neuron
6.     Compute the gradient term of each hidden-layer neuron
7.     Update the connection weights and thresholds accordingly
8.   end for
9. until the stopping condition is reached
Output: a multi-layer feed-forward neural network whose connection weights and thresholds have been determined
Note the distinction between the standard BP algorithm and the accumulated BP algorithm (accumulated error backpropagation); a toy comparison is sketched below.
Accumulated BP: reads through the entire training set once before performing an update.
Standard BP: performs an update for each individual training sample.
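To make the difference in update timing concrete, here is a small runnable sketch on a toy linear least-squares model (deliberately not the BP network implemented below; all names are illustrative):

```python
import numpy as np

# toy data: 4 samples, 2 features, scalar target
X = np.array([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])
Y = np.array([0., 1., 1., 0.])
lr = 0.1

def grad(w, x, y):
    """Gradient of the squared error 0.5*(w.x - y)^2 for a single sample."""
    return (w @ x - y) * x

# standard BP style: update the parameters after every single sample
w_std = np.zeros(2)
for x, y in zip(X, Y):
    w_std -= lr * grad(w_std, x, y)

# accumulated BP style: read the whole training set once, then update only once
w_acc = np.zeros(2)
total = np.zeros(2)
for x, y in zip(X, Y):
    total += grad(w_acc, x, y)
w_acc -= lr * total / len(X)   # one update with the averaged gradient

print(w_std, w_acc)
```

Standard BP therefore makes many more, noisier updates per pass, while accumulated BP makes one smoother update per pass; in practice accumulated BP can slow down once the accumulated error has already dropped to a low level.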
BP networks overfit easily; two strategies for dealing with overfitting:
Early stopping: split the data into a training set and a validation set; the training set is used to compute the gradients and update the connection weights and thresholds,
while the validation set is used to estimate the error. When the training error keeps decreasing but the validation error starts to rise, stop training.
Regularization: add a term describing the complexity of the network to the error objective, as in the formula below.
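A commonly used form of the regularized objective (here $E_k$ is the error on the $k$-th of $m$ training samples, $w_i$ ranges over the connection weights and thresholds, and $\lambda \in (0,1)$, often chosen by cross-validation, trades off empirical error against network complexity):

$$E = \lambda \frac{1}{m} \sum_{k=1}^{m} E_k + (1 - \lambda) \sum_{i} w_i^2$$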
1.4 各种模型的介绍
ART网络,SOM网络,结构自适应网络(级连相关网络),Elman网络(recursive neural networks)递归神经网络
Blotzmann机
还有最牛的深度学习(Deep learning)
2. Implementing the BP algorithm: train a single-hidden-layer neural network on watermelon dataset 3.0
def input():
    """
    @param : none or filepath
    @return : dataSet, a pandas DataFrame
    """
    # note: this shadows the built-in input(); kept to match the original notebook
    try:
        import pandas as pd
    except ImportError:
        print("module import error")
    # read the watermelon 3.0 dataset from a local csv file
    with open('/home/dengshuo/GithubCode/ML/CH05/watermelon3.csv') as data_file:
        df = pd.read_csv(data_file)
    return df
def learningRatio():
    """
    @return : a random learning rate drawn from (0,1) with random.uniform()
    """
    try:
        import random
    except ImportError:
        print('module import error')
    learningRatio = random.uniform(0, 1)
    return learningRatio

ratio = learningRatio()
print(ratio)
input()
0.4410998028070725
| | 编号 | 色泽 | 根蒂 | 敲声 | 纹理 | 脐部 | 触感 | 密度 | 含糖率 | 好瓜 |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 青绿 | 蜷缩 | 浊响 | 清晰 | 凹陷 | 硬滑 | 0.697 | 0.460 | 是 |
| 1 | 2 | 乌黑 | 蜷缩 | 沉闷 | 清晰 | 凹陷 | 硬滑 | 0.774 | 0.376 | 是 |
| 2 | 3 | 乌黑 | 蜷缩 | 浊响 | 清晰 | 凹陷 | 硬滑 | 0.634 | 0.264 | 是 |
| 3 | 4 | 青绿 | 蜷缩 | 沉闷 | 清晰 | 凹陷 | 硬滑 | 0.608 | 0.318 | 是 |
| 4 | 5 | 浅白 | 蜷缩 | 浊响 | 清晰 | 凹陷 | 硬滑 | 0.556 | 0.215 | 是 |
| 5 | 6 | 青绿 | 稍蜷 | 浊响 | 清晰 | 稍凹 | 软粘 | 0.403 | 0.237 | 是 |
| 6 | 7 | 乌黑 | 稍蜷 | 浊响 | 稍糊 | 稍凹 | 软粘 | 0.481 | 0.149 | 是 |
| 7 | 8 | 乌黑 | 稍蜷 | 浊响 | 清晰 | 稍凹 | 硬滑 | 0.437 | 0.211 | 是 |
| 8 | 9 | 乌黑 | 稍蜷 | 沉闷 | 稍糊 | 稍凹 | 硬滑 | 0.666 | 0.091 | 否 |
| 9 | 10 | 青绿 | 硬挺 | 清脆 | 清晰 | 平坦 | 软粘 | 0.243 | 0.267 | 否 |
| 10 | 11 | 浅白 | 硬挺 | 清脆 | 模糊 | 平坦 | 硬滑 | 0.245 | 0.057 | 否 |
| 11 | 12 | 浅白 | 蜷缩 | 浊响 | 模糊 | 平坦 | 软粘 | 0.343 | 0.099 | 否 |
| 12 | 13 | 青绿 | 稍蜷 | 浊响 | 稍糊 | 凹陷 | 硬滑 | 0.639 | 0.161 | 否 |
| 13 | 14 | 浅白 | 稍蜷 | 沉闷 | 稍糊 | 凹陷 | 硬滑 | 0.657 | 0.198 | 否 |
| 14 | 15 | 乌黑 | 稍蜷 | 浊响 | 清晰 | 稍凹 | 软粘 | 0.360 | 0.370 | 否 |
| 15 | 16 | 浅白 | 蜷缩 | 浊响 | 模糊 | 平坦 | 硬滑 | 0.593 | 0.042 | 否 |
| 16 | 17 | 青绿 | 蜷缩 | 沉闷 | 稍糊 | 稍凹 | 硬滑 | 0.719 | 0.103 | 否 |
def outputlayer(df):
    """
    @param df: the pandas dataframe
    @return Yk: the network output
    """
    pass  # placeholder only: the forward computation is implemented by BP_network.Pred below
'''
the definition of the BP network class
'''
class BP_network:
    def __init__(self):
        '''
        initial variables
        '''
        # numbers of neurons in the input, hidden and output layers
        self.i_n = 0
        self.h_n = 0
        self.o_n = 0
        # output values of the neurons in each layer
        self.i_v = []
        self.h_v = []
        self.o_v = []
        # connection weights: input->hidden and hidden->output
        self.ih_w = []
        self.ho_w = []
        # thresholds of the hidden and output neurons
        self.h_t = []
        self.o_t = []
        # available activation functions and their derivatives
        self.fun = {
            'Sigmoid': Sigmoid,
            'SigmoidDerivate': SigmoidDerivate,
            'Tanh': Tanh,
            'TanhDerivate': TanhDerivate,
        }
    def CreateNN(self, ni, nh, no, actfun):
        '''
        build a BP network structure and initialize its parameters
        @param ni, nh, no: the number of neurons in each layer
        @param actfun: string, the name of the activation function
        '''
        import numpy as np
        # layer sizes
        self.i_n = ni
        self.h_n = nh
        self.o_n = no
        # neuron output values of each layer
        self.i_v = np.zeros(self.i_n)
        self.h_v = np.zeros(self.h_n)
        self.o_v = np.zeros(self.o_n)
        # connection weights, randomly initialized in (0,1)
        self.ih_w = np.zeros([self.i_n, self.h_n])
        self.ho_w = np.zeros([self.h_n, self.o_n])
        for i in range(self.i_n):
            for h in range(self.h_n):
                self.ih_w[i][h] = rand(0, 1)
        for h in range(self.h_n):
            for j in range(self.o_n):
                self.ho_w[h][j] = rand(0, 1)
        # thresholds, randomly initialized in (0,1)
        self.h_t = np.zeros(self.h_n)
        self.o_t = np.zeros(self.o_n)
        for h in range(self.h_n): self.h_t[h] = rand(0, 1)
        for j in range(self.o_n): self.o_t[j] = rand(0, 1)
        # activation function and its derivative
        self.af = self.fun[actfun]
        self.afd = self.fun[actfun + 'Derivate']
'''
the definition of the random function
'''
def rand(a, b):
    '''
    random value generation for parameter initialization
    @param a, b: the lower and upper limits of the random value
    '''
    from random import random
    return (b - a) * random() + a
'''
the definition of the activation functions
'''
def Sigmoid(x):
    '''
    definition of the sigmoid function and its derivative
    '''
    from math import exp
    return 1.0 / (1.0 + exp(-x))

def SigmoidDerivate(y):
    # derivative expressed in terms of the function value y = Sigmoid(x)
    return y * (1 - y)

def Tanh(x):
    '''
    definition of the tanh function and its derivative
    '''
    from math import tanh
    return tanh(x)

def TanhDerivate(y):
    # derivative expressed in terms of the function value y = Tanh(x)
    return 1 - y*y
    def Pred(self, x):
        '''
        forward pass: compute the network output for one sample
        @param x: the input array for the input layer
        '''
        # feed the sample into the input layer
        for i in range(self.i_n):
            self.i_v[i] = x[i]
        # hidden layer: activation of (weighted input - threshold)
        for h in range(self.h_n):
            total = 0.0
            for i in range(self.i_n):
                total += self.i_v[i] * self.ih_w[i][h]
            self.h_v[h] = self.af(total - self.h_t[h])
        # output layer: activation of (weighted hidden output - threshold)
        for j in range(self.o_n):
            total = 0.0
            for h in range(self.h_n):
                total += self.h_v[h] * self.ho_w[h][j]
            self.o_v[j] = self.af(total - self.o_t[j])
One remaining question is in what form the watermelon data that has been read in should be fed to the network.
How should the discrete attributes of the watermelon dataset be handled? For example, should 色泽{青绿,乌黑,浅白} be mapped to {0,1,2}?
If not, how can discrete attributes be used in the computation?
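One straightforward answer is to one-hot encode the discrete attributes (so no artificial ordering such as {青绿,乌黑,浅白}={0,1,2} is imposed) and keep the two continuous attributes as they are. A sketch using pandas, assuming the dataframe returned by input() above (the helper name encode_dataset is my own):

```python
import pandas as pd

def encode_dataset(df):
    """Turn watermelon 3.0 into a numeric input matrix X and 0/1 labels y."""
    # one dummy (0/1) column per category value of each discrete attribute
    X_disc = pd.get_dummies(df[['色泽', '根蒂', '敲声', '纹理', '脐部', '触感']])
    # the two continuous attributes are used directly
    X_cont = df[['密度', '含糖率']]
    X = pd.concat([X_disc, X_cont], axis=1).values.astype(float)
    # label: 好瓜 '是' -> 1, '否' -> 0, shaped as a column vector for the output layer
    y = (df['好瓜'] == '是').astype(int).values.reshape(-1, 1)
    return X, y
```

Integer coding such as {0,1,2} would also run, but it quietly assumes the attribute values are ordered and equally spaced, which is why one-hot encoding is usually preferred for nominal attributes.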
    def BackPropagate(self, x, y, lr):
        '''
        one step of error back-propagation for a single sample
        @param x, y: arrays, input and target output of the data sample
        @param lr: float, the learning rate of the gradient descent iteration
        '''
        import numpy as np
        # forward pass to get the current outputs
        self.Pred(x)
        # gradient terms of the output-layer neurons
        o_grid = np.zeros(self.o_n)
        for j in range(self.o_n):
            o_grid[j] = (y[j] - self.o_v[j]) * self.afd(self.o_v[j])
        # gradient terms of the hidden-layer neurons
        h_grid = np.zeros(self.h_n)
        for h in range(self.h_n):
            for j in range(self.o_n):
                h_grid[h] += self.ho_w[h][j] * o_grid[j]
            h_grid[h] = h_grid[h] * self.afd(self.h_v[h])
        # update the connection weights
        for h in range(self.h_n):
            for j in range(self.o_n):
                self.ho_w[h][j] += lr * o_grid[j] * self.h_v[h]
        for i in range(self.i_n):
            for h in range(self.h_n):
                self.ih_w[i][h] += lr * h_grid[h] * self.i_v[i]
        # update the thresholds
        for j in range(self.o_n):
            self.o_t[j] -= lr * o_grid[j]
        for h in range(self.h_n):
            self.h_t[h] -= lr * h_grid[h]
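For reference, the updates implemented above correspond to the usual single-hidden-layer BP equations. In the notation below (my own, chosen to mirror the code) $\hat{y}_j$ is o_v[j], $b_h$ is h_v[h], $w_{hj}$ and $v_{ih}$ are the hidden-to-output and input-to-hidden weights, $\theta_j$ and $\gamma_h$ the thresholds, and $\eta$ the learning rate; note that the derivative is taken through the activation value itself, e.g. SigmoidDerivate(y) = y(1 − y):

$$g_j = (y_j - \hat{y}_j)\, f'(\hat{y}_j), \qquad e_h = f'(b_h) \sum_{j} w_{hj}\, g_j$$

$$\Delta w_{hj} = \eta\, g_j b_h, \quad \Delta v_{ih} = \eta\, e_h x_i, \quad \Delta \theta_j = -\eta\, g_j, \quad \Delta \gamma_h = -\eta\, e_h$$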
    def TrainStandard(self, data_in, data_out, lr=0.05):
        '''
        standard BP training: one pass over the data, updating after every sample
        @param data_in: the network's input data
        @param data_out: the target data of the output layer
        @param lr: learning rate, default 0.05
        @return: e, mean error over this pass
        @return: e_k, error of each sample
        '''
        e_k = []
        for k in range(len(data_in)):
            x = data_in[k]
            y = data_out[k]
            # update the parameters on this single sample (standard BP)
            self.BackPropagate(x, y, lr)
            # squared error of the current sample
            y_delta2 = 0.0
            for j in range(self.o_n):
                y_delta2 += (self.o_v[j] - y[j]) * (self.o_v[j] - y[j])
            e_k.append(y_delta2 / 2)
        # mean error over the whole pass
        e = sum(e_k) / len(e_k)
        return e, e_k
    def PredLabel(self, X):
        '''
        prediction through the network
        @param X: the input sample set for the input layer
        @return: y, array, predicted 0/1 classes based on winner-takes-all
        (i.e. competitive learning: the winner takes all)
        '''
        import numpy as np
        y = []
        for m in range(len(X)):
            self.Pred(X[m])
            # single output neuron: threshold the sigmoid output at 0.5
            if self.o_v[0] > 0.5: y.append(1)
            else: y.append(0)
        return np.array(y)
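A possible way to wire everything together, assuming the encode_dataset helper sketched earlier is used; the hidden-layer size (5) and the number of passes (500) are arbitrary choices for illustration, not tuned values:

```python
df = input()                                # load watermelon 3.0 as a dataframe
X, y = encode_dataset(df)                   # numeric inputs and 0/1 labels

nn = BP_network()
nn.CreateNN(X.shape[1], 5, 1, 'Sigmoid')    # single hidden layer with 5 neurons

for epoch in range(500):                    # standard BP: one update per sample per pass
    e, e_k = nn.TrainStandard(X, y, lr=0.1)

pred = nn.PredLabel(X)
accuracy = (pred == y.ravel()).mean()
print('mean error %.4f, training accuracy %.3f' % (e, accuracy))
```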