全连接层实现代码:
class Dense(Layer):
    """Fully-connected layer that computes ``X.dot(W) + w0``.

    Parameters
    ----------
    n_units : int
        The number of neurons in the layer.
    input_shape : tuple
        The expected input shape of the layer. For dense layers this is a
        single-element tuple holding the number of input features. Must be
        specified if this is the first layer in the network.
    """

    def __init__(self, n_units, input_shape=None):
        self.layer_input = None
        self.input_shape = input_shape
        self.n_units = n_units
        self.trainable = True
        # Weight matrix and bias row; both stay None until initialize() runs.
        self.W = None
        self.w0 = None

    def initialize(self, optimizer):
        """Create the weights and give each parameter its own optimizer copy.

        ``W`` is drawn uniformly from ``[-1/sqrt(fan_in), 1/sqrt(fan_in))``
        where ``fan_in`` is ``input_shape[0]``; ``w0`` starts at zero.
        """
        fan_in = self.input_shape[0]
        bound = 1 / math.sqrt(fan_in)
        self.W = np.random.uniform(-bound, bound, (fan_in, self.n_units))
        self.w0 = np.zeros((1, self.n_units))
        # Separate (shallow) copies so W and w0 are updated through
        # distinct optimizer objects.
        self.W_opt = copy.copy(optimizer)
        self.w0_opt = copy.copy(optimizer)

    def parameters(self):
        """Return the total number of scalar entries in ``W`` and ``w0``."""
        return sum(np.prod(param.shape) for param in (self.W, self.w0))

    def forward_pass(self, X, training=True):
        """Remember the input for the backward pass and return ``X.dot(W) + w0``."""
        self.layer_input = X
        projected = X.dot(self.W)
        return projected + self.w0

    def backward_pass(self, accum_grad):
        """Update the weights (if trainable) and propagate the gradient back.

        ``accum_grad`` is the gradient flowing in from the following layer.
        The returned gradient is computed with the weights that were in
        place *before* this call's update.
        """
        # Keep a reference to the weights used during the forward pass.
        forward_W = self.W

        if self.trainable:
            # Gradients with respect to the layer parameters.
            weight_grad = self.layer_input.T.dot(accum_grad)
            bias_grad = np.sum(accum_grad, axis=0, keepdims=True)

            # Each optimizer returns the updated parameter value.
            self.W = self.W_opt.update(self.W, weight_grad)
            self.w0 = self.w0_opt.update(self.w0, bias_grad)

        # Gradient handed to the previous layer, based on the pre-update weights.
        return accum_grad.dot(forward_W.T)

    def output_shape(self):
        """Return the layer's output shape as a one-element tuple."""
        return (self.n_units,)
全连接神经网络做线性回归
一、定义前向、后向传播
本文将用 numpy 实现全连接层的前向过程和反向过程，并以一个线性回归为例进行测试。
import numpy as np
def fc_forword(z, W, b):
    """
    Forward pass of a fully-connected layer.

    :param z: output of the current layer (the input to this transform)
    :param W: weights of the current layer
    :param b: bias of the current layer
    :return: output handed to the next layer, i.e. ``np.dot(z, W) + b``
    """
    weighted = np.dot(z, W)
    return weighted + b
def fc_backword(next_dz, W, z):
    """
    Backward pass of a fully-connected layer.

    :param next_dz: gradient arriving from the next layer
    :param W: weights of the current layer
    :param z: output of the current layer (the input used in the forward pass)
    :return: tuple ``(dw, db, dz)`` -- weight gradient and bias gradient, both
             divided by the number of rows in ``z``, and the gradient with
             respect to ``z`` (not divided)
    """
    batch = z.shape[0]
    # Gradient with respect to this layer's input.
    grad_input = np.dot(next_dz, W.T)
    # Parameter gradients: summed over the batch, then averaged.
    grad_weights = np.dot(z.T, next_dz) / batch
    grad_bias = np.sum(next_dz, axis=0) / batch
    return grad_weights, grad_bias, grad_input
二、定义损失函数
def mean_squared_loss(y_predict, y_true):
    """
    Mean squared error loss.

    :param y_predict: predictions, shape (N, d), N being the batch size
    :param y_true: ground-truth values
    :return: tuple ``(loss, dy)`` -- ``loss`` is the squared error summed over
             the last axis and averaged over the remaining entries; ``dy`` is
             the raw residual ``y_predict - y_true`` used as the gradient with
             respect to the network output (no factor of 2 or 1/N is applied
             here)
    """
    residual = y_predict - y_true
    per_sample = np.sum(np.square(residual), axis=-1)
    loss = np.mean(per_sample)
    return loss, residual
三、初始化数据
# Ground-truth weights and bias that training is expected to recover
W = np.array([[3, 7, 4],
              [5, 2, 6]])
b = np.array([2, 9, 3])

# Generate the training set: 500 samples with 2 integer features in [0, 10)
x_data = np.random.randint(0, 10, 1000).reshape(500, 2)
y_data = np.dot(x_data, W) + b


def next_sample(batch_size=1):
    """Return a random contiguous mini-batch ``(x, y)`` from the training set.

    The start index is drawn so that the slice always fits inside the data
    set, which means every call yields exactly ``batch_size`` rows. If
    ``batch_size`` exceeds the data set size, the whole data set is returned.

    :param batch_size: number of consecutive samples to return
    :return: tuple ``(x_batch, y_batch)`` sliced from ``x_data`` / ``y_data``
    """
    # Largest start index that still leaves room for a full batch; derived
    # from the data itself instead of a hard-coded 500.
    max_start = max(len(x_data) - batch_size, 0)
    idx = np.random.randint(max_start + 1)
    return x_data[idx:idx + batch_size], y_data[idx:idx + batch_size]


print("x.shape:{},y.shape:{}".format(x_data.shape, y_data.shape))
x.shape:(500, 2),y.shape:(500, 3)
四、定义网络、使用SGD训练
# Randomly initialise the trainable parameters
W1 = np.random.randn(2, 3)
b1 = np.zeros([3])
loss = 100.0
lr = 0.01
i = 0
# Safety cap: the stopping criterion is a floating-point threshold on a
# stochastic loss, so bound the loop instead of relying on it alone.
max_iters = 1000000

while loss > 1e-15 and i < max_iters:
    x, y_true = next_sample(2)  # fetch the current mini-batch

    # Forward pass
    y = fc_forword(x, W1, b1)

    # Backward pass: loss, then parameter gradients.
    loss, dy = mean_squared_loss(y, y_true)
    # Use the model's own weights W1 (not the ground-truth W). The returned
    # dw/db do not depend on this argument, only the discarded input gradient
    # does, so the training trajectory is unchanged.
    dw, db, _ = fc_backword(dy, W1, x)

    # Gradient-descent update (fc_backword already averaged over the batch)
    W1 -= lr * dw
    b1 -= lr * db

    # Advance the iteration counter and report progress periodically
    i += 1
    if i % 1000 == 0:
        print("\n迭代{}次,当前loss:{}, 当前权重:{},当前偏置{}".format(i, loss, W1, b1))

# Print the final result
print("\n迭代{}次,当前loss:{}, 当前权重:{},当前偏置{}".format(i, loss, W1, b1))
迭代1000次,当前loss:0.43387298848896233, 当前权重:[[3.01734672 7.12785625 4.02756123]
[5.0221794 2.16347613 6.0352396 ]],当前偏置[1.81757802 7.65543542 2.71016 ]
迭代2000次,当前loss:0.024775748245913158, 当前权重:[[3.00242166 7.01784918 4.00384764]
[5.00295757 2.02179914 6.00469912]],当前偏置[1.96775495 8.76233376 2.94876766]
迭代3000次,当前loss:0.00014564406568725818, 当前权重:[[3.00082136 7.00605396 4.00130502]
[5.00061563 2.00453758 6.00097814]],当前偏置[1.99381124 8.95438495 2.99016703]
迭代4000次,当前loss:2.6237167410353415e-05, 当前权重:[[3.0001119 7.00082475 4.00017779]
[5.00008191 2.0006037 6.00013014]],当前偏置[1.99885749 8.99157899 2.99818473]
迭代5000次,当前loss:3.713805657221762e-07, 当前权重:[[3.00002322 7.00017112 4.00003689]
[5.00001109 2.00008176 6.00001763]],当前偏置[1.99979785 8.99851001 2.99967881]
迭代6000次,当前loss:8.807646869757514e-09, 当前权重:[[3.0000031 7.00002283 4.00000492]
[5.00000397 2.00002927 6.00000631]],当前偏置[1.99996212 8.9997208 2.99993981]
迭代7000次,当前loss:1.536245925844849e-10, 当前权重:[[3.00000073 7.00000539 4.00000116]
[5.00000067 2.00000494 6.00000106]],当前偏置[1.99999324 8.99995017 2.99998926]
迭代7398次,当前loss:3.3297294256090265e-16, 当前权重:[[3.00000043 7.00000318 4.00000069]
[5.0000004 2.00000294 6.00000063]],当前偏置[1.99999655 8.99997456 2.99999452]
# Compare the learned parameters with the ground truth (np.allclose tolerances)
weights_match = np.allclose(W1, W)
bias_match = np.allclose(b1, b)
print("W1==W: {} \nb1==b: {}".format(weights_match, bias_match))
W1==W: True
b1==b: True
全连接神经网络做mnist手写数字识别
一、定义前向、后向传播
本文将用 numpy 实现 DNN（深度神经网络），并在 MNIST 手写数字识别任务上进行测试。
网络结构如下，包括 3 个 fc 层：input(28*28) => fc(256) => relu => fc(256) => relu => fc(10)
import numpy as np
# Define the weights, the cached neuron activations and the gradients
weights_scale = 1e-3

# Small Gaussian weights, zero biases. Entries are listed in the order
# W1, b1, W2, b2, W3, b3 so the random draws happen in that sequence.
weights = {
    "W1": weights_scale * np.random.randn(28 * 28, 256),
    "b1": np.zeros(256),
    "W2": weights_scale * np.random.randn(256, 256),
    "b2": np.zeros(256),
    "W3": weights_scale * np.random.randn(256, 10),
    "b3": np.zeros(10),
}

# Filled in by the forward pass and the backward pass respectively.
nuerons = {}
gradients = {}
from nn.layers import fc_forward
from nn.activations import relu_forward
# Forward pass
def forward(X):
    """Run fc -> relu -> fc -> relu -> fc on ``X`` and return the last output.

    Every intermediate result is stored in the module-level ``nuerons`` dict
    (keys ``z2``, ``z2_relu``, ``z3``, ``z3_relu``, ``y``) so that the
    backward pass can read it.
    """
    # First fully-connected layer followed by ReLU
    z2 = nuerons["z2"] = fc_forward(X, weights["W1"], weights["b1"])
    a2 = nuerons["z2_relu"] = relu_forward(z2)

    # Second fully-connected layer followed by ReLU
    z3 = nuerons["z3"] = fc_forward(a2, weights["W2"], weights["b2"])
    a3 = nuerons["z3_relu"] = relu_forward(z3)

    # Output layer (no activation applied here)
    output = nuerons["y"] = fc_forward(a3, weights["W3"], weights["b3"])
    return output
from nn.losses import cross_entropy_loss
from nn.layers import fc_backward
from nn.activations import relu_backward
# Backward pass
def backward(X, y_true):
    """Compute the loss and fill ``gradients`` for every parameter.

    Reads the activations cached in ``nuerons`` by the preceding ``forward``
    call, walks the layers from the output back to the input, stores each
    gradient in the module-level ``gradients`` dict and returns the loss.
    """
    loss, dy = cross_entropy_loss(nuerons["y"], y_true)

    # Output layer
    dW3, db3, d_a3 = fc_backward(dy, weights["W3"], nuerons["z3_relu"])
    gradients["W3"], gradients["b3"], gradients["z3_relu"] = dW3, db3, d_a3

    # Second hidden layer: through the ReLU, then the fc layer
    d_z3 = relu_backward(d_a3, nuerons["z3"])
    gradients["z3"] = d_z3
    dW2, db2, d_a2 = fc_backward(d_z3, weights["W2"], nuerons["z2_relu"])
    gradients["W2"], gradients["b2"], gradients["z2_relu"] = dW2, db2, d_a2

    # First hidden layer: the gradient with respect to X is not needed
    d_z2 = relu_backward(d_a2, nuerons["z2"])
    gradients["z2"] = d_z2
    dW1, db1, _ = fc_backward(d_z2, weights["W1"], X)
    gradients["W1"], gradients["b1"] = dW1, db1

    return loss
# Accuracy
def get_accuracy(X, y_true):
    """Return the fraction of rows where the predicted class equals the true one.

    Both the network output for ``X`` and ``y_true`` are reduced with
    ``argmax`` over the last axis before being compared.
    """
    predicted_labels = np.argmax(forward(X), axis=-1)
    true_labels = np.argmax(y_true, axis=-1)
    matches = np.equal(predicted_labels, true_labels)
    return np.mean(matches)
二、加载数据
mnist.pkl.gz数据源: http://deeplearning.net/data/mnist/mnist.pkl.gz
from nn.load_mnist import load_mnist_datasets
from nn.utils import to_categorical
# Load the three MNIST splits, then convert each split's labels (element 1)
# with to_categorical.
train_set, val_set, test_set = load_mnist_datasets('mnist.pkl.gz')
train_y, val_y, test_y = (
    to_categorical(split[1]) for split in (train_set, val_set, test_set)
)
# Randomly pick training samples
train_num = train_set[0].shape[0]


def next_batch(batch_size):
    """Return ``batch_size`` randomly chosen training inputs and their labels.

    Indices come from ``np.random.choice(train_num, batch_size)``, whose
    default samples with replacement, so a batch may repeat a sample.
    """
    picks = np.random.choice(train_num, batch_size)
    inputs = train_set[0]
    return inputs[picks], train_y[picks]


# Quick sanity check of the batch shapes
x, y = next_batch(16)
print("x.shape:{},y.shape:{}".format(x.shape, y.shape))
x.shape:(16, 784),y.shape:(16, 10)
# 可视化
import matplotlib.pyplot as plt
# Display the training sample at index 3 as a 28x28 image
digit = train_set[0][3]
digit_image = np.reshape(digit, (28, 28))
plt.imshow(digit_image)
plt.show()
三、训练
# Training hyper-parameters.
# epoch and log_every are set to match the recorded run shown after this
# script, which covers epochs 0-4 and logs every 200 steps.
batch_size = 32
epoch = 5
steps = train_num // batch_size
lr = 0.1
log_every = 200
param_names = ["W1", "b1", "W2", "b2", "W3", "b3"]

for e in range(epoch):
    for s in range(steps):
        X, y = next_batch(batch_size)

        # Forward pass caches the activations that backward() reads
        forward(X)
        loss = backward(X, y)

        # Plain SGD step on every parameter
        for k in param_names:
            weights[k] -= lr * gradients[k]

        # Periodic progress report: loss on this batch, plus accuracy on the
        # batch and on the validation set (evaluated after the update).
        if s % log_every == 0:
            print("\n epoch:{} step:{} ; loss:{}".format(e, s, loss))
            print(" train_acc:{}; val_acc:{}".format(get_accuracy(X, y), get_accuracy(val_set[0], val_y)))

# Final evaluation on the test and validation sets
print("\n final result test_acc:{}; val_acc:{}".
      format(get_accuracy(test_set[0], test_y), get_accuracy(val_set[0], val_y)))
epoch:0 step:0 ; loss:2.302584820875885
train_acc:0.1875; val_acc:0.103
epoch:0 step:200 ; loss:2.3089974735813046
train_acc:0.0625; val_acc:0.1064
epoch:0 step:400 ; loss:2.3190137162037106
train_acc:0.0625; val_acc:0.1064
epoch:0 step:600 ; loss:2.29290016314387
train_acc:0.1875; val_acc:0.1064
epoch:0 step:800 ; loss:2.2990879829286004
train_acc:0.125; val_acc:0.1064
epoch:0 step:1000 ; loss:2.2969247354797817
train_acc:0.125; val_acc:0.1064
epoch:0 step:1200 ; loss:2.307249383676819
train_acc:0.09375; val_acc:0.1064
epoch:0 step:1400 ; loss:2.3215380862102757
train_acc:0.03125; val_acc:0.1064
epoch:1 step:0 ; loss:2.2884130059797547
train_acc:0.25; val_acc:0.1064
epoch:1 step:200 ; loss:1.76023258152068
train_acc:0.34375; val_acc:0.2517
epoch:1 step:400 ; loss:1.4113708080481038
train_acc:0.40625; val_acc:0.3138
epoch:1 step:600 ; loss:1.4484238805860425
train_acc:0.53125; val_acc:0.5509
epoch:1 step:800 ; loss:0.4831932927037818
train_acc:0.9375; val_acc:0.7444
epoch:1 step:1000 ; loss:0.521746944367524
train_acc:0.84375; val_acc:0.8234
epoch:1 step:1200 ; loss:0.5975823718636631
train_acc:0.875; val_acc:0.8751
epoch:1 step:1400 ; loss:0.39426304417143254
train_acc:0.9375; val_acc:0.8939
epoch:2 step:0 ; loss:0.3392397455325375
train_acc:0.9375; val_acc:0.8874
epoch:2 step:200 ; loss:0.2349061434167009
train_acc:0.96875; val_acc:0.9244
epoch:2 step:400 ; loss:0.1642980488678663
train_acc:0.96875; val_acc:0.9223
epoch:2 step:600 ; loss:0.18962678031295344
train_acc:1.0; val_acc:0.9349
epoch:2 step:800 ; loss:0.1374088809322303
train_acc:1.0; val_acc:0.9365
epoch:2 step:1000 ; loss:0.45885105735878895
train_acc:0.96875; val_acc:0.939
epoch:2 step:1200 ; loss:0.049076886226820146
train_acc:1.0; val_acc:0.9471
epoch:2 step:1400 ; loss:0.3464252344080918
train_acc:0.9375; val_acc:0.9413
epoch:3 step:0 ; loss:0.2719433362166901
train_acc:0.96875; val_acc:0.9517
epoch:3 step:200 ; loss:0.06844332074679768
train_acc:1.0; val_acc:0.9586
epoch:3 step:400 ; loss:0.16346902137921188
train_acc:1.0; val_acc:0.9529
epoch:3 step:600 ; loss:0.15661875582989374
train_acc:1.0; val_acc:0.9555
epoch:3 step:800 ; loss:0.10004190054365474
train_acc:1.0; val_acc:0.9579
epoch:3 step:1000 ; loss:0.20624793312023684
train_acc:0.96875; val_acc:0.9581
epoch:3 step:1200 ; loss:0.016292493383161803
train_acc:1.0; val_acc:0.9602
epoch:3 step:1400 ; loss:0.08761421046492293
train_acc:1.0; val_acc:0.9602
epoch:4 step:0 ; loss:0.23058956036352923
train_acc:0.9375; val_acc:0.9547
epoch:4 step:200 ; loss:0.14973880899309255
train_acc:0.96875; val_acc:0.9674
epoch:4 step:400 ; loss:0.4563995699690676
train_acc:0.9375; val_acc:0.9667
epoch:4 step:600 ; loss:0.03818259411193518
train_acc:1.0; val_acc:0.9641
epoch:4 step:800 ; loss:0.18057951765239755
train_acc:1.0; val_acc:0.968
epoch:4 step:1000 ; loss:0.05313018618481231
train_acc:1.0; val_acc:0.9656
epoch:4 step:1200 ; loss:0.07373341371929959
train_acc:1.0; val_acc:0.9692
epoch:4 step:1400 ; loss:0.0499225679993673
train_acc:1.0; val_acc:0.9696
final result test_acc:0.9674; val_acc:0.9676
# Inspect the prediction for a single test sample
x = test_set[0][5]
y = test_y[5]

plt.imshow(np.reshape(x, (28, 28)))
plt.show()

# forward() is given a one-element list so the sample forms a batch of one;
# element 0 of the result is that sample's output row.
sample_output = forward([x])[0]
y_predict = np.argmax(sample_output)
true_label = np.argmax(y)
print("y_true:{},y_predict:{}".format(true_label, y_predict))
y_true:1,y_predict:1