We implement a simple convolutional neural network with NumPy and the helper functions im2col and col2im, and apply it to handwritten digit recognition.
(Figure: the three basic steps of machine learning.)
(Figure: program design outline.)
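Both layers below rely on im2col and col2im, which are imported from main but not listed in this post. For reference, here is a common NumPy implementation of the pair (a sketch; the actual main module may differ): im2col unrolls each receptive field of a (N, C, H, W) batch into one row of a (N*out_h*out_w, C*filter_h*filter_w) matrix, and col2im scatters such a matrix back into image form, accumulating values in overlapping regions.

import numpy as np

def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    # Unroll a (N, C, H, W) batch: one row per receptive field
    N, C, H, W = input_data.shape
    out_h = (H + 2 * pad - filter_h) // stride + 1
    out_w = (W + 2 * pad - filter_w) // stride + 1
    img = np.pad(input_data, [(0, 0), (0, 0), (pad, pad), (pad, pad)], 'constant')
    col = np.zeros((N, C, filter_h, filter_w, out_h, out_w))
    for y in range(filter_h):
        y_max = y + stride * out_h
        for x in range(filter_w):
            x_max = x + stride * out_w
            col[:, :, y, x, :, :] = img[:, :, y:y_max:stride, x:x_max:stride]
    return col.transpose(0, 4, 5, 1, 2, 3).reshape(N * out_h * out_w, -1)

def col2im(col, input_shape, filter_h, filter_w, stride=1, pad=0):
    # Inverse of im2col: scatter rows back into an image, summing overlaps
    N, C, H, W = input_shape
    out_h = (H + 2 * pad - filter_h) // stride + 1
    out_w = (W + 2 * pad - filter_w) // stride + 1
    col = col.reshape(N, out_h, out_w, C, filter_h, filter_w).transpose(0, 3, 4, 5, 1, 2)
    img = np.zeros((N, C, H + 2 * pad + stride - 1, W + 2 * pad + stride - 1))
    for y in range(filter_h):
        y_max = y + stride * out_h
        for x in range(filter_w):
            x_max = x + stride * out_w
            img[:, :, y:y_max:stride, x:x_max:stride] += col[:, :, y, x, :, :]
    return img[:, :, pad:H + pad, pad:W + pad]

With this layout, a convolution reduces to a single matrix product, which is why the forward passes below stay so short.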
1. Convolution layer implementation
import numpy as np
from main import im2col, col2im

class Convolution:
    def __init__(self, W, b, stride=1, pad=0):
        self.W = W
        self.b = b
        self.stride = stride
        self.pad = pad
        # Intermediate data (used in backward)
        self.x = None
        self.col = None
        self.col_W = None
        # Gradients of the weights and bias
        self.dW = None
        self.db = None

    def forward(self, x):
        FN, C, FH, FW = self.W.shape  # shape of the filters
        N, C, H, W = x.shape          # shape of the input data
        out_h = 1 + int((H + 2 * self.pad - FH) / self.stride)  # output height
        out_w = 1 + int((W + 2 * self.pad - FW) / self.stride)  # output width
        col = im2col(x, FH, FW, self.stride, self.pad)  # unrolled input data
        col_W = self.W.reshape(FN, -1).T                # filters unrolled into a 2-D array
        out = np.dot(col, col_W) + self.b               # matrix product on the unrolled data
        # Reshape the output into the proper form.
        # transpose reorders the axes of a multi-dimensional array, turning the
        # output from (N, H, W, C) into (N, C, H, W); the indices (0, 1, 2, 3)
        # correspond to (N, H, W, C).
        out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)
        # Cache the intermediate data needed by backward
        self.x = x
        self.col = col
        self.col_W = col_W
        return out
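The class above only defines forward, but SimpleConvNet.backward in section 3 reads Conv1.dW and Conv1.db, so Convolution also needs a backward method. A minimal sketch following the standard im2col-based derivation (the transposed matrix products of the forward pass, with col2im mapping the input gradient back to image form), to be added inside the class:

    def backward(self, dout):
        FN, C, FH, FW = self.W.shape
        # flatten dout from (N, FN, out_h, out_w) to match the forward matrix product
        dout = dout.transpose(0, 2, 3, 1).reshape(-1, FN)
        self.db = np.sum(dout, axis=0)            # bias gradient
        self.dW = np.dot(self.col.T, dout)        # weight gradient in unrolled form
        self.dW = self.dW.transpose(1, 0).reshape(FN, C, FH, FW)
        dcol = np.dot(dout, self.col_W.T)         # gradient w.r.t. the unrolled input
        dx = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad)
        return dx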
2. Pooling layer implementation
import numpy as np
from main import im2col, col2im

class Pooling:
    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad
        # Intermediate data cached for backward
        self.x = None
        self.arg_max = None

    def forward(self, x):
        N, C, H, W = x.shape
        out_h = int(1 + (H - self.pool_h) / self.stride)  # output height
        out_w = int(1 + (W - self.pool_w) / self.stride)  # output width
        # Unroll the input data
        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        col = col.reshape(-1, self.pool_h * self.pool_w)
        arg_max = np.argmax(col, axis=1)
        # Take the maximum of each row
        out = np.max(col, axis=1)
        # Reshape the result into the proper form
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)
        # Cache the intermediate data needed by backward
        self.x = x
        self.arg_max = arg_max
        return out

    # Backward pass: route each gradient to the position that produced the max
    def backward(self, dout):
        dout = dout.transpose(0, 2, 3, 1)
        pool_size = self.pool_h * self.pool_w
        dmax = np.zeros((dout.size, pool_size))
        dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()
        dmax = dmax.reshape(dout.shape + (pool_size,))
        dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
        dx = col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad)
        return dx
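Before wiring the layers into a network, a quick shape check confirms the forward passes behave as expected (a hypothetical smoke test; the sizes here are made up for illustration):

import numpy as np
from Convolution import Convolution
from Pooling import Pooling

x = np.random.randn(2, 3, 8, 8)    # a dummy batch: N=2, C=3, H=W=8
conv = Convolution(np.random.randn(5, 3, 3, 3), np.zeros(5), stride=1, pad=1)
y = conv.forward(x)
print(y.shape)    # (2, 5, 8, 8): out_h = 1 + (8 + 2*1 - 3)/1 = 8
pool = Pooling(pool_h=2, pool_w=2, stride=2)
z = pool.forward(y)
print(z.shape)    # (2, 5, 4, 4): out_h = 1 + (8 - 2)/2 = 4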
3. Building the network
from collections import OrderedDict
import numpy as np
from Convolution import Convolution
from Pooling import Pooling
from SoftmaxWithLoss import SoftmaxWithLoss
from Relu import Relu
from Affine import Affine
class SimpleConvNet:
    """A simple ConvNet:
    conv - relu - pool - affine - relu - affine - softmax

    Parameters
    ----------
    input_dim : channels, height and width of the input data (MNIST: (1, 28, 28))
    conv_param : hyperparameters of the convolution layer (a dict)
    hidden_size : number of neurons in the second-to-last, fully connected layer (e.g. 100)
    output_size : number of neurons in the last, fully connected layer (MNIST: 10)
    weight_init_std : standard deviation of the initial weights (e.g. 0.01)
    """
    def __init__(self, input_dim=(1, 28, 28),
                 conv_param={'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
                 hidden_size=100, output_size=10, weight_init_std=0.01):
        """
        Pull the convolution hyperparameters passed in via conv_param out of
        the dict (for convenience below), then compute the output sizes of the
        convolution and pooling layers.
        """
        filter_num = conv_param['filter_num']     # number of filters
        filter_size = conv_param['filter_size']   # filter size
        filter_pad = conv_param['pad']            # padding
        filter_stride = conv_param['stride']      # stride
        input_size = input_dim[1]
        conv_output_size = (input_size - filter_size + 2 * filter_pad) / filter_stride + 1
        pool_output_size = int(filter_num * (conv_output_size / 2) * (conv_output_size / 2))
        # With the MNIST defaults: conv_output_size = (28 - 5 + 0)/1 + 1 = 24,
        # 2x2 pooling halves it to 12, so pool_output_size = 30 * 12 * 12 = 4320.
        # Initialize the weights.
        # The learnable parameters are the weights and biases of the first
        # (convolution) layer and of the two remaining fully connected layers;
        # all of them are stored in the instance dict params. The convolution
        # layer's weight and bias get the keys W1 and b1; likewise, W2/b2 and
        # W3/b3 hold the parameters of the second and third (fully connected) layers.
        self.params = {}
        # Convolution layer parameters
        self.params['W1'] = weight_init_std * \
            np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        # Parameters of the two fully connected (Affine) layers
        self.params['W2'] = weight_init_std * \
            np.random.randn(pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['W3'] = weight_init_std * \
            np.random.randn(hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)
        # Build the layers.
        # Add them to the ordered dict (OrderedDict) layers, named in order
        # 'Conv1', 'Relu1', 'Pool1', 'Affine1', 'Relu2', 'Affine2'.
        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'],
                                           self.params['b1'],
                                           conv_param['stride'],
                                           conv_param['pad'])
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'],
                                        self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'],
                                        self.params['b3'])
        self.lastLayer = SoftmaxWithLoss()
"""
参数x是输入数据,t是教师标签。
用于推理的predict方法从头开始依次调用已添加的层,并将结果传递给下一层。
在求损失函数的loss方法中,除了使用 forward方法进行的前向传播处理之外,还会继续进行forward处理,直到到达最后的SoftmaxWithLoss层。
"""
def forward(self, x):
for layer in self.layers.values():
x = layer.forward(x)
return x
def loss(self, x, t):
"""求损失函数
参数x是输入数据、t是数据标签
"""
y = self.forward(x)
return self.lastLayer.forward(y, t)
    def accuracy(self, x, t, batch_size=100):
        if t.ndim != 1:
            t = np.argmax(t, axis=1)
        acc = 0.0
        for i in range(int(x.shape[0] / batch_size)):
            tx = x[i * batch_size:(i + 1) * batch_size]
            tt = t[i * batch_size:(i + 1) * batch_size]
            y = self.forward(tx)
            y = np.argmax(y, axis=1)
            acc += np.sum(y == tt)
        return acc / x.shape[0]
"""
参数的梯度通过误差反向传播法(反向传播)求出,通过把正向传播和反向传播组装在一起来完
成。因为已经在各层正确实现了正向传播和反向传播的功能,所以这里只需要以合适的顺序调用
即可。最后,把各个权重参数的梯度保存到grads字典中。
"""
def backward(self, x, t):
# 运用误差反向传播法求取梯度
# forward
self.loss(x, t)
# backward
dout = 1
dout = self.lastLayer.backward(dout)
layers = list(self.layers.values())
layers.reverse()
for layer in layers:
dout = layer.backward(dout)
# 将学习过程中计算出的权重参数梯度保存到grads字典中
grads = {}
grads['W1'], grads['b1'] = self.layers['Conv1'].dW, self.layers['Conv1'].db
grads['W2'], grads['b2'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
grads['W3'], grads['b3'] = self.layers['Affine2'].dW, self.layers['Affine2'].db
return grads
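Relu, Affine, and SoftmaxWithLoss are reused from earlier experiments and their code is not repeated in this post. For reference, minimal sketches that match the interface SimpleConvNet relies on (forward/backward methods, with dW and db cached on Affine); the actual modules may differ in detail:

import numpy as np

class Relu:
    def __init__(self):
        self.mask = None

    def forward(self, x):
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0          # zero out negative inputs
        return out

    def backward(self, dout):
        dout[self.mask] = 0         # gradient flows only through positive inputs
        return dout

class Affine:
    def __init__(self, W, b):
        self.W = W
        self.b = b
        self.x = None
        self.original_x_shape = None
        self.dW = None
        self.db = None

    def forward(self, x):
        self.original_x_shape = x.shape
        x = x.reshape(x.shape[0], -1)   # flatten (N, C, H, W) from the pooling layer to (N, D)
        self.x = x
        return np.dot(x, self.W) + self.b

    def backward(self, dout):
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)
        return dx.reshape(*self.original_x_shape)

class SoftmaxWithLoss:
    def __init__(self):
        self.y = None   # softmax output
        self.t = None   # labels

    def forward(self, x, t):
        self.t = t
        x = x - x.max(axis=1, keepdims=True)                      # numerical stability
        self.y = np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)
        batch = x.shape[0]
        if t.ndim == 1:                                           # class-index labels
            return -np.sum(np.log(self.y[np.arange(batch), t] + 1e-7)) / batch
        return -np.sum(t * np.log(self.y + 1e-7)) / batch         # one-hot labels

    def backward(self, dout=1):
        batch = self.t.shape[0]
        if self.t.ndim == 1:
            dx = self.y.copy()
            dx[np.arange(batch), self.t] -= 1
        else:
            dx = self.y - self.t
        return dx * dout / batch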
4. Training the model and presenting results
import numpy as np
import matplotlib.pyplot as plt
from mnist import load_mnist
from optimizer import *
from SimpleConvNet import SimpleConvNet

# Load the data
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)
# Reduce the data if training takes too long
x_train, t_train = x_train[:5000], t_train[:5000]
x_test, t_test = x_test[:1000], t_test[:1000]
class Trainer:
    """A class that runs the training of a neural network."""
    def __init__(self, network, x_train, t_train, x_test, t_test,
                 epochs=20, mini_batch_size=100, optimizer='Adam', optimizer_param={'lr': 0.01},
                 evaluate_sample_num_per_epoch=None, verbose=True):
        self.network = network
        self.verbose = verbose
        self.x_train = x_train
        self.t_train = t_train
        self.x_test = x_test
        self.t_test = t_test
        self.epochs = epochs
        self.batch_size = mini_batch_size
        self.evaluate_sample_num_per_epoch = evaluate_sample_num_per_epoch
        # Choose the optimization method
        optimizer_class_dict = {'sgd': SGD, 'momentum': Momentum, 'nesterov': Nesterov,
                                'adagrad': AdaGrad, 'rmsprop': RMSprop, 'adam': Adam}
        self.optimizer = optimizer_class_dict[optimizer.lower()](**optimizer_param)
        self.train_size = x_train.shape[0]
        self.iter_per_epoch = max(self.train_size / mini_batch_size, 1)
        self.max_iter = int(epochs * self.iter_per_epoch)
        self.current_iter = 0
        self.current_epoch = 0
        self.train_loss_list = []
        self.train_acc_list = []
        self.test_acc_list = []
    def train_step(self):
        batch_mask = np.random.choice(self.train_size, self.batch_size)
        x_batch = self.x_train[batch_mask]
        t_batch = self.t_train[batch_mask]
        # Get the gradients from the network's backward pass
        grads = self.network.backward(x_batch, t_batch)
        # Update the parameters with the optimizer
        self.optimizer.update(self.network.params, grads)
        loss = self.network.loss(x_batch, t_batch)
        self.train_loss_list.append(loss)
        if self.verbose:
            print("train loss:" + str(loss))
        if self.current_iter % self.iter_per_epoch == 0:
            self.current_epoch += 1
            x_train_sample, t_train_sample = self.x_train, self.t_train
            x_test_sample, t_test_sample = self.x_test, self.t_test
            if self.evaluate_sample_num_per_epoch is not None:
                t = self.evaluate_sample_num_per_epoch
                x_train_sample, t_train_sample = self.x_train[:t], self.t_train[:t]
                x_test_sample, t_test_sample = self.x_test[:t], self.t_test[:t]
            # Compute the training accuracy train_acc and test accuracy test_acc
            train_acc = self.network.accuracy(x_train_sample, t_train_sample)
            test_acc = self.network.accuracy(x_test_sample, t_test_sample)
            self.train_acc_list.append(train_acc)
            self.test_acc_list.append(test_acc)
            if self.verbose:
                print("=== epoch:" + str(self.current_epoch) + ", train acc:" + str(train_acc) +
                      ", test acc:" + str(test_acc) + " ===")
        self.current_iter += 1
    def train(self):
        for i in range(self.max_iter):  # training loop
            self.train_step()
        test_acc = self.network.accuracy(self.x_test, self.t_test)
        if self.verbose:
            print("=============== Final Test Accuracy ===============")
            print("test acc:" + str(test_acc))
max_epochs = 20
# Instantiate the SimpleConvNet defined above
network = SimpleConvNet(input_dim=(1, 28, 28),
                        conv_param={'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
                        hidden_size=100, output_size=10, weight_init_std=0.01)
trainer = Trainer(network, x_train, t_train, x_test, t_test,
                  epochs=max_epochs, mini_batch_size=100, optimizer='Adam', optimizer_param={'lr': 0.001},
                  evaluate_sample_num_per_epoch=1000)
trainer.train()

# Plot the accuracy curves
markers = {'train': 'o', 'test': 's'}
x = np.arange(max_epochs)
plt.plot(x, trainer.train_acc_list, marker=markers['train'], label='train', markevery=2)
plt.plot(x, trainer.test_acc_list, marker=markers['test'], label='test', markevery=2)
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()
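The optimizer module imported above is likewise not listed here; every optimizer in it is assumed to expose an update(params, grads) method that mutates the parameter dict in place. A minimal Adam sketch consistent with that interface (an illustration, not the module's actual code):

import numpy as np

class Adam:
    def __init__(self, lr=0.001, beta1=0.9, beta2=0.999):
        self.lr = lr
        self.beta1 = beta1
        self.beta2 = beta2
        self.iter = 0
        self.m = None   # first-moment estimates
        self.v = None   # second-moment estimates

    def update(self, params, grads):
        if self.m is None:
            self.m = {k: np.zeros_like(v) for k, v in params.items()}
            self.v = {k: np.zeros_like(v) for k, v in params.items()}
        self.iter += 1
        # bias-corrected step size
        lr_t = self.lr * np.sqrt(1.0 - self.beta2 ** self.iter) / (1.0 - self.beta1 ** self.iter)
        for k in params.keys():
            self.m[k] += (1 - self.beta1) * (grads[k] - self.m[k])
            self.v[k] += (1 - self.beta2) * (grads[k] ** 2 - self.v[k])
            params[k] -= lr_t * self.m[k] / (np.sqrt(self.v[k]) + 1e-7)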
Problems encountered and solutions:

1. Console error:

Traceback (most recent call last):
  File "E:/project/pythonProject/08_CNN/Trainer.py", line 101, in <module>
    trainer = Trainer(network, x_train, t_train, x_test, t_test,
  File "E:/project/pythonProject/08_CNN/Trainer.py", line 35, in __init__
    self.optimizer = optimizer_class_dict[optimizer.lower()](**optimizer_param)
TypeError: type object argument after ** must be a mapping, not float

Fix: when constructing the Trainer, pass the learning rate as a mapping, optimizer_param={'lr': 0.001}, instead of a bare float lr=0.001; the ** unpacking in __init__ requires a dict.
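The mechanics of the error are easy to reproduce: ** unpacking builds keyword arguments from a mapping, so passing anything else raises this exact TypeError. A tiny standalone illustration (hypothetical code, unrelated to the project files):

def show(**kwargs):
    print(kwargs)

show(**{'lr': 0.001})   # OK: ** unpacks a mapping into keyword arguments
show(**0.001)           # TypeError: argument after ** must be a mapping, not float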
2. Attribute name mismatch on the final layer:

Cause: the attribute name used for the network's final layer when defining it did not match the name referenced in loss and backward.
Fix: use lastLayer consistently.
In this experiment we implemented the convolution and pooling layers of a CNN with NumPy and the two unrolling functions, gaining a better view of the internal structure of a CNN. Combining these layers with components from earlier experiments, we assembled a simple convolutional neural network for handwritten digit recognition, trained it with backpropagation and the Adam optimizer, and obtained good accuracy in a short time, which shows where the strength of CNNs lies. The biggest takeaway is a thorough understanding of how the convolution and pooling layers work.