This article describes how to create new operator layers in MXNet. It is based mainly on two references: the custom softmax example from the MXNet repository [1] and the tutorial on customizing new operators with numpy [2]. Note that the "softmax layer" here refers to a softmax loss layer, i.e. a layer that combines the softmax operation with the cross-entropy loss function.
Part one shows how to create the softmax loss layer; part two applies this loss layer to mnist classification; part three shows how to create a sigmoid layer. You may want to read part three first: the sigmoid layer is simpler to build and its forward and backward passes are more straightforward, whereas the softmax loss layer is somewhat more involved.
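For reference (standard softmax-plus-cross-entropy math, not quoted from [1]), here is the gradient that the backward pass of the loss layer implements. For logits $x$ and integer label $l$, the forward pass computes $y = \mathrm{softmax}(x)$, i.e. $y_j = e^{x_j} / \sum_k e^{x_k}$, and the cross-entropy loss is $L = -\log y_l$. Differentiating the composition gives

$$\frac{\partial L}{\partial x_j} = y_j - \mathbf{1}[j = l],$$

which is why the backward implementation below simply takes the softmax output and subtracts 1 at the label index.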
Building a new layer involves the following steps:
1. Subclass mx.operator.CustomOp and implement its forward and backward methods;
2. Subclass mx.operator.CustomOpProp to register the operator and describe its interface: decorate it with mx.operator.register, and implement list_arguments, list_outputs, infer_shape (optionally infer_type), and create_operator, which returns an instance of the CustomOp subclass;
3. Use the operator in a network via mx.sym.Custom (symbolic API) or mx.nd.Custom (imperative API), passing the registered name as op_type.
The full code is as follows:
import mxnet as mx
import numpy as np

class NewSoftmax(mx.operator.CustomOp):
    def forward(self, is_train, req, in_data, out_data, aux):
        """Implements the forward pass.
        is_train : bool, whether we are in training or testing mode.
        req : list of {'null', 'write', 'inplace', 'add'}, how to assign to out_data. 'null' means skip assignment.
        in_data : list of NDArray, input data.
        out_data : list of NDArray, pre-allocated output buffers.
        aux : list of NDArray, auxiliary states. Usually not used.
        """
        x = in_data[0].asnumpy()
        y = np.exp(x - x.max(axis=1).reshape((x.shape[0], 1)))
        y /= y.sum(axis=1).reshape((x.shape[0], 1))
        self.assign(out_data[0], req[0], mx.nd.array(y))

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        """Implements the backward pass.
        req : as in forward.
        out_grad : list of NDArray, gradients w.r.t. the output data (coming from the layers after this one).
        in_grad : list of NDArray, gradients w.r.t. the input data (going to the layers before this one), so this is the output buffer of the backward pass.
        """
        l = in_data[1].asnumpy().ravel().astype(np.int64)
        y = out_data[0].asnumpy()
        y[np.arange(l.shape[0]), l] -= 1.0
        self.assign(in_grad[0], req[0], mx.nd.array(y))

# We need to register the new operator so that mxnet can recognize it;
# to do so we subclass mx.operator.CustomOpProp.
@mx.operator.register("newsoftmax")
class NewSoftmaxProp(mx.operator.CustomOpProp):
    def __init__(self):
        # need_top_grad=False because this loss layer does not need gradients from the layers above it.
        super(NewSoftmaxProp, self).__init__(need_top_grad=False)

    def list_arguments(self):
        # this can be omitted if there is only one input.
        return ['data', 'label']

    def list_outputs(self):
        # this can be omitted if there is only one output.
        return ['output']

    def infer_shape(self, in_shape):
        """Infer output shapes from input shapes, i.e. perform shape inference.
        This can be omitted if inputs and outputs have the same shape.
        in_shape : list of shapes; a shape is a tuple of int.
        """
        data_shape = in_shape[0]
        label_shape = (in_shape[0][0],)
        output_shape = in_shape[0]
        # return three lists: input shapes, output shapes, and aux data shapes.
        return [data_shape, label_shape], [output_shape], []

    def infer_type(self, in_type):
        return in_type, [in_type[0]], []

    def create_operator(self, ctx, shapes, dtypes):
        # This factory method must be implemented; without it the operator is not
        # created correctly and the model fails to converge.
        # Create and return the custom operator instance.
        return NewSoftmax()
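A quick sanity check can confirm the operator behaves as expected. The snippet below is a minimal sketch (not part of [1]); it assumes the class definitions above have already been executed in the same session, and simply compares the custom forward pass with the built-in mx.nd.softmax:

# Sanity check: the custom op's forward pass should match the built-in softmax.
x = mx.nd.random.uniform(shape=(4, 10))
label = mx.nd.array([1, 3, 5, 7])
out = mx.nd.Custom(x, label, op_type='newsoftmax')
print(out)
print(mx.nd.softmax(x, axis=-1))  # should be numerically identical to out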
Using the softmax loss layer created above, we now apply it to classification on the mnist digit dataset. The code is as follows:
# encoding:utf-8
import logging  # important: without this, the per-epoch training information is not printed
logging.getLogger().setLevel(logging.INFO)
import os
import mxnet as mx
from mxnet import nd
from new_operation import NewSoftmax  # import the new layer

# Prepare the data and wrap it in NDArrayIter iterators.
mnist = mx.test_utils.get_mnist()
mx.random.seed(42)
batch_size = 100
train_iter = mx.io.NDArrayIter(mnist["train_data"], mnist["train_label"], batch_size, shuffle=True,
                               data_name='data', label_name='newsoftmax_label')
# Pitfall: the label name must be specified explicitly here; otherwise it defaults to
# 'softmax_label', which makes shape inference fail.
val_iter = mx.io.NDArrayIter(mnist["test_data"], mnist["test_label"], batch_size,
                             data_name='data', label_name='newsoftmax_label')

# Define the network.
data = mx.sym.var('data')
conv1 = mx.sym.Convolution(data=data, kernel=(3,3), num_filter=20)
relu1 = mx.sym.Activation(data=conv1, act_type="relu")
pool1 = mx.sym.Pooling(data=relu1, pool_type="max", kernel=(2,2), stride=(2,2))
conv2 = mx.sym.Convolution(data=pool1, kernel=(3,3), num_filter=20)
relu2 = mx.sym.Activation(data=conv2, act_type="relu")
pool2 = mx.sym.Pooling(data=relu2, pool_type="max", kernel=(2,2), stride=(2,2))
flatten = mx.sym.flatten(data=pool2)
fc1 = mx.sym.FullyConnected(data=flatten, num_hidden=500)
relu3 = mx.sym.Activation(data=fc1, act_type="relu")
fc2 = mx.sym.FullyConnected(data=relu3, num_hidden=10)
# cnn_symbol = mx.sym.SoftmaxOutput(data=fc2, name="softmax")
cnn_symbol = mx.sym.Custom(data=fc2, name="newsoftmax", op_type="newsoftmax")
# Call the new layer: op_type is the name we used when registering it.

# Define the module.
ctx = mx.gpu() if mx.test_utils.list_gpus() else mx.cpu()
cnn_model = mx.mod.Module(symbol=cnn_symbol, context=ctx,
                          data_names=["data"],
                          label_names=["newsoftmax_label"])

# Training.
cnn_model.fit(train_iter, eval_data=val_iter, optimizer='sgd', optimizer_params={'learning_rate': 0.1},
              batch_end_callback=mx.callback.Speedometer(batch_size, 100),  # print training info every 100 batches
              eval_metric='acc',
              num_epoch=10)  # train for 10 epochs, i.e. 10 passes over the training set

# Testing.
test_iter = mx.io.NDArrayIter(mnist['test_data'], None, batch_size)
prob = cnn_model.predict(test_iter)  # test 1: predict class probabilities
test_iter = mx.io.NDArrayIter(mnist['test_data'], mnist['test_label'], batch_size)
acc = mx.metric.Accuracy()
cnn_model.score(test_iter, acc)  # test 2: evaluate accuracy
print(acc)
assert acc.get()[1] > 0.98, "Achieved accuracy (%f) is lower than expected (0.98)" % acc.get()[1]
Finally, we create the sigmoid layer mentioned in part three. The code is as follows:
class Sigmoid(mx.operator.CustomOp):
    def forward(self, is_train, req, in_data, out_data, aux):
        """Implements forward computation.
        is_train : bool, whether forwarding for training or testing.
        req : list of {'null', 'write', 'inplace', 'add'}, how to assign to out_data. 'null' means skip assignment, etc.
        in_data : list of NDArray, input data.
        out_data : list of NDArray, pre-allocated output buffers.
        aux : list of NDArray, mutable auxiliary states. Usually not used.
        """
        x = in_data[0].asnumpy()
        y = 1.0 / (1.0 + np.exp(-x))
        self.assign(out_data[0], req[0], mx.nd.array(y))

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        """Implements backward computation.
        req : list of {'null', 'write', 'inplace', 'add'}, how to assign to in_grad.
        out_grad : list of NDArray, gradient w.r.t. output data.
        in_grad : list of NDArray, gradient w.r.t. input data. This is the output buffer.
        """
        y = out_data[0].asnumpy()
        dy = out_grad[0].asnumpy()
        dx = dy * (1.0 - y) * y
        self.assign(in_grad[0], req[0], mx.nd.array(dx))

@mx.operator.register("sigmoid")  # register with name "sigmoid"
class SigmoidProp(mx.operator.CustomOpProp):
    def __init__(self):
        # need_top_grad=True: unlike the loss layer above, sigmoid needs the gradient from the layers above it.
        super(SigmoidProp, self).__init__(need_top_grad=True)

    def list_arguments(self):
        # this can be omitted if you only have 1 input.
        return ['data']

    def list_outputs(self):
        # this can be omitted if you only have 1 output.
        return ['output']

    def infer_shape(self, in_shapes):
        """Calculate output shapes from input shapes. This can be
        omitted if all your inputs and outputs have the same shape.
        in_shapes : list of shape. Shape is described by a tuple of int.
        """
        data_shape = in_shapes[0]
        output_shape = data_shape
        # return 3 lists representing inputs shapes, outputs shapes, and aux data shapes.
        return (data_shape,), (output_shape,), ()

    def create_operator(self, ctx, in_shapes, in_dtypes):
        # create and return the CustomOp class.
        return Sigmoid()
from mxnet import autograd  # needed to record the computation graph

x = mx.nd.array([0, 1, 2, 3])
# attach gradient buffer to x for autograd
x.attach_grad()
# forward in a record() section to save the computation graph for backward
# see the autograd tutorial to learn more.
with autograd.record():
    y = mx.nd.Custom(x, op_type='sigmoid')
print(y)
# call backward computation
y.backward()
# gradient is now saved to the grad buffer we attached previously
print(x.grad)
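As a quick cross-check (a sketch added here, not part of tutorial [2]), the same forward and backward pass can be run with the built-in mx.nd.sigmoid; the printed output and gradient should match the values above:

# Compare against the built-in sigmoid: forward outputs and gradients should match.
x2 = mx.nd.array([0, 1, 2, 3])
x2.attach_grad()
with autograd.record():
    y2 = mx.nd.sigmoid(x2)
y2.backward()
print(y2)       # should equal y above
print(x2.grad)  # should equal x.grad above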
The above covers how to create new layers without parameters. The next post will cover creating new layers that do have parameters.
[1] https://github.com/apache/incubator-mxnet/blob/master/example/numpy-ops/custom_softmax.py
[2] https://mxnet.incubator.apache.org/tutorials/gluon/customop.html
[3] https://blog.csdn.net/qq_25491201/article/details/51284416