Creating a New Operator Layer in MXNet (Detailed)

This article explains how to create a new operator layer in MXNet. It draws mainly on two sources: the official MXNet example of building a softmax layer [1], and the tutorial on customizing new operators with numpy [2]. Note that the "softmax layer" here means the softmax loss layer, i.e., a layer that combines the softmax operation with the cross-entropy loss function.
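To make the forward and backward code below easier to follow, here is the underlying math. Given logits x, softmax produces p_i = exp(x_i) / sum_j exp(x_j), and the cross-entropy loss for a sample with label c is L = -log(p_c). The gradient of L with respect to the logits has the well-known closed form dL/dx_i = p_i - 1{i = c}: the softmax output with 1 subtracted at the label position, which is exactly what the backward pass of the custom layer implements.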

In Part 1 we create the softmax loss layer; in Part 2 we use this loss layer for MNIST classification; and in Part 3 we create a sigmoid layer. We suggest reading Part 3 first: the sigmoid layer is easier to build, with more straightforward forward and backward computations, while the softmax loss layer is somewhat more involved.

Building a new layer involves the following steps (each maps to a method of the custom-op API, as the skeleton after this list shows):

  • Forward computation
  • Backward computation
  • Input arguments
  • Output arguments
  • Shape inference
  • Type inference
  • Creating the operator instance

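These steps map directly onto the methods of mx.operator.CustomOp and mx.operator.CustomOpProp. As a minimal sketch (the class and registration names here are illustrative, not taken from the referenced examples):

import mxnet as mx

class MyOp(mx.operator.CustomOp):
    def forward(self, is_train, req, in_data, out_data, aux):
        pass                                     # step 1: forward computation

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        pass                                     # step 2: backward computation

@mx.operator.register("myop")
class MyOpProp(mx.operator.CustomOpProp):
    def list_arguments(self):
        return ['data']                          # step 3: input arguments

    def list_outputs(self):
        return ['output']                        # step 4: output arguments

    def infer_shape(self, in_shape):
        return in_shape, [in_shape[0]], []       # step 5: shape inference

    def infer_type(self, in_type):
        return in_type, [in_type[0]], []         # step 6: type inference

    def create_operator(self, ctx, shapes, dtypes):
        return MyOp()                            # step 7: create the operator instance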
1. Creating the softmax loss layer

The full code is as follows:

import mxnet as mx 
import numpy as np 


class NewSoftmax(mx.operator.CustomOp):
    def forward(self, is_train, req, in_data, out_data, aux):
        """实现前向操作

        is_train: bool, 训练还是测试模式
        req: {'null','write','inplace','add'}的列表,决定怎么赋值。如果是null,就直接跳过赋值
        in_data: NDArray列表, 输入数据
        out_data: NDArray列表, 输出数据,预先已经分配了内存
        aux: NDArray列表, 附加状态,通过不会用到
        """
        x = in_data[0].asnumpy()
        # subtract the per-row max before exponentiating, for numerical stability
        y = np.exp(x - x.max(axis=1).reshape((x.shape[0], 1)))
        y /= y.sum(axis=1).reshape((x.shape[0], 1))
        self.assign(out_data[0], req[0], mx.nd.array(y))
    
    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        """实现反向操作

        req: 如前向操作
        out_grad: NDArray列表, 梯度(对应的输出数据段,可以理解为网络的后端)
        in_grad: NDArray列表, 梯度(对应的输入数据段,可以理解为网络的前端),因此这个值是反向操作的输出
        """
        l = in_data[1].asnumpy().ravel().astype(np.int64)  # np.int is deprecated in recent numpy versions
        y = out_data[0].asnumpy()
        # gradient of softmax cross-entropy w.r.t. the logits: p - onehot(label)
        y[np.arange(l.shape[0]), l] -= 1.0
        self.assign(in_grad[0], req[0], mx.nd.array(y))


# The new operator must be registered so that MXNet can recognize it, which is done by subclassing mx.operator.CustomOpProp.
@mx.operator.register("newsoftmax")
class NewSoftmaxProp(mx.operator.CustomOpProp):
    def __init__(self):
        super(NewSoftmaxProp, self).__init__(need_top_grad=False)  # a loss layer does not need gradients from the layers above, so need_top_grad is False
    
    def list_arguments(self):
        # this can be omitted if there is only one input
        return ['data', 'label']
    
    def list_outputs(self):
        # this can be omitted if there is only one output
        return ['output']

    def infer_shape(self, in_shape):
        """通过输入的维度计算输出维度,也就是在进行维度推理。如果输出和输入维度相同,可以省略。

        in_shape: 维度列表, 维度是一个int类型的元组
        """
        data_shape = in_shape[0]
        label_shape = (in_shape[0][0],)
        output_shape = in_shape[0]
        # return three lists: input shapes, output shapes, and aux data shapes
        return [data_shape, label_shape], [output_shape], []

    def infer_type(self, in_type):
        # type inference: the output shares the dtype of the first input; no aux states
        return in_type, [in_type[0]], []
    
    def create_operator(self, ctx, shapes, dtypes):
        # This instance-creation method must be present; without it there is a bug that keeps the model from converging.
        # create and return the custom operator
        return NewSoftmax()
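Before using the layer in a full network, it can be exercised standalone through mx.nd.Custom. Below is a minimal smoke test (my addition, assuming the NewSoftmax/NewSoftmaxProp definitions above are in scope); it checks that each row of the softmax output sums to 1:

import mxnet as mx
import numpy as np

x = mx.nd.array(np.random.randn(2, 5))            # a batch of 2 samples with 5 classes
label = mx.nd.array([1, 3])                       # one class index per sample
y = mx.nd.Custom(x, label, op_type='newsoftmax')  # op_type is the registered name
print(y.sum(axis=1))                              # each entry should be ~1.0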

2. The classification network

Using the softmax loss layer created above, we now classify the MNIST handwritten digit dataset. The code is as follows:

# encoding:utf-8

import logging       # important for printing the training information of each epoch
logging.getLogger().setLevel(logging.INFO)

import os
import mxnet as mx
from mxnet import nd

from new_operation import NewSoftmax        # import the new layer

# Prepare the data and wrap it in NDArrayIter iterators
mnist = mx.test_utils.get_mnist()

mx.random.seed(42)

batch_size = 100
train_iter = mx.io.NDArrayIter(mnist["train_data"], mnist["train_label"], batch_size, shuffle=True,
                               data_name='data', label_name='newsoftmax_label')       # Pitfall: the label name must be specified here; otherwise it defaults to 'softmax_label', which breaks shape inference.
val_iter = mx.io.NDArrayIter(mnist["test_data"], mnist["test_label"], batch_size,
                             data_name='data', label_name='newsoftmax_label')

# Define the network
data = mx.sym.var('data')
conv1 = mx.sym.Convolution(data=data, kernel=(3,3), num_filter=20)
relu1 = mx.sym.Activation(data=conv1, act_type="relu")
pool1 = mx.sym.Pooling(data=relu1, pool_type="max", kernel=(2,2), stride=(2,2))

conv2 = mx.sym.Convolution(data=pool1, kernel=(3,3), num_filter=20)
relu2 = mx.sym.Activation(data=conv2, act_type="relu")
pool2 = mx.sym.Pooling(data=relu2, pool_type="max", kernel=(2,2), stride=(2,2))

flatten = mx.sym.flatten(data=pool2)
fc1 = mx.sym.FullyConnected(data=flatten, num_hidden=500)
relu3 = mx.sym.Activation(data=fc1, act_type="relu")

fc2 = mx.sym.FullyConnected(data=relu3, num_hidden=10)

# cnn_symbol = mx.sym.SoftmaxOutput(data=fc2, name="softmax")
cnn_symbol = mx.sym.Custom(data=fc2, name="newsoftmax", op_type="newsoftmax")   # invoke the new layer; op_type is the name it was registered under

# Define the module
ctx = mx.gpu() if mx.test_utils.list_gpus() else mx.cpu()
cnn_model = mx.mod.Module(symbol=cnn_symbol, context=ctx,
                          data_names=["data"],
                          label_names=["newsoftmax_label"])

# Train
cnn_model.fit(train_iter, eval_data=val_iter, optimizer='sgd', optimizer_params={'learning_rate': 0.1},
              batch_end_callback=mx.callback.Speedometer(batch_size, 100),   # print training information every 100 batches
              eval_metric='acc',
              num_epoch=10)  # train for 10 epochs, i.e., 10 full passes over the training set

# Test
test_iter = mx.io.NDArrayIter(mnist['test_data'], None, batch_size)
prob = cnn_model.predict(test_iter)   # test 1: per-class probabilities

test_iter = mx.io.NDArrayIter(mnist['test_data'], mnist['test_label'], batch_size)
acc = mx.metric.Accuracy()
cnn_model.score(test_iter, acc)   # test 2: accuracy on the test set
print(acc)
assert acc.get()[1] > 0.98, "Achieved accuracy (%f) is lower than expected (0.98)" % acc.get()[1]
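The predict call in test 1 returns the per-class softmax probabilities but the script never uses them. As a small illustrative addition (not part of the original example), the predicted class of each image is the argmax of its probability row:

pred = prob.asnumpy().argmax(axis=1)          # prob has shape (10000, 10); one class index per test image
print(pred[:10], mnist['test_label'][:10])    # compare the first few predictions with the ground truth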

3. Creating the sigmoid layer

The full code is as follows:

import mxnet as mx
import numpy as np
from mxnet import autograd   # needed for the usage example at the end


class Sigmoid(mx.operator.CustomOp):
    def forward(self, is_train, req, in_data, out_data, aux):
        """Implements forward computation.

        is_train : bool, whether forwarding for training or testing.
        req : list of {'null', 'write', 'inplace', 'add'}, how to assign to out_data. 'null' means skip assignment, etc.
        in_data : list of NDArray, input data.
        out_data : list of NDArray, pre-allocated output buffers.
        aux : list of NDArray, mutable auxiliary states. Usually not used.
        """
        x = in_data[0].asnumpy()
        y = 1.0 / (1.0 + np.exp(-x))
        self.assign(out_data[0], req[0], mx.nd.array(y))

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        """Implements backward computation

        req : list of {'null', 'write', 'inplace', 'add'}, how to assign to in_grad
        out_grad : list of NDArray, gradient w.r.t. output data.
        in_grad : list of NDArray, gradient w.r.t. input data. This is the output buffer.
        """
        y = out_data[0].asnumpy()
        dy = out_grad[0].asnumpy()
        dx = dy * (1.0 - y) * y   # chain rule: d(sigmoid(x))/dx = y * (1 - y)
        self.assign(in_grad[0], req[0], mx.nd.array(dx))


@mx.operator.register("sigmoid")  # register with name "sigmoid"
class SigmoidProp(mx.operator.CustomOpProp):
    def __init__(self):
        super(SigmoidProp, self).__init__(need_top_grad=True)  # sigmoid needs the gradient from the layers above

    def list_arguments(self):
        #  this can be omitted if you only have 1 input.
        return ['data']

    def list_outputs(self):
        #  this can be omitted if you only have 1 output.
        return ['output']

    def infer_shape(self, in_shapes):
        """Calculate output shapes from input shapes. This can be
        omited if all your inputs and outputs have the same shape.

        in_shapes : list of shape. Shape is described by a tuple of int.
        """
        data_shape = in_shapes[0]
        output_shape = data_shape
        # return 3 lists representing inputs shapes, outputs shapes, and aux data shapes.
        return (data_shape,), (output_shape,), ()

    def create_operator(self, ctx, in_shapes, in_dtypes):
        #  create and return the CustomOp class.
        return Sigmoid()

x = mx.nd.array([0, 1, 2, 3])
# attach gradient buffer to x for autograd
x.attach_grad()
# forward in a record() section to save computation graph for backward
# see autograd tutorial to learn more.
with autograd.record():
    y = mx.nd.Custom(x, op_type='sigmoid')
print(y)

# call backward computation
y.backward()
# gradient is now saved to the grad buffer we attached previously
print(x.grad)
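As an extra sanity check (my addition, not part of the referenced tutorial), the analytic gradient dy * (1 - y) * y can be compared against a centered finite-difference estimate:

def np_sigmoid(v):
    return 1.0 / (1.0 + np.exp(-v))

eps = 1e-4
x0 = x.asnumpy()                                                     # the input used above: [0, 1, 2, 3]
numeric = (np_sigmoid(x0 + eps) - np_sigmoid(x0 - eps)) / (2 * eps)  # finite-difference gradient
print(x.grad.asnumpy())                                              # analytic gradient from the custom op
print(numeric)                                                       # should agree to roughly 1e-7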

The above covered how to create new layers without learnable parameters. The next article will cover creating layers that do have parameters.

References

[1] https://github.com/apache/incubator-mxnet/blob/master/example/numpy-ops/custom_softmax.py
[2] https://mxnet.incubator.apache.org/tutorials/gluon/customop.html
[3] https://blog.csdn.net/qq_25491201/article/details/51284416
