mxnet训练模型、导出模型、加载模型 进行预测(python和C++)

mxnet支持将已训练的模型导出成网络和参数分离的json和params文件,方便离线加载进行预测和验证,同时由于mxnet支持python,C++,scala等多种编程语言,这一特性使得mxnet可以在生产系统上部署「fine-tuning」

本文以一个简单的线性回归训练模型的例子,来介绍如何在mxnet中训练模型,导出模型,加载模型,进行后续预测,其中预测部分采用了python和C++双版本

训练和导出模型

from mxnet import autograd, nd
from mxnet import gluon
from mxnet import init
from mxnet.gluon import nn
from mxnet.gluon import data as gdata
from mxnet.gluon import loss as gloss

# Problem size: every sample has `input_dim` features; `input_num` samples in total.
input_dim = 2
input_num = 100

# Ground-truth parameters of the linear model y = w[0]*x0 + w[1]*x1 + b
# that is used to synthesize the training labels.
true_w = [3, -2.5]
true_b = 7.6

# Synthetic, noise-free training set: random normal features and the labels
# produced by the ground-truth model above.
x_label = nd.random.normal(shape=(input_num, input_dim))
y_label = true_w[0] * x_label[:, 0] + true_w[1] * x_label[:, 1] + true_b

# Mini-batch loader over the (features, label) pairs, served in shuffled order.
batch_size = 10
dataset = gdata.ArrayDataset(x_label, y_label)
data_iter = gdata.DataLoader(dataset, batch_size, shuffle=True)

# Model: a single Dense layer with one output unit, i.e. linear regression.
# HybridSequential is used so that the network can be hybridized and exported below.
net = nn.HybridSequential()
net.add(nn.Dense(1))
net.initialize()

# Squared (L2) loss, minimized with plain SGD.
loss = gloss.L2Loss()
optimize_method = "sgd"
learning_rate = 0.03
trainer = gluon.Trainer(net.collect_params(), optimize_method, {"learning_rate": learning_rate})

# Training loop.
num_epoch = 20
for epoch in range(num_epoch):
    for x, y in data_iter:
        with autograd.record():
            temp_loss = loss(net(x), y)
        temp_loss.backward()
        # Normalize the gradient by the number of samples actually in this batch,
        # so a shorter final batch is handled correctly if the sizes are changed.
        trainer.step(x.shape[0])
    # asscalar() yields a plain Python number; formatting a 1-element numpy
    # array with %f relies on a deprecated implicit array-to-scalar conversion.
    epoch_loss = loss(net(x_label), y_label).mean().asscalar()
    print("epoch %d, loss %f" % (epoch, epoch_loss))

# The learned parameters; they should end up close to true_w and true_b.
print(net[0].weight.data(), net[0].bias.data())

# Sanity-check the trained model on a few hand-written samples and show the predictions.
x_test = nd.array([[3, 5], [6, 10], [13, 7]])
print(net(x_test))

# Export the network definition and the trained parameters.
# export() requires the block to be hybridized first and to have run at least one
# forward pass after hybridize(); the files are written with the "simple_net"
# prefix and the epoch number (simple_net-symbol.json / simple_net-0020.params).
net.hybridize()
net(x_label)
net.export("simple_net", num_epoch)

训练过程


epoch 0, loss 19.267626
epoch 1, loss 10.838810
epoch 2, loss 6.167604
epoch 3, loss 3.535518
epoch 4, loss 2.036959
epoch 5, loss 1.179224
epoch 6, loss 0.684780
epoch 7, loss 0.398763
epoch 8, loss 0.232661
epoch 9, loss 0.135978
epoch 10, loss 0.079521
epoch 11, loss 0.046530
epoch 12, loss 0.027269
epoch 13, loss 0.015979
epoch 14, loss 0.009358
epoch 15, loss 0.005487
epoch 16, loss 0.003218
epoch 17, loss 0.001887
epoch 18, loss 0.001106
epoch 19, loss 0.000649

训练结果
weight和bias

[[ 2.9752004 -2.494717 ]]
<NDArray 1x2 @cpu(0)>, 
[7.570985]
<NDArray 1 @cpu(0)>

加载模型预测数据

python版本

# load model and predict
import mxnet as mx
import numpy as np

# Samples to feed through the exported model, one row per sample.
eval_data = np.array([[3, 5], [6,10], [13, 7]])
# The iterator is also given a label array; zeros of matching length act as a placeholder.
eval_label = np.zeros(len(eval_data))

# Iterate one sample at a time; num_batch is passed to predict() below
# (presumably an upper bound on the number of batches - only three exist here).
batch_size = 1
num_batch = 5
eval_iter = mx.io.NDArrayIter(eval_data, eval_label, batch_size, shuffle=False)

# Restore the symbol and parameters saved under the "simple_net" prefix at epoch 20.
sym, arg_params, aux_params = mx.model.load_checkpoint("simple_net", 20)

# Wrap the symbol in a Module on the CPU; only the "data" input is declared, no label inputs.
mod = mx.mod.Module(
    symbol=sym,
    context=mx.cpu(),
    data_names=["data"],
    label_names=[],
)
# Inference-only binding; each batch is a 1 x 2 array (one sample, two features).
mod.bind(for_training=False, data_shapes=[("data", (1, 2))])
mod.set_params(arg_params, aux_params)

# Run the forward pass over the iterator and show the result
# (an NDArray; it can be converted to a numpy array if needed).
predictions = mod.predict(eval_iter, num_batch)
print(predictions)

输出结果

[[ 4.1365533 ]
 [ 0.69436216]
 [29.17997   ]]
<NDArray 3x1 @cpu(0)>

C++版本

关于mxnet编译并使用C++接口相关配置,请参考

  • windows编译mxnet使用C++
  • linux编译mxnet使用C++

这里仅仅使用mxnet的C++接口加载已导出的模型文件,然后进行前向预测输出
main.cpp

#include <cstdlib>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include <mxnet/c_predict_api.h>

// Reads an entire file into a heap buffer at construction time.
//
// On success, GetLength() is the file size in bytes and GetBuffer() points to
// the file contents followed by one terminating '\0' (not counted in the
// length), so the buffer can be handed to APIs that expect a C string.
// On any failure (file cannot be opened, size cannot be determined, short
// read) an error is written to std::cerr, GetLength() is 0 and GetBuffer()
// is nullptr.
class BufferFile
{
public:
    std::string file_path_;           // path the data was loaded from
    std::size_t length_ = 0;          // number of bytes loaded; 0 if loading failed
    std::unique_ptr<char[]> buffer_;  // file contents + trailing '\0'; null if loading failed

    // Opens `file_path` in binary mode and loads its whole contents.
    explicit BufferFile(const std::string &file_path)
        : file_path_(file_path)
    {
        std::ifstream ifs(file_path.c_str(), std::ios::in | std::ios::binary);
        if (!ifs)
        {
            std::cerr << "Can't open the file. Please check " << file_path << std::endl;
            return;
        }

        // Determine the file size by seeking to the end.
        ifs.seekg(0, std::ios::end);
        const std::streamoff end_pos = ifs.tellg();
        if (end_pos < 0)
        {
            // tellg() reports failure as -1; casting that to size_t would
            // request an enormous allocation.
            std::cerr << "Can't determine the size of the file " << file_path << std::endl;
            return;
        }
        length_ = static_cast<std::size_t>(end_pos);
        ifs.seekg(0, std::ios::beg);
        std::cout << file_path.c_str() << " ... " << length_ << " bytes\n";

        // One extra byte for the terminator: the contents may be consumed as a
        // NUL-terminated string, which would otherwise read past the buffer.
        buffer_.reset(new char[length_ + 1]);
        buffer_[length_] = '\0';

        ifs.read(buffer_.get(), static_cast<std::streamsize>(length_));
        if (!ifs)
        {
            // A partially filled buffer must not be mistaken for valid data.
            std::cerr << "Can't read the file " << file_path << std::endl;
            buffer_.reset();
            length_ = 0;
        }
        // The stream is closed by its destructor.
    }

    // Number of bytes loaded from the file (excluding the trailing '\0').
    std::size_t GetLength() const
    {
        return length_;
    }

    // Pointer to the loaded bytes, or nullptr if loading failed.
    char* GetBuffer() const
    {
        return buffer_.get();
    }
};

int main(int argc, char* argv[])
{
    // model file path
    std::string json_file = "model/simple_net-symbol.json";
    std::string param_file = "model/simple_net-0020.params";

    // read model file
    BufferFile json_data(json_file);
    BufferFile param_data(param_file);

    if (json_data.GetLength() == 0 || param_data.GetLength() == 0)
    {
        return EXIT_FAILURE;
    }

    // mxnet parameters
    int dev_type = 1;  // 1: cpu, 2: gpu, we can change
    int dev_id = 0;   // arbitrary.
    mx_uint num_input_nodes = 1;  // 1 for feedforward
    const char *input_key[1] = { "data" };
    const char **input_keys = input_key;

    // define input data shape, notice this must be identical
    const mx_uint input_shape_indptr[2] = { 0, 2 }; // column dim is 2
    const mx_uint input_shape_data[2] = { 3, 2 }; // 3 x 2 matrix input data shape

    // global predicator handler
    PredictorHandle pred_hnd = nullptr;

    // create predictor
    MXPredCreate(static_cast<const char*>(json_data.GetBuffer()),
        static_cast<const char*>(param_data.GetBuffer()),
        static_cast<int>(param_data.GetLength()),
        dev_type,
        dev_id,
        num_input_nodes,
        input_keys,
        input_shape_indptr,
        input_shape_data,
        &pred_hnd);

    if (!pred_hnd)
    {
        std::cerr << "Failed to create predict handler" << std::endl;
        return EXIT_FAILURE;
    }

    // prepare test data
    std::vector input_data{3, 5, 6, 10, 13, 7};

    // set input data for mxnet
    MXPredSetInput(pred_hnd, "data", input_data.data(), input_data.size());

    // do predict forward in mxnet model
    MXPredForward(pred_hnd);

    mx_uint output_index = 0;
    mx_uint *output_shape = nullptr;
    mx_uint ouput_shape_len;

    // get output result
    MXPredGetOutputShape(pred_hnd, output_index, &output_shape, &ouput_shape_len);

    std::size_t size = 1;
    for (mx_uint i = 0; i < ouput_shape_len; ++i) { size *= output_shape[i]; }

    // construct output data from size
    std::vector<float> output_data(size);

    MXPredGetOutput(pred_hnd, output_index, &(output_data[0]), static_cast(size));

    // release preditor
    MXPredFree(pred_hnd);

    // print output data
    std::cout << "the result calculated by trained simple net: " << std::endl;
    for (int i = 0; i < output_data.size(); i++)
        std::cout << output_data[i] << std::endl;

    return EXIT_SUCCESS;
}

输出结果

the result calculated by trained simple net: 
4.13655
0.694362
29.18

基本一致

总结

  • 原理其实很清晰并且简单,就是保存net,然后加载net,说是预测,其实就是输入一些数据,看一下输出是否符合预期或者用来进行后续处理而已
  • 模型导出之前需要输入数据forward一下才能导出
  • python中进行加载数据predict,可以不定义label的占位符,但是必须传一个值进去,全0数组就行
  • C++中输入和输出的数据在内存中都是按照一维数组排列和存储的,所以需要对应处理

你可能感兴趣的:(C/C++,Machine,Learning,python,机器学习)