[TensorRT] Using a Custom Plugin Layer When Building an Inference Network, and How Its Data Is Handled

The full code is in the SampleCharRNN sample.

 

1 Definitions in NvInfer.h

1.1 The Dims structure that describes tensor dimensions

/**
 * \class Dims
 * \brief structure to define the dimensions of a tensor
 *
 * \note: currently the following formats are supported for layer inputs and outputs:
 * * zero or more index dimensions followed by one channel and two spatial dimensions (e.g. CHW)
 * * one time series dimension followed by one index dimension followed by one channel dimension (i.e. TNC)
 */
class Dims
{
public:
    static const int MAX_DIMS = 8; //!< the maximum number of dimensions supported for a tensor
    int nbDims;                    //!< the number of dimensions
    int d[MAX_DIMS];               //!< the extent of each dimension
    DimensionType type[MAX_DIMS];  //!< the type of each dimension
};
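As a quick illustration (a minimal sketch; DimsCHW and DimensionType are the helper type and enum from NvInfer.h in the TensorRT versions this sample targets, and the concrete numbers are arbitrary), the two supported layouts can be described like this:

nvinfer1::DimsCHW chw(3, 28, 28);   // CHW: nbDims == 3, d == {3, 28, 28}

// TNC layout for an RNN, filled in by hand:
nvinfer1::Dims tnc;
tnc.nbDims = 3;
tnc.d[0] = 50;  tnc.type[0] = nvinfer1::DimensionType::kSEQUENCE; // T: time-series steps
tnc.d[1] = 1;   tnc.type[1] = nvinfer1::DimensionType::kINDEX;    // N: index (batch)
tnc.d[2] = 512; tnc.type[2] = nvinfer1::DimensionType::kCHANNEL;  // C: channels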

1.2 Adding a PluginLayer to the network definition

virtual IPluginLayer* addPlugin(ITensor* const* inputs, int nbInputs, IPlugin& plugin) = 0;
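The inputs are handed over as an array of ITensor pointers. A minimal sketch of a hypothetical two-input call (convA, convB and myPlugin are assumed names, not from the sample; section 1.4 shows the sample's real single-input call):

ITensor* pluginInputs[] = { convA->getOutput(0), convB->getOutput(0) };
IPluginLayer* pluginLayer = network->addPlugin(pluginInputs, 2, myPlugin);
pluginLayer->setName("my_plugin");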

1.3 The Plugin and PluginFactory from the NVIDIA sample code

// Headers needed by the excerpt below; CHECK() is the CUDA error-checking
// macro defined in the sample's common code.
#include <cassert>
#include <cstdio>
#include <cstring>
#include <cuda_runtime_api.h>
#include "NvInfer.h"

using namespace nvinfer1;

// Reshape plugin to feed RNN output into the FC layer correctly.
// The printf() calls are trace output added to show when TensorRT
// invokes each method (see the call sequence discussed below).
class Reshape : public IPlugin
{
public:
    Reshape(size_t size) : mSize(size) { printf("Reshape::Reshape() 111\n"); }
    // Deserialization constructor: rebuild the plugin from a serialized buffer.
    Reshape(const void* buf, size_t size)
    {
        printf("Reshape::Reshape() 222\n");
        assert(size == sizeof(mSize));
        mSize = *static_cast<const size_t*>(buf);
    }
    int getNbOutputs() const override { printf("Reshape::getNbOutputs()\n"); return 1; }
    int initialize() override { printf("Reshape::initialize()\n"); return 0; }
    void terminate() override { printf("Reshape::terminate()\n"); }
    size_t getWorkspaceSize(int) const override { printf("Reshape::getWorkspaceSize()\n"); return 0; }
    int enqueue(int batchSize, const void* const* inputs, void** outputs, void*, cudaStream_t stream) override
    {
        printf("Reshape::enqueue()\n");
        // A reshape never reorders elements, so it reduces to a device-to-device copy.
        CHECK(cudaMemcpyAsync(static_cast<float*>(outputs[0]),
                              static_cast<const float*>(inputs[0]),
                              sizeof(float) * mSize * batchSize, cudaMemcpyDefault, stream));
        return 0;
    }
    size_t getSerializationSize() override
    {
        printf("Reshape::getSerializationSize()\n");
        return sizeof(mSize);
    }
    void serialize(void* buffer) override
    {
        printf("Reshape::serialize()\n");
        *static_cast<size_t*>(buffer) = mSize;
    }
    void configure(const Dims*, int, const Dims*, int, int) override { printf("Reshape::configure()\n"); }
    // The RNN outputs in {L, N, C}, but the FC layer needs {C, 1, 1}, so we can convert
    // the RNN output to {L*N, C, 1, 1} and TensorRT will handle the rest.
    Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override
    {
        printf("Reshape::getOutputDimensions()\n");
        assert(nbInputDims == 1);
        assert(index == 0);
        assert(inputs[index].nbDims == 3);
        return DimsNCHW(inputs[index].d[1] * inputs[index].d[0], inputs[index].d[2], 1, 1);
    }
private:
    size_t mSize{0};
};
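For example, with L = 50, N = 1 and C = 512 as illustrative values (not necessarily the sample's constants), getOutputDimensions() performs this mapping:

// input  (RNN output): {L, N, C}      = {50, 1, 512}
// output (for the FC): {L*N, C, 1, 1} = {50, 512, 1, 1}
// The element count is unchanged, which is why enqueue() can implement
// the reshape as a plain cudaMemcpyAsync().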
class PluginFactory : public nvinfer1::IPluginFactory
{
public:
    // deserialization plugin implementation
    IPlugin* createPlugin(const char* layerName, const void* serialData, size_t serialLength) override
    {
        printf("PluginFactory::createPlugin()\n");
        assert(!strncmp(layerName, "reshape", 7));
        if (!mPlugin) mPlugin = new Reshape(serialData, serialLength);
        return mPlugin;
    }
    void destroyPlugin()
    {
        printf("PluginFactory::destroyPlugin()\n");
        if (mPlugin) delete mPlugin;
        mPlugin = nullptr;
    }
private:
    Reshape* mPlugin{nullptr};
}; // PluginFactory

1.4 Building the inference network with the Plugin

void APIToModel(std::map<std::string, Weights>& weightMap, IHostMemory** modelStream)
{
    // create the builder
    IBuilder* builder = createInferBuilder(gLogger);

    // create the model to populate the network, then set the outputs and create an engine
    INetworkDefinition* network = builder->createNetwork();

    // ... the input tensors and the RNN layer ("rnn") are created here; see SampleCharRNN ...

    Reshape reshape(SEQ_SIZE * BATCH_SIZE * HIDDEN_SIZE);
    ITensor* ptr = rnn->getOutput(0);
    auto plugin = network->addPlugin(&ptr, 1, reshape);
    plugin->setName("reshape");

    // ... the FC/softmax layers and output markings follow; see SampleCharRNN ...

    auto engine = builder->buildCudaEngine(*network);
    assert(engine != nullptr);
    // we don't need the network any more
    network->destroy();

    // serialize the engine, then close everything down
    (*modelStream) = engine->serialize();
    engine->destroy();
    builder->destroy();
}
int main(int argc, char** argv)
{
    // create a model using the API directly and serialize it to a stream
    IHostMemory* modelStream{nullptr};

    std::map<std::string, Weights> weightMap = loadWeights(locateFile("char-rnn.wts"));
    APIToModel(weightMap, &modelStream);

    PluginFactory pluginFactory;
    IRuntime* runtime = createInferRuntime(gLogger);

    // the factory lets the runtime rebuild the plugin layer from the serialized engine
    ICudaEngine* engine = runtime->deserializeCudaEngine(modelStream->data(), modelStream->size(), &pluginFactory);
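The sample's main() continues with inference and cleanup; the order matters for the plugin. A minimal sketch of the remainder (the inference loop itself is elided; see SampleCharRNN):

    IExecutionContext* context = engine->createExecutionContext();
    // ... run inference through context->execute(); see SampleCharRNN ...
    context->destroy();
    engine->destroy();
    runtime->destroy();
    modelStream->destroy();
    // free the Reshape instance allocated in PluginFactory::createPlugin()
    pluginFactory.destroyPlugin();
}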

After the PluginLayer has been added to the network:

auto plugin = network->addPlugin(&ptr, 1, reshape);

TensorRT then drives the plugin through its virtual methods, passing dimension information back and forth as Dims (the input/output dimension descriptions):

builder->buildCudaEngine() calls the plugin's getOutputDimensions() method: given the input Dims (the output of the layer that feeds the PluginLayer in the TensorRT network), the plugin reports the Dims of its own output.

With the input and output Dims resolved, buildCudaEngine() calls configure(const Dims*, int, const Dims*, int, int) to configure the PluginLayer.

engine->serialize() calls the plugin's getSerializationSize() and serialize(void* buffer) methods to save the current configuration, so that when the engine is reconstructed by deserialization, the Reshape(const void* buf, size_t size) constructor can initialize the plugin directly from that buffer.

engine->serialize() thus produces the serialized engine (the IHostMemory model stream), which runtime->deserializeCudaEngine() later restores by calling PluginFactory::createPlugin() for the layer named "reshape".
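Putting this together, the printf() traces added in the classes above would appear in roughly this order over the program's lifetime (an illustrative sketch, not captured output; the exact order and repetition counts vary with the TensorRT version):

Reshape::Reshape() 111            <- APIToModel(): plugin constructed on the stack
Reshape::getNbOutputs()
Reshape::getOutputDimensions()    <- buildCudaEngine(): shape propagation
Reshape::configure()              <- buildCudaEngine(): layer configuration
Reshape::getSerializationSize()   <- engine->serialize()
Reshape::serialize()              <- engine->serialize()
PluginFactory::createPlugin()     <- runtime->deserializeCudaEngine()
Reshape::Reshape() 222            <- deserialization constructor
Reshape::initialize()             <- engine ready for inference
Reshape::enqueue()                <- once per inference call
Reshape::terminate()              <- engine->destroy()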
