The complete code is in the SampleCharRNN sample.
1 Definitions in nvinfer.h
1.1 The structure describing tensor dimensions
/**
* \class Dims
* \brief structure to define the dimensions of a tensor
*
* \note: currently the following formats are supported for layer inputs and outputs:
* * zero or more index dimensions followed by one channel and two spatial dimensions (e.g. CHW)
* * one time series dimension followed by one index dimension followed by one channel dimension (i.e. TNC)
*/
class Dims
{
public:
static const int MAX_DIMS = 8; //!< the maximum number of dimensions supported for a tensor
int nbDims; //!< the number of dimensions
int d[MAX_DIMS]; //!< the extent of each dimension
DimensionType type[MAX_DIMS]; //!< the type of each dimension
};
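As a quick illustration (a standalone sketch, not from the sample), a CHW tensor can be described either with the DimsCHW helper or by filling a Dims by hand:
nvinfer1::DimsCHW chw{3, 28, 28};            // helper subclass of Dims
nvinfer1::Dims d;                            // equivalent manual construction
d.nbDims = 3;
d.d[0] = 3;  d.type[0] = nvinfer1::DimensionType::kCHANNEL;
d.d[1] = 28; d.type[1] = nvinfer1::DimensionType::kSPATIAL;
d.d[2] = 28; d.type[2] = nvinfer1::DimensionType::kSPATIAL;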
1.2 Adding a PluginLayer to the network
virtual IPluginLayer* addPlugin(ITensor* const* inputs, int nbInputs, IPlugin& plugin) = 0;
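addPlugin() takes an array of input tensor pointers and returns an IPluginLayer. A minimal usage sketch (network, prevLayer, and myPlugin are assumed to already exist):
ITensor* inputs[] = { prevLayer->getOutput(0) };   // single-input plugin
IPluginLayer* pluginLayer = network->addPlugin(inputs, 1, myPlugin);
pluginLayer->setName("myPlugin");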
1.3 The Plugin and PluginFactory in the NVIDIA sample code
// Reshape plugin to feed RNN into FC layer correctly.
class Reshape : public IPlugin
{
public:
Reshape(size_t size) : mSize(size) {printf("Reshape::Reshape() 111\n");}
Reshape(const void* buf, size_t size)
{
printf("Reshape::Reshape() 222\n");
assert(size == sizeof(mSize));
mSize = *static_cast<const size_t*>(buf); // restore the element count saved by serialize()
}
int getNbOutputs() const override{printf("Reshape::getNbOutputs()\n");return 1;}
int initialize() override{printf("Reshape::initialize()\n");return 0;}
void terminate() override{printf("Reshape::terminate()\n");}
size_t getWorkspaceSize(int) const override{printf("Reshape::getWorkspaceSize()\n");return 0;}
int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) override
{
printf("Reshape::enqueue()\n");
// Reshaping is a no-op on memory: copy the input bytes through unchanged.
CHECK(cudaMemcpyAsync(static_cast<float*>(outputs[0]),
static_cast<const float*>(inputs[0]),
sizeof(float) * mSize * batchSize, cudaMemcpyDefault, stream));
return 0;
}
size_t getSerializationSize() override
{
printf("Reshape::getSerializationSize()\n");
return sizeof(mSize);
}
void serialize(void* buffer) override
{
printf("Reshape::serialize()\n");
*static_cast<size_t*>(buffer) = mSize; // save the element count for the deserializing constructor
}
void configure(const Dims*, int, const Dims*, int, int) override{printf("Reshape::configure()\n");}
// The RNN outputs in {L, N, C}, but FC layer needs {C, 1, 1}, so we can convert RNN
// output to {L*N, C, 1, 1} and TensorRT will handle the rest.
Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override
{
printf("Reshape::getOutputDimensions()\n");
assert(nbInputDims == 1);
assert(index == 0);
assert(inputs[index].nbDims == 3);
return DimsNCHW(inputs[index].d[1] * inputs[index].d[0], inputs[index].d[2], 1, 1);
}
private:
size_t mSize{0};
};
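A quick sanity check of the dimension math above (a standalone sketch; SEQ_SIZE, BATCH_SIZE, and HIDDEN_SIZE are the constants defined in the sample):
Reshape r(SEQ_SIZE * BATCH_SIZE * HIDDEN_SIZE);
Dims in = Dims3(SEQ_SIZE, BATCH_SIZE, HIDDEN_SIZE); // RNN output laid out as {L, N, C}
Dims out = r.getOutputDimensions(0, &in, 1);        // -> {L*N, C, 1, 1}
assert(out.nbDims == 4 && out.d[0] == SEQ_SIZE * BATCH_SIZE && out.d[1] == HIDDEN_SIZE);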
class PluginFactory : public nvinfer1::IPluginFactory
{
public:
// deserialization plugin implementation
IPlugin* createPlugin(const char* layerName, const void* serialData, size_t serialLength) override
{
printf("PluginFactory::createPlugin()\n");
assert(!strncmp(layerName, "reshape", 7));
if (!mPlugin) mPlugin = new Reshape(serialData, serialLength);
return mPlugin;
}
void destroyPlugin()
{
printf("PluginFactory::destroyPlugin()\n");
if (mPlugin) delete mPlugin;
mPlugin = nullptr;
}
private:
Reshape *mPlugin{nullptr};
}; // PluginFactory
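Because the factory owns the single Reshape instance it creates, deserialization and cleanup pair up as follows (a sketch; runtime, data, and size are assumed to exist):
PluginFactory pluginFactory;
ICudaEngine* engine = runtime->deserializeCudaEngine(data, size, &pluginFactory);
// ... run inference ...
engine->destroy();
pluginFactory.destroyPlugin(); // frees the Reshape created in createPlugin()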
1.4 Building the inference network with the plugin
void APIToModel(std::map<std::string, Weights>& weightMap, IHostMemory** modelStream)
{
// create the builder
IBuilder* builder = createInferBuilder(gLogger);
// create the model to populate the network, then set the outputs and create an engine
INetworkDefinition* network = builder->createNetwork();
// ... (the embedding and RNN layers are created here as in the full sample; rnn is the IRNNLayer) ...
Reshape reshape(SEQ_SIZE * BATCH_SIZE * HIDDEN_SIZE);
ITensor* ptr = rnn->getOutput(0);
auto plugin = network->addPlugin(&ptr, 1, reshape);
plugin->setName("reshape");
auto engine = builder->buildCudaEngine(*network);
assert(engine != nullptr);
// we don't need the network any more
network->destroy();
// serialize the engine, then close everything down
(*modelStream) = engine->serialize();
engine->destroy();
builder->destroy();
}
int main(int argc, char** argv)
{
// create a model using the API directly and serialize it to a stream
IHostMemory *modelStream{nullptr};
std::map<std::string, Weights> weightMap = loadWeights(locateFile("char-rnn.wts"));
APIToModel(weightMap, &modelStream);
IRuntime* runtime = createInferRuntime(gLogger);
PluginFactory pluginFactory;
ICudaEngine* engine = runtime->deserializeCudaEngine(modelStream->data(), modelStream->size(), &pluginFactory);
// ... (inference and cleanup follow in the full sample)
After the PluginLayer has been added with
auto plugin = network->addPlugin(&ptr, 1, reshape);
the build and serialization steps drive the plugin through its Dims-related callbacks (Dims carries the dimension information of each input and output):
builder->buildCudaEngine() calls the plugin's getOutputDimensions() to learn, for a given input Dims (the output of the layer feeding the PluginLayer in the TensorRT network), what the PluginLayer's output Dims are;
the builder then calls configure(const Dims*, int, const Dims*, int, int) with those input and output Dims to configure the PluginLayer;
engine->serialize() calls the plugin's serialize(void* buffer) to save the current configuration, so that when the engine is rebuilt from the plan file, Reshape(const void* buf, size_t size) can restore the state directly from the buffer;
engine->serialize() thus produces a model stream that embeds the plugin's serialized state, which deserializeCudaEngine() later hands back through PluginFactory::createPlugin().
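To make the save/restore pairing concrete, here is a minimal standalone sketch of the round trip that engine->serialize() and PluginFactory::createPlugin() perform on the plugin's state (only mSize in this plugin):
std::vector<char> buf(sizeof(size_t));
Reshape original(SEQ_SIZE * BATCH_SIZE * HIDDEN_SIZE);
assert(original.getSerializationSize() == buf.size());
original.serialize(buf.data());            // what engine->serialize() triggers
Reshape restored(buf.data(), buf.size());  // what createPlugin() does during deserialization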