Contents
1--Tensor Operator API
1-1--Convolution Operator
1-2--Activation Operator
1-3--Pooling Operator
1-4--FC Layer Operator
2--Code Example
3--Compile and Run
1--Tensor Operator API
TensorRT's network definition API provides methods for the most commonly used operators, including convolution, activation, and pooling layers. Each add* call returns a layer object, and the output tensor of one layer (obtained via getOutput(0)) is passed as the input of the next:
// Create an empty network
nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U);
// Add a convolution layer
nvinfer1::IConvolutionLayer* conv1 = network->addConvolutionNd(*data, 64, nvinfer1::DimsHW{3, 3}, weightMap["features.0.weight"], weightMap["features.0.bias"]);
// Add an activation layer
nvinfer1::IActivationLayer* relu1 = network->addActivation(*conv1->getOutput(0), nvinfer1::ActivationType::kRELU);
// Add a pooling layer
nvinfer1::IPoolingLayer* pool1 = network->addPoolingNd(*relu1->getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{2, 2});
1-1--Convolution Operator
IConvolutionLayer* addConvolutionNd(
ITensor& input,
int32_t nbOutputMaps,
Dims kernelSize,
Weights kernelWeights,
Weights biasWeights
)
The first parameter is the input tensor;
The second parameter is the number of output feature maps, i.e., the number of output channels;
The third parameter is the size of the convolution kernel;
The fourth and fifth parameters are the kernel and bias weights to load;
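Stride, padding, and dilation are not part of the addConvolutionNd signature; they are configured on the returned layer. Below is a minimal sketch, assuming input is an existing nvinfer1::ITensor and conv_w/conv_b are hypothetical Weights loaded elsewhere:
// Hypothetical 3x3 convolution with stride 1 and "same" padding;
// `input`, `conv_w`, and `conv_b` are assumed to exist already.
nvinfer1::IConvolutionLayer* conv = network->addConvolutionNd(
    input, 64, nvinfer1::DimsHW{3, 3}, conv_w, conv_b);
conv->setStrideNd(nvinfer1::DimsHW{1, 1});   // stride of 1 in H and W
conv->setPaddingNd(nvinfer1::DimsHW{1, 1});  // pad by 1 so a 3x3 kernel preserves H and W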
1-2--Activation Operator
IActivationLayer* addActivation(
ITensor& input,
ActivationType type
)
The first parameter is the input tensor;
The second parameter is the activation type to use, chosen from the following enum:
enum class ActivationType : int32_t
{
kRELU = 0, //!< Rectified linear activation.
kSIGMOID = 1, //!< Sigmoid activation.
kTANH = 2, //!< TanH activation.
kLEAKY_RELU = 3, //!< LeakyRelu activation: x>=0 ? x : alpha * x.
kELU = 4, //!< Elu activation: x>=0 ? x : alpha * (exp(x) - 1).
kSELU = 5, //!< Selu activation: x>0 ? beta * x : beta * (alpha*exp(x) - alpha)
kSOFTSIGN = 6, //!< Softsign activation: x / (1+|x|)
kSOFTPLUS = 7, //!< Parametric softplus activation: alpha*log(exp(beta*x)+1)
kCLIP = 8, //!< Clip activation: max(alpha, min(beta, x))
kHARD_SIGMOID = 9, //!< Hard sigmoid activation: max(0, min(1, alpha*x+beta))
kSCALED_TANH = 10, //!< Scaled tanh activation: alpha*tanh(beta*x)
kTHRESHOLDED_RELU = 11 //!< Thresholded ReLU activation: x>alpha ? x : 0
};
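Several of these types are parameterized by alpha and/or beta (for example kLEAKY_RELU, kCLIP, and kSELU), which are set on the returned layer via setAlpha and setBeta. A minimal sketch, assuming input is an existing nvinfer1::ITensor:
// Hypothetical LeakyReLU with a negative slope of 0.1
nvinfer1::IActivationLayer* lrelu = network->addActivation(
    input, nvinfer1::ActivationType::kLEAKY_RELU);
lrelu->setAlpha(0.1f); // the alpha in: x >= 0 ? x : alpha * x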
1-3--Pooling Operator
IPoolingLayer* addPoolingNd(
ITensor& input,
PoolingType type,
Dims windowSize
)
The first parameter is the input tensor;
The second parameter is the pooling type to use;
The third parameter is the size of the pooling window;
The available pooling types are:
enum class PoolingType : int32_t
{
kMAX = 0, // Maximum over elements
kAVERAGE = 1, // Average over elements. If the tensor is padded, the count includes the padding
kMAX_AVERAGE_BLEND = 2 // Blending between max and average pooling: (1-blendFactor)*maxPool + blendFactor*avgPool
};
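As with convolution, stride and padding are set on the returned layer; for average pooling, you can additionally control whether padded elements count toward the average. A minimal sketch, assuming input is an existing nvinfer1::ITensor:
// Hypothetical 2x2 average pooling with stride 2
nvinfer1::IPoolingLayer* pool = network->addPoolingNd(
    input, nvinfer1::PoolingType::kAVERAGE, nvinfer1::DimsHW{2, 2});
pool->setStrideNd(nvinfer1::DimsHW{2, 2});
pool->setAverageCountExcludesPadding(true); // exclude padded elements from the average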
1-4--FC Layer Operator
IFullyConnectedLayer* addFullyConnected(
ITensor& input,
int32_t nbOutputs,
Weights kernelWeights,
Weights biasWeights
)
The first parameter is the input tensor;
The second parameter is the number of output channels;
The third and fourth parameters are the kernel and bias weights to load;
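For a classifier head, the FC layer's output tensor is typically named and marked as a network output so it can be located at inference time. A minimal sketch, where prev is the preceding layer and fc_w/fc_b are hypothetical Weights:
// Hypothetical final FC layer producing 1000 class scores;
// `prev` is the preceding layer, `fc_w`/`fc_b` are assumed Weights.
nvinfer1::IFullyConnectedLayer* fc = network->addFullyConnected(
    *prev->getOutput(0), 1000, fc_w, fc_b);
fc->getOutput(0)->setName("prob");       // name the output tensor
network->markOutput(*fc->getOutput(0));  // register it as a network output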
2--Code Example
Building VGG11 with the operator APIs (for the complete, runnable code, see liujf69/TensorRT-Demo):
Core program code:
// Create the builder and config
nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(gLogger);
nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();
// Create the network from the builder
nvinfer1::INetworkDefinition* network = builder->createNetworkV2(0U); // empty at first
// Build the network by calling the operator APIs
// Create the input tensor
nvinfer1::ITensor* data = network->addInput(this->INPUT_BLOB_NAME, dt, nvinfer1::Dims3{3, this->INPUT_H, this->INPUT_W});
// Add a convolution layer
nvinfer1::IConvolutionLayer* conv1 = network->addConvolutionNd(*data, 64, nvinfer1::DimsHW{3, 3}, weightMap["features.0.weight"], weightMap["features.0.bias"]);
conv1->setPaddingNd(nvinfer1::DimsHW{1, 1});
// Add an activation layer
nvinfer1::IActivationLayer* relu1 = network->addActivation(*conv1->getOutput(0), nvinfer1::ActivationType::kRELU);
// Add a pooling layer
nvinfer1::IPoolingLayer* pool1 = network->addPoolingNd(*relu1->getOutput(0), nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{2, 2});
pool1->setStrideNd(nvinfer1::DimsHW{2, 2});
...
// Add the FC layer
nvinfer1::IFullyConnectedLayer* fc1 = network->addFullyConnected(*pool1->getOutput(0), 4096, weightMap["classifier.0.weight"], weightMap["classifier.0.bias"]);
...
// Build the engine from the network and config
builder->setMaxBatchSize(maxBatchSize);
config->setMaxWorkspaceSize(1 << 20);
nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
...
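After building, the engine is usually serialized to disk so that later runs can skip the (slow) build step. A minimal sketch, assuming <fstream> is included and vgg.engine is a hypothetical file name:
// Serialize the built engine into host memory and write it to disk
nvinfer1::IHostMemory* serialized = engine->serialize();
std::ofstream out("vgg.engine", std::ios::binary); // hypothetical file name
out.write(static_cast<const char*>(serialized->data()), serialized->size());
serialized->destroy(); // TensorRT 7-style cleanup, matching the API used above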
Main program code:
#include "NvInfer.h"
#include "cuda_runtime_api.h"
#include
#include
#include
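The gLogger passed to createInferBuilder above is a user-supplied implementation of nvinfer1::ILogger; the repository ships its own. A minimal sketch of such a logger (relying on the <iostream> include above) looks like this:
// Minimal ILogger implementation required by createInferBuilder
class Logger : public nvinfer1::ILogger {
    void log(Severity severity, const char* msg) noexcept override {
        // Print warnings and errors; suppress info/verbose messages
        if (severity <= Severity::kWARNING) std::cout << msg << std::endl;
    }
} gLogger;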
3--Compile and Run
mkdir build && cd build
cmake ..
make
./vgg_demo -s
./vgg_demo -d
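In demos of this style, -s presumably builds the network and serializes it to an engine file, while -d deserializes that engine and runs inference; see the linked repository for the exact behavior of the two flags.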