Route: PyTorch -> .wts -> .engine
Focus: building the network structure with the TensorRT C++ API.
References: https://github.com/wang-xinyu/pytorchx https://github.com/wang-xinyu/tensorrtx
Code: AlexNet is used as the example.
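The listing assumes a handful of globals (INPUT_BLOB_NAME, OUTPUT_BLOB_NAME, INPUT_H, INPUT_W) and a loadWeights() helper that reads the .wts file exported on the pytorchx side. Below is a minimal sketch of these, following the tensorrtx text format (entry count on the first line, then one "name count hex-value..." line per tensor); the concrete blob names and input resolution are assumptions, not taken from the listing itself.

#include <NvInfer.h>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <map>
#include <string>

using namespace nvinfer1;

// Assumed I/O names and input resolution for AlexNet (224x224 input, 1000 classes).
static const char* INPUT_BLOB_NAME = "data";
static const char* OUTPUT_BLOB_NAME = "prob";
static const int INPUT_H = 224;
static const int INPUT_W = 224;

// Parse a .wts file: first line is the number of entries, then one line per tensor:
// "<name> <decimal count> <hex float bits> <hex float bits> ...".
std::map<std::string, Weights> loadWeights(const std::string& file)
{
    std::map<std::string, Weights> weightMap;
    std::ifstream input(file);
    assert(input.is_open() && "Unable to open .wts file");

    int32_t count;
    input >> count;
    assert(count > 0 && "Invalid .wts file");

    while (count--)
    {
        Weights wt{DataType::kFLOAT, nullptr, 0};
        std::string name;
        uint32_t size;
        input >> name >> std::dec >> size;

        // Each value is stored as the hexadecimal bit pattern of a 32-bit float.
        uint32_t* values = reinterpret_cast<uint32_t*>(malloc(sizeof(uint32_t) * size));
        for (uint32_t i = 0; i < size; ++i)
        {
            input >> std::hex >> values[i];
        }
        wt.values = values;
        wt.count = size;
        weightMap[name] = wt;
    }
    return weightMap;
}

The createEngine() function below then builds the AlexNet graph layer by layer on top of these helpers.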
// Create the engine using only the API and not any parser.
ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt)
{
INetworkDefinition* network = builder->createNetworkV2(0U);
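// 0U => no explicit-batch flag: this network uses the implicit batch dimension supplied later via builder->setMaxBatchSize().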
// Create input tensor of shape { 3, INPUT_H, INPUT_W } with name INPUT_BLOB_NAME
ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{3, INPUT_H, INPUT_W});
assert(data);
std::map<std::string, Weights> weightMap = loadWeights("../alexnet.wts");
Weights emptywts{DataType::kFLOAT, nullptr, 0};
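// Reusable zero-length Weights for layers without a bias; AlexNet's conv/FC layers all have biases, so it stays unused here.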
//(0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
//(1): ReLU(inplace=True)
//(2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
//# key: features.0.weight
//# value: torch.Size([64, 3, 11, 11])
//# key: features.0.bias
//# value: torch.Size([64])
IConvolutionLayer* conv1 = network->addConvolutionNd(*data, 64, DimsHW{11, 11}, weightMap["features.0.weight"], weightMap["features.0.bias"]);
assert(conv1);
conv1->setStrideNd(DimsHW{4, 4});
conv1->setPaddingNd(DimsHW{2, 2});
// Add activation layer using the ReLU algorithm.
IActivationLayer* relu1 = network->addActivation(*conv1->getOutput(0), ActivationType::kRELU);
assert(relu1);
// Add max pooling layer with a 3x3 window and a stride of 2x2.
IPoolingLayer* pool1 = network->addPoolingNd(*relu1->getOutput(0), PoolingType::kMAX, DimsHW{3, 3});
assert(pool1);
pool1->setStrideNd(DimsHW{2, 2});
//(3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
//(4): ReLU(inplace=True)
//(5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
//# key: features.3.weight
//# value: torch.Size([192, 64, 5, 5])
//# key: features.3.bias
//# value: torch.Size([192])
IConvolutionLayer* conv2 = network->addConvolutionNd(*pool1->getOutput(0), 192, DimsHW{5, 5}, weightMap["features.3.weight"], weightMap["features.3.bias"]);
assert(conv2);
conv2->setPaddingNd(DimsHW{2, 2});
IActivationLayer* relu2 = network->addActivation(*conv2->getOutput(0), ActivationType::kRELU);
assert(relu2);
IPoolingLayer* pool2 = network->addPoolingNd(*relu2->getOutput(0), PoolingType::kMAX, DimsHW{3, 3});
assert(pool2);
pool2->setStrideNd(DimsHW{2, 2});
//(6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
//(7): ReLU(inplace=True)
//# key: features.6.weight
//# value: torch.Size([384, 192, 3, 3])
//# key: features.6.bias
//# value: torch.Size([384])
IConvolutionLayer* conv3 = network->addConvolutionNd(*pool2->getOutput(0), 384, DimsHW{3, 3}, weightMap["features.6.weight"], weightMap["features.6.bias"]);
assert(conv3);
conv3->setPaddingNd(DimsHW{1, 1});
IActivationLayer* relu3 = network->addActivation(*conv3->getOutput(0), ActivationType::kRELU);
assert(relu3);
//(8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
//(9): ReLU(inplace=True)
//# key: features.8.weight
//# value: torch.Size([256, 384, 3, 3])
//# key: features.8.bias
//# value: torch.Size([256])
IConvolutionLayer* conv4 = network->addConvolutionNd(*relu3->getOutput(0), 256, DimsHW{3, 3}, weightMap["features.8.weight"], weightMap["features.8.bias"]);
assert(conv4);
conv4->setPaddingNd(DimsHW{1, 1});
IActivationLayer* relu4 = network->addActivation(*conv4->getOutput(0), ActivationType::kRELU);
assert(relu4);
// (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
// (11): ReLU(inplace=True)
// (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
//
//# key: features.10.weight
//# value: torch.Size([256, 256, 3, 3])
//# key: features.10.bias
//# value: torch.Size([256])
IConvolutionLayer* conv5 = network->addConvolutionNd(*relu4->getOutput(0), 256, DimsHW{3, 3}, weightMap["features.10.weight"], weightMap["features.10.bias"]);
assert(conv5);
conv5->setPaddingNd(DimsHW{1, 1});
IActivationLayer* relu5 = network->addActivation(*conv5->getOutput(0), ActivationType::kRELU);
assert(relu5);
IPoolingLayer* pool3 = network->addPoolingNd(*relu5->getOutput(0), PoolingType::kMAX, DimsHW{3, 3});
assert(pool3);
pool3->setStrideNd(DimsHW{2, 2});
// (0): Dropout(p=0.5, inplace=False)
// (1): Linear(in_features=9216, out_features=4096, bias=True)
// (2): ReLU(inplace=True)
//# key: classifier.1.weight
//# value: torch.Size([4096, 9216])
//# key: classifier.1.bias
//# value: torch.Size([4096])
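// The Dropout layers (classifier.0 and classifier.3) are no-ops at inference time, so no TensorRT layer is added for them.
// After pool3 the feature map is 256 x 6 x 6 = 9216 values; addFullyConnected flattens the CHW input itself,
// matching in_features=9216 of the first Linear layer, so no explicit flatten/reshape layer is needed.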
IFullyConnectedLayer* fc1 = network->addFullyConnected(*pool3->getOutput(0), 4096, weightMap["classifier.1.weight"], weightMap["classifier.1.bias"]);
assert(fc1);
IActivationLayer* relu6 = network->addActivation(*fc1->getOutput(0), ActivationType::kRELU);
assert(relu6);
// (4): Linear(in_features=4096, out_features=4096, bias=True)
// (5): ReLU(inplace=True)
//# key: classifier.4.weight
//# value: torch.Size([4096, 4096])
//# key: classifier.4.bias
//# value: torch.Size([4096])
IFullyConnectedLayer* fc2 = network->addFullyConnected(*relu6->getOutput(0), 4096, weightMap["classifier.4.weight"], weightMap["classifier.4.bias"]);
assert(fc2);
IActivationLayer* relu7 = network->addActivation(*fc2->getOutput(0), ActivationType::kRELU);
assert(relu7);
// (6): Linear(in_features=4096, out_features=1000, bias=True)
//# key: classifier.6.weight
//# value: torch.Size([1000, 4096])
//# key: classifier.6.bias
//# value: torch.Size([1000])
IFullyConnectedLayer* fc3 = network->addFullyConnected(*relu7->getOutput(0), 1000, weightMap["classifier.6.weight"], weightMap["classifier.6.bias"]);
assert(fc3);
fc3->getOutput(0)->setName(OUTPUT_BLOB_NAME);
std::cout << "set name out" << std::endl;
network->markOutput(*fc3->getOutput(0));
// Build engine
builder->setMaxBatchSize(maxBatchSize);
config->setMaxWorkspaceSize(1 << 20);
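// 1 << 20 bytes = 1 MiB of builder workspace; increase this if the builder cannot find tactics within the limit.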
ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
std::cout << "build out" << std::endl;
// Don't need the network any more
network->destroy();
// Release host memory
for (auto& mem : weightMap)
{
free((void*) (mem.second.values));
}
return engine;
}
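To finish the .wts -> .engine route, createEngine() is typically called from a small driver that serializes the engine and writes it to disk, along the lines of the tensorrtx examples. A hedged sketch follows; gLogger (an ILogger implementation) and the output file name are assumptions, not part of the listing above.

// Build the network with createEngine() and serialize it into a host-memory blob.
void APIToModel(unsigned int maxBatchSize, IHostMemory** modelStream)
{
    IBuilder* builder = createInferBuilder(gLogger);        // gLogger: assumed ILogger instance
    IBuilderConfig* config = builder->createBuilderConfig();

    ICudaEngine* engine = createEngine(maxBatchSize, builder, config, DataType::kFLOAT);
    assert(engine != nullptr);

    // Serialize the engine so it can be written to disk and deserialized at runtime.
    (*modelStream) = engine->serialize();

    engine->destroy();
    config->destroy();
    builder->destroy();
}

// Usage sketch:
//   IHostMemory* modelStream{nullptr};
//   APIToModel(1, &modelStream);
//   std::ofstream p("alexnet.engine", std::ios::binary);
//   p.write(reinterpret_cast<const char*>(modelStream->data()), modelStream->size());
//   modelStream->destroy();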
Summary: translate the printed network information, line by line, into TensorRT C++ API calls.