github地址:
本文安装的环境是RTX3090+CUDA11.1+CUDNN8.0.5+Pytorch1.8.0+PYTHON3.7.0
# basic python libraries
conda create --name centerpoint python=3.7
conda activate centerpoint
conda install pytorch==1.8.0 torchvision==0.9.0 torchaudio==0.8.0 cudatoolkit=11.1 -c pytorch -c conda-forge
git clone https://hub.fastgit.org/CarkusL/CenterPoint.git
cd CenterPoint
pip install -r requirements.txt
# add CenterPoint to PYTHONPATH by adding the following line to ~/.bashrc (change the path accordingly)
export PYTHONPATH="${PYTHONPATH}:PATH_TO_CENTERPOINT"
git clone https://github.com/tianweiy/nuscenes-devkit
# add the following line to ~/.bashrc and reactivate bash (remember to change the PATH_TO_NUSCENES_DEVKIT value)
export PYTHONPATH="${PYTHONPATH}:PATH_TO_NUSCENES_DEVKIT/python-sdk"
# 加到~/.bashrc里面
export PATH=/usr/local/cuda-11.1/bin:$PATH
export CUDA_PATH=/usr/local/cuda-11.1
export CUDA_HOME=/usr/local/cuda-11.1
export LD_LIBRARY_PATH=/usr/local/cuda-11.1/lib64:$LD_LIBRARY_PATH
source ~/.bashrc
bash setup.sh
中间编译deform_conv_cuda
报错
解决办法:
在CenterPoint/det3d/ops/dcn/src/deform_conv_cuda.cpp
中用TORCH_CHECK
替换掉AT_CHECK
git clone https://github.com/NVIDIA/apex
cd apex
git checkout 5633f6 # recent commit doesn't build in our system
pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./
sudo apt-get install libboost-all-dev
git clone https://github.com/traveller59/spconv.git --recursive
cd spconv && git checkout 7342772
python setup.py bdist_wheel
cd ./dist && pip install *
在python setup.py bdist_wheel
时报错:
error: no matching function for call to ‘torch::jit::RegisterOperators::RegisterOperators(const char [28], <unresolved overloaded function type>)’
解决办法:
将torch::jit::RegisterOperators()
替换成torch::RegisterOperators()
nuscenes数据集目录结构
# For nuScenes Dataset
└── NUSCENES_DATASET_ROOT
├── samples <-- key frames
├── sweeps <-- frames without annotation
├── maps <-- unused
├── v1.0-mini <-- metadata
mkdir data
cd data
mkdir nuScenes
cd nuScenes
#下面使用软连接或者拷贝的方式把前面提到的目录结构放进去
#接着运行以下代码,其中NUSCENES_TRAINVAL_DATASET_ROOT代表到nuScenes文件夹的路径
python tools/create_data.py nuscenes_data_prep --root_path=NUSCENES_TRAINVAL_DATASET_ROOT --version="v1.0-mini" --nsweeps=10
python tools/export_pointpillars_onnx.py
解决办法:
初步认为是pytorch版本问题,使用CUDA10.2+CUDNN8.2.2+Pytorch1.7.0+PYTHON3.7.0之后没有问题
python tools/simplify_model.py
python tools/merge_pfe_rpn_model.py
参考地址
将CenterPoint/tensorrt/samples
文件夹下的文件放到tensorrt主目录下的samples
文件夹下,将CenterPoint/tensorrt/data
文件夹下的文件放到tensorrt主目录下的data
文件夹下
进tensorRT主目录下的samples/centerpoint
中
make
报错:
提示找不到NvInferPlugin.h
解决办法:
sudo cp TENSORRT_ROOT/include/* /usr/include
TENSORRT_ROOT
是TensorRT的主目录
继续报错:
解决办法:
我这边的原因是因为作者可能使用的低版本的tensorrt,而我的版本较高,导致接口对应不上,因此需要修改头文件和cpp文件,修改后如下:
ScatterNDPlugin.h
:
#ifndef SCATTERNDPLUGIN_H
#define SCATTERNDPLUGIN_H
// BUG FIX: guard was BATCHTILEPLUGIN_H (copied from another plugin) and
// could collide with the real BatchTilePlugin header in the same build.

#include "NvInferPlugin.h"

#include <string>
#include <vector>

namespace nvinfer1
{
namespace plugin
{

// IPluginV2Ext implementation of the ONNX ScatterND op used by CenterPoint:
// rows of an "updates" tensor are copied into a zero-filled dense output at
// positions taken from an int32 index tensor.
class ScatterNDPlugin : public IPluginV2Ext
{
public:
    // Builds a plugin from shape attributes (used by the creator).
    ScatterNDPlugin(const std::string name, const size_t mOutputSizeAttr[], const size_t inputShapeAttr[], const DataType type);
    // Rebuilds a plugin from the blob produced by serialize().
    ScatterNDPlugin(const std::string name, const void* data, size_t length);
    // The plugin is meaningless without its shape/type state.
    ScatterNDPlugin() = delete;
    int32_t getNbOutputs() const noexcept override;
    Dims getOutputDimensions(int32_t index, Dims const* inputs, int32_t nbInputDims) noexcept override;
    int32_t initialize() noexcept override;
    void terminate() noexcept override;
    size_t getWorkspaceSize(int32_t maxBatchSize) const noexcept override;
    int32_t enqueue(int32_t batchSize, void const* const* inputs, void* const* outputs, void* workspace,
        cudaStream_t stream) noexcept override;
    DataType getOutputDataType(
        int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept override;
    size_t getSerializationSize() const noexcept override;
    void serialize(void* buffer) const noexcept override;
    bool isOutputBroadcastAcrossBatch(
        int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept override;
    bool canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept override;
    void configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, int32_t nbOutputs,
        DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast,
        bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept override;
    bool supportsFormat(DataType type, PluginFormat format) const noexcept override;
    AsciiChar const* getPluginType() const noexcept override;
    AsciiChar const* getPluginVersion() const noexcept override;
    void destroy() noexcept override;
    IPluginV2Ext* clone() const noexcept override;
    void setPluginNamespace(AsciiChar const* pluginNamespace) noexcept override;
    AsciiChar const* getPluginNamespace() const noexcept override;

private:
    const std::string mLayerName; // instance name assigned by the network
    size_t mCopySize;             // NOTE(review): never read or written in the .cu — candidate for removal
    std::string mNamespace;
    DataType mDataType;           // kFLOAT or kHALF (chosen via DATA_TYPE in the .cu)
    size_t mOutputSize[2];     // [H*W, C]
    size_t mInputIndexSize[2]; // index tensor shape — pairs, read with stride 2 by the kernel
};

// Factory registered with TensorRT's plugin registry; builds ScatterNDPlugin
// instances either from ONNX attributes or from a serialized engine.
class ScatterNDSamplePluginCreator : public IPluginCreator
{
public:
    ScatterNDSamplePluginCreator();
    AsciiChar const* getPluginNamespace() const noexcept override;
    AsciiChar const* getPluginVersion() const noexcept override;
    PluginFieldCollection const* getFieldNames() noexcept override;
    IPluginV2* createPlugin(AsciiChar const* name, PluginFieldCollection const* fc) noexcept override;
    IPluginV2* deserializePlugin(AsciiChar const* name, void const* serialData, size_t serialLength) noexcept override;
    void setPluginNamespace(AsciiChar const* pluginNamespace) noexcept override
    {
        mNamespace = pluginNamespace;
    }
    AsciiChar const* getPluginName() const noexcept override
    {
        // BUG FIX: must return the registered plugin *name*, not the
        // namespace — the registry looks creators up by this string.
        // (A literal is used because SCATTERND_PLUGIN_NAME lives in the
        // .cu translation unit.)
        return "ScatterND";
    }

private:
    static PluginFieldCollection mFC;
    static std::vector<PluginField> mPluginAttributes;
    std::string mNamespace;
    DataType mDataType;
};

} // namespace plugin
} // namespace nvinfer1
#endif
ScatterNDPlugin.cu
:
/**
* For the usage of those member function, please refer to the
* offical api doc.
* https://docs.nvidia.com/deeplearning/tensorrt/api/c_api/classnvinfer1_1_1_i_plugin_v2_ext.html
*/
#include "ScatterNDPlugin.h"

#include <cassert>
#include <cstring>
#include <iostream>

#include "cuda_runtime.h"
#include "cuda_fp16.h"
// Use fp16 mode for inference
// DATA_TYPE is the precision the creator assigns to newly built plugins;
// switch to nvinfer1::DataType::kFLOAT for fp32 inference.
#define DATA_TYPE nvinfer1::DataType::kHALF
// Threads per CUDA block for the scatter kernel.
#define THREAD_NUM 1024
// Helper function for deserializing plugin state.
// Reads one T from the byte stream and advances `buffer` past it.
// BUG FIX: copy through std::memcpy instead of dereferencing a
// reinterpret_cast'ed pointer — blob offsets are not guaranteed to be
// aligned for T, so the old read was undefined behaviour.
template <typename T>
T readFromBuffer(const char*& buffer)
{
    T val;
    std::memcpy(&val, buffer, sizeof(T));
    buffer += sizeof(T);
    return val;
}
// Helper function for serializing plugin state.
// Copies `val` into the byte stream and advances `buffer` past it.
// BUG FIX: std::memcpy instead of a reinterpret_cast store, which was
// undefined behaviour when the destination offset is misaligned for T.
template <typename T>
void writeToBuffer(char*& buffer, const T& val)
{
    std::memcpy(buffer, &val, sizeof(T));
    buffer += sizeof(T);
}
using namespace nvinfer1;
using nvinfer1::plugin::ScatterNDPlugin;
using nvinfer1::plugin::ScatterNDSamplePluginCreator;

// Registry identity of the plugin: looked up as name "ScatterND", version "1".
static const char* SCATTERND_PLUGIN_VERSION{"1"};
static const char* SCATTERND_PLUGIN_NAME{"ScatterND"};

// Out-of-line storage for the creator's static field-collection members.
PluginFieldCollection ScatterNDSamplePluginCreator::mFC{};
std::vector<PluginField> ScatterNDSamplePluginCreator::mPluginAttributes;
// Attribute constructor: used by the creator when building the plugin from
// ONNX node attributes. Caches the output shape ([H*W, C], see header) and
// the index-tensor shape for use at enqueue time.
ScatterNDPlugin::ScatterNDPlugin(const std::string name, const size_t outputShapeArray[],
    const size_t indexShapeArray[], const DataType type) : mLayerName(name), mDataType(type)
{
    for (int dim = 0; dim < 2; ++dim)
    {
        mOutputSize[dim] = outputShapeArray[dim];
        mInputIndexSize[dim] = indexShapeArray[dim];
    }
}
// Deserialization constructor: rebuilds the plugin from the byte blob
// produced by serialize(). The read order must exactly match the write
// order in serialize().
ScatterNDPlugin::ScatterNDPlugin(const std::string name, const void* data, size_t length)
    : mLayerName(name)
{
    const char *d = reinterpret_cast<const char *>(data);
    const char *a = d; // remember the start to validate consumed length
    mDataType = readFromBuffer<DataType>(d);
    mOutputSize[0] = readFromBuffer<size_t>(d);
    mOutputSize[1] = readFromBuffer<size_t>(d);
    mInputIndexSize[0] = readFromBuffer<size_t>(d);
    mInputIndexSize[1] = readFromBuffer<size_t>(d);
    assert(d == a + length); // every serialized byte must be consumed
}
// ScatterND produces exactly one output tensor.
int32_t ScatterNDPlugin::getNbOutputs() const noexcept
{
    return 1;
}

// The output has the same 2-D shape as the data input (inputs[0]).
Dims ScatterNDPlugin::getOutputDimensions(int32_t index, Dims const* inputs, int32_t nbInputDims) noexcept
{
    // scatterND data input
    return Dims2(inputs[0].d[0],inputs[0].d[1]);
}

// No per-plugin resources to set up.
int32_t ScatterNDPlugin::initialize() noexcept
{
    return 0;
}

// The kernel writes directly into the output; no scratch workspace needed.
size_t ScatterNDPlugin::getWorkspaceSize(int32_t maxBatchSize) const noexcept
{
    return 0;
}

// Output type follows the updates tensor (inputs[2], the data scattered
// into the output by enqueue()).
DataType ScatterNDPlugin::getOutputDataType(
    int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept
{
    return inputTypes[2];
}
// One thread per index row: copies a channel_num-wide row from the update
// tensor into the output row selected by the index tensor.
// The index tensor is read with stride 2 and only element [idx_num*2+1] is
// used as the destination row (assumes 2-component index pairs whose first
// component is unused — TODO confirm against the exported ONNX graph).
// Negative destination rows act as no-op padding entries.
template <typename Dtype>
__global__ void _ScatterNDKernel(const Dtype *updata_input, const int *indicesInputPtr , Dtype* output,
    int channel_num, int max_index_num) {
    // global thread id == index-row number
    int idx_num = blockDim.x * blockIdx.x + threadIdx.x;
    if (idx_num >= max_index_num) return; // guard threads beyond the index count
    int idx_output = indicesInputPtr[idx_num*2+1];
    if (idx_output < 0) return; // padding entry, nothing to scatter
    for(int idx=0; idx < channel_num; idx++){
        output[idx_output*channel_num+idx] = updata_input[idx_num*channel_num+idx];
    }
}
int32_t ScatterNDPlugin::enqueue(int32_t batchSize, void const* const* inputs, void* const* outputs, void* workspace,
cudaStream_t stream) noexcept
{
int channel_num = mOutputSize[1];
int max_index_num = mInputIndexSize[0];
int totalElems = mOutputSize[0]*channel_num;
dim3 blockSize(THREAD_NUM);
dim3 gridsize(max_index_num/blockSize.x+1);
// if you want to inference use fp32, change the DATA_TYPE
switch (mDataType)
{
case nvinfer1::DataType::kFLOAT:
cudaMemset(outputs[0], 0, totalElems * sizeof(float));
_ScatterNDKernel<<<gridsize, blockSize,0,stream>>>(static_cast<float const*> (inputs[2]), static_cast<int32_t const*> (inputs[1]),
static_cast<float *> (outputs[0]), channel_num, max_index_num);
break;
case nvinfer1::DataType::kHALF:
cudaMemset(outputs[0], 0, totalElems * sizeof(float)/2);
_ScatterNDKernel<<<gridsize, blockSize,0,stream>>>(static_cast<int16_t const*> (inputs[2]), static_cast<int32_t const*> (inputs[1]),
static_cast<int16_t *> (outputs[0]), channel_num, max_index_num);
break;
default:
std::cout << "[ERROR]: mDataType dones't support" << std::endl;
}
return 0;
}
// Serializes the plugin state into `buffer`. The write order must exactly
// match the read order in the deserialization constructor, and the total
// must equal getSerializationSize().
void ScatterNDPlugin::serialize(void* buffer) const noexcept
{
    char* d = static_cast<char*>(buffer);
    char *a = d; // start pointer, used to check the written length
    writeToBuffer<DataType>(d, mDataType);
    writeToBuffer<size_t>(d, mOutputSize[0]);
    writeToBuffer<size_t>(d, mOutputSize[1]);
    writeToBuffer<size_t>(d, mInputIndexSize[0]);
    writeToBuffer<size_t>(d, mInputIndexSize[1]);
    assert(d == a + getSerializationSize());
}
// Nothing was allocated in initialize(), so nothing to release here.
void ScatterNDPlugin::terminate() noexcept {
}

// One DataType tag followed by the four cached shape values; must match
// what serialize() writes and the deserializing constructor reads.
size_t ScatterNDPlugin::getSerializationSize() const noexcept
{
    return sizeof(DataType)+ 4*sizeof(size_t);
}

// Batch broadcasting is not used by this plugin.
bool ScatterNDPlugin::isOutputBroadcastAcrossBatch(
    int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept
{
    return false;
}

bool ScatterNDPlugin::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept
{
    return false;
}

// Called by the builder once shapes/types are fixed; caches the output
// shape and the index-tensor shape for enqueue().
// NOTE(review): assumes outputDims[0] is [H*W, C] and inputDims[1] is the
// index tensor — confirm the input ordering matches the ONNX node.
void ScatterNDPlugin::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, int32_t nbOutputs,
    DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast,
    bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept
{
    mOutputSize[0] = outputDims[0].d[0];
    mOutputSize[1] = outputDims[0].d[1];
    mInputIndexSize[0] = inputDims[1].d[0];
    mInputIndexSize[1] = inputDims[1].d[1];
}
// Reports which type/format combinations the plugin can execute.
// BUG FIX: the original ignored `format` entirely and could therefore claim
// support for vectorized/packed layouts the copy kernel cannot handle;
// restrict to linear (row-major, unvectorized) buffers.
bool ScatterNDPlugin::supportsFormat(DataType type, PluginFormat format) const noexcept
{
    if (format != PluginFormat::kLINEAR)
    {
        return false;
    }
    return type == nvinfer1::DataType::kINT32 || type == nvinfer1::DataType::kFLOAT
        || type == nvinfer1::DataType::kHALF;
}
// Registry identity: plugin type name ("ScatterND").
AsciiChar const* ScatterNDPlugin::getPluginType() const noexcept
{
    return SCATTERND_PLUGIN_NAME;
}

// Registry identity: plugin version ("1").
AsciiChar const* ScatterNDPlugin::getPluginVersion() const noexcept
{
    return SCATTERND_PLUGIN_VERSION;
}

// TensorRT releases plugins through this call; the object owns itself.
void ScatterNDPlugin::destroy() noexcept
{
    delete this;
}

// Copies the plugin (shape/type state via the attribute constructor) and
// propagates the namespace, as the IPluginV2Ext contract requires.
IPluginV2Ext* ScatterNDPlugin::clone() const noexcept
{
    auto* plugin = new ScatterNDPlugin(mLayerName, mOutputSize, mInputIndexSize, mDataType);
    plugin->setPluginNamespace(mNamespace.c_str());
    return plugin;
}

// Stores the namespace assigned by TensorRT.
void ScatterNDPlugin::setPluginNamespace(AsciiChar const* pluginNamespace) noexcept
{
    mNamespace = pluginNamespace;
}

AsciiChar const* ScatterNDPlugin::getPluginNamespace() const noexcept
{
    return mNamespace.c_str();
}
// Declares the ONNX attributes this creator consumes: two 3-element int32
// shape arrays (presumably [batch, rows, cols] — createPlugin() reads
// elements [1] and [2]; TODO confirm against the exporter).
// NOTE(review): mFC/mPluginAttributes are static, so constructing a second
// creator would append duplicate fields.
ScatterNDSamplePluginCreator::ScatterNDSamplePluginCreator()
{
    mPluginAttributes.emplace_back(PluginField("output_shape", nullptr, PluginFieldType::kINT32, 3));
    mPluginAttributes.emplace_back(PluginField("index_shape", nullptr, PluginFieldType::kINT32, 3));
    mFC.nbFields = mPluginAttributes.size();
    mFC.fields = mPluginAttributes.data();
}
// Returns the namespace set via setPluginNamespace().
// BUG FIX: this previously returned SCATTERND_PLUGIN_NAME — the plugin
// *name*, not the namespace. getPluginName()/getPluginNamespace() had their
// return values swapped, which breaks registry lookup by name+namespace.
AsciiChar const* ScatterNDSamplePluginCreator::getPluginNamespace() const noexcept
{
    return mNamespace.c_str();
}
// Creator version; must match ScatterNDPlugin::getPluginVersion().
AsciiChar const* ScatterNDSamplePluginCreator::getPluginVersion() const noexcept
{
    return SCATTERND_PLUGIN_VERSION;
}

// Exposes the attribute schema declared in the constructor.
PluginFieldCollection const* ScatterNDSamplePluginCreator::getFieldNames() noexcept
{
    return &mFC;
}
// Builds a ScatterNDPlugin from the ONNX node's attributes. Each shape
// attribute arrives as a 3-element int32 array; elements [1] and [2]
// (rows, channels) are kept, element [0] is skipped. The precision is
// forced to the compile-time DATA_TYPE.
IPluginV2* ScatterNDSamplePluginCreator::createPlugin(AsciiChar const* name, PluginFieldCollection const* fc) noexcept
{
    mDataType = DATA_TYPE;
    size_t outShape[2] = {0, 0};
    size_t idxShape[2] = {0, 0};
    for (int i = 0; i < fc->nbFields; ++i)
    {
        const PluginField& field = fc->fields[i];
        const auto* dims = static_cast<const int32_t*>(field.data);
        if (strcmp(field.name, "output_shape") == 0)
        {
            outShape[0] = dims[1];
            outShape[1] = dims[2];
        }
        else if (strcmp(field.name, "index_shape") == 0)
        {
            idxShape[0] = dims[1];
            idxShape[1] = dims[2];
        }
    }
    auto* plugin = new ScatterNDPlugin(name, outShape, idxShape, mDataType);
    plugin->setPluginNamespace(mNamespace.c_str());
    return plugin;
}
// Rebuilds a plugin from an engine blob during engine deserialization.
IPluginV2* ScatterNDSamplePluginCreator::deserializePlugin(AsciiChar const* name, void const* serialData, size_t serialLength) noexcept
{
    return new ScatterNDPlugin(name, serialData, serialLength);
}

// Registers the creator with TensorRT's global plugin registry at load time.
REGISTER_TENSORRT_PLUGIN(ScatterNDSamplePluginCreator);
继续make,报错:
解决办法:
1.下载tensorrt7,将其中lib
文件夹下的libmyelin.so
,libmyelin.so.1
,libmyelin.so.1.1.116
放到现在的TensorRT8根目录中的lib
文件夹下
2.将Makefile.config
中
COMMON_LIBS += $(MYELIN_LIB) $(NVRTC_LIB)
替换为
COMMON_LIBS += $(NVRTC_LIB)
运行CenterPoint
下的TensorRT_Visualize.ipynb
可以生成data,将tensorrt/data
目录下的文件复制到TENSORRT8根目录/data
下
接着在TENSORRT8根目录/bin
文件夹下执行./centerpoint
发现程序没有输出耗时信息
解决办法:
将TENSORRT8根目录/samples/centerpoint/samplecenterpoint.cpp
中253行中的路径改为绝对路径
253行原本为:
std::vector<std::string> filePath = glob("../"+mParams.dataDirs[0]+"/points/*.bin");
接着报错:
解决办法:
tensorrt版本换成7.1.3.4,完美解决,记得换完后把/usr/include
下面的NvInfer*.h
删掉,换成最新的。
**PS:**高版本适配低版本也太难了,最后还是妥协了= =