这一步是非常关键的,一定要确保正确,最好多测试几遍。
#include <torch/script.h>  // FIXME(review): original line was a bare "#include"; torch/script.h provides torch::Tensor and torch::RegisterOperators used below
#include "Eigen/Dense"
// Read-only view of a raw T buffer as a dynamic 1-D Eigen array (no copy).
template <typename T>
using ConstEigenVectorArrayMap = Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, 1>>;
// Writable view of a raw T buffer as a dynamic 1-D Eigen array (no copy).
template <typename T>
using EigenVectorArrayMap = Eigen::Map<Eigen::Array<T, Eigen::Dynamic, 1>>;
// Custom GroupNorm: normalize X over each of num_groups channel groups, then
// apply an affine transform (scale, bias).
// Assumes X is laid out [N, C*num_groups, H, W] with float32 data.
// num_groups and eps are 1-element float tensors so they show up as graph
// inputs when the op is exported to ONNX.
torch::Tensor custom_group_norm(torch::Tensor X, torch::Tensor num_groups, torch::Tensor scale, torch::Tensor bias, torch::Tensor eps) {
  // data_ptr<T>() replaces the deprecated data<T>() accessor.
  float* X_data = X.data_ptr<float>();
  float* scale_data = scale.data_ptr<float>();
  float* bias_data = bias.data_ptr<float>();
  const int num_groups_i = static_cast<int>(num_groups.data_ptr<float>()[0]);
  const float epsilon_ = eps.data_ptr<float>()[0];
  torch::Tensor output = torch::zeros(X.sizes());
  float* out = output.data_ptr<float>();
  const int64_t N = X.size(0);
  // Channels per group; assumes X.size(1) is divisible by num_groups.
  const int64_t C = X.size(1) / num_groups_i;
  // Elements normalized together: C * (product of all trailing spatial dims).
  int64_t sample_size = 1;
  for (int64_t i = 2; i < X.dim(); ++i) {  // int64_t: X.dim() is signed, avoid mixed comparison
    sample_size *= X.size(i);
  }
  sample_size *= C;
  // One normalization per (sample, group) pair.
  for (int64_t i = 0; i < N * num_groups_i; ++i) {
    ConstEigenVectorArrayMap<float> Xi(X_data + sample_size * i, sample_size);
    const float Xi_mean = Xi.mean();
    // Biased variance (divide by sample_size), as in GroupNorm.
    const float squared_norm = (Xi - Xi_mean).matrix().squaredNorm();
    const float inv_stdev = 1.0f / std::sqrt(squared_norm / sample_size + epsilon_);
    EigenVectorArrayMap<float> Yi(out + sample_size * i, sample_size);
    // NOTE(review): i runs over N*num_groups, so scale/bias are indexed per
    // group, not per channel — matches the original code, but confirm against
    // the intended GroupNorm spec if C > 1.
    const float channel_scale = inv_stdev * scale_data[i % (C * num_groups_i)];
    const float channel_shift = bias_data[i % (C * num_groups_i)] - Xi_mean * channel_scale;
    Yi = Xi * channel_scale + channel_shift;
  }
  return output;
}
// Register the op in the "ygao" namespace so Python can call it as
// torch.ops.ygao.custom_group_norm after the library is loaded.
static auto registry = torch::RegisterOperators("ygao::custom_group_norm", &custom_group_norm);
其中ygao是指定的你的算子所在的域,大致可以理解成命名空间,比如torch的nn或者jit
torch是通过cpp_extension来指定编译的,所以我们此时写一个类似CMakeLists.txt的setup.py
from setuptools import setup, Extension
from torch.utils import cpp_extension

# Build the custom op as an extension module named "custom_group_norm".
# include_dirs must be passed to the Extension itself — setup() does not know
# this keyword (it is silently ignored as an unknown distribution option), so
# the Eigen headers would never reach the compiler's -I flags.
setup(
    name='custom_group_norm',
    ext_modules=[
        cpp_extension.CppExtension(
            'custom_group_norm',
            ['op_custom.cpp'],
            include_dirs=['/workspace/ygao/software_backup/eigen-eigen-b3f3d4950030'],
        )
    ],
    # BuildExtension supplies the torch-specific compiler/linker flags.
    cmdclass={'build_ext': cpp_extension.BuildExtension},
)
在里面要指定你生成的库文件的名字"custom_group_norm";以及需要的所有源文件、头文件等等,cmdclass都是一样的,不用修改
执行如下命令:
python setup.py install
经过一段时间的编译之后(中途缺啥头文件或者源文件在setup.py中补齐即可),就可以在当前目录下得到build文件夹,其中有一个lib.linux-x86_64-3.7的文件夹,里面就是我们需要的.so文件
创建一个test.py,首先把刚才的库文件load进来,这一步有两种方式
# Load the .so built by setup.py for THIS op (the original line referenced an
# unrelated library, dcn_v2_cpu, which does not match the module built above).
torch.ops.load_library("build/lib.linux-x86_64-3.7/custom_group_norm.cpython-37m-x86_64-linux-gnu.so")
这种方式的op调用方式是
# After load_library, the op appears under the torch.ops.<namespace> tree:
torch.ops.ygao.custom_group_norm(x, num_groups, scale, bias, torch.tensor([0.]))
import custom_group_norm as cop
这种方式的op调用方式是:
# When imported as a Python module instead, the op is reached via the module object:
cop.ygao.custom_group_norm(x, num_groups, scale, bias, torch.tensor([0.]))
def my_group_norm(g, input, num_groups, scale, bias, eps):
    """ONNX symbolic for ygao::custom_group_norm.

    Emits a node in the custom "ygao" domain; eps is exported as a float
    attribute (the "_f" suffix in epsilon_f selects the float attribute type).
    """
    # The emitted node type must be the one the registration below maps to and
    # the one the exported graph is documented to contain; the original emitted
    # "ygao::my_group_norm", which contradicts both.
    return g.op("ygao::custom_group_norm", input, num_groups, scale, bias, epsilon_f=eps)

# From now on, every ygao::custom_group_norm op encountered during export at
# opset >= 11 is translated through my_group_norm.
register_custom_op_symbolic('ygao::custom_group_norm', my_group_norm, 11)
这里需要注意的一点是,这种情况下,即使没有第六步,也可以成功的将模型转出到onnx,但是会报一个未知算子的warning。而这一步就是相当于将my_group_norm这个函数放到了symbol_opset11.py中,并且将函数名换成ygao::custom_group_norm,也就是只要碰到运算ygao::custom_group_norm,就会输出ygao::custom_group_norm类型的算子
def export_custom_op():
    """Export a minimal model containing only the custom op to ./model.onnx."""
    class CustomModel(torch.nn.Module):
        def forward(self, x, num_groups, scale, bias):
            # eps is passed as a constant tensor so it becomes a graph input.
            return torch.ops.ygao.custom_group_norm(x, num_groups, scale, bias, torch.tensor([0.]))

    X = torch.randn(3, 2, 1, 2)
    num_groups = torch.tensor([2.])
    scale = torch.tensor([2., 1.])
    bias = torch.tensor([1., 0.])
    inputs = (X, num_groups, scale, bias)
    f = './model.onnx'
    # opset_version must be >= the opset the symbolic was registered for
    # (register_custom_op_symbolic(..., 11)); the original passed 9, which
    # would bypass the registered symbolic and fall back to a raw ATen node.
    torch.onnx.export(CustomModel(), inputs, f,
                      opset_version=11,
                      example_outputs=None,
                      input_names=["X", "num_groups", "scale", "bias"], output_names=["Y"],
                      custom_opsets={"ygao": 11})

export_custom_op()
这一步就相对比较简单,首先创建一个model,这个model只有custom_group_norm这一个算子,然后定义好input后,通过torch.onnx.export导出即可
Enet本身网络难点有三:
所以开始解决问题:
def max_unpool2d(g, self, indices, output_size):
    # Symbolic for max_unpool2d: emit a plain ONNX MaxUnpool node with the
    # pooled input, the indices, and the target output_size as inputs.
    # NOTE(review): the ONNX MaxUnpool spec also requires a kernel_shape
    # attribute — confirm downstream consumers accept this minimal form.
    return g.op("MaxUnpool", self, indices, output_size)
之后开始转换即可得到最初版本的enet.onnx
centernet主要的问题就是如何在torch/onnx添加可变形卷积的支持,解决方式也很简单,只要按照我上面写的添加新算子的步骤即可