[Code Reading] PVCNN

Table of Contents

  • Voxelization
    • python
    • cpp
    • cu
      • Forward pass
      • Backward pass
  • devoxelization
    • python
    • cpp
    • cu
      • Forward pass
      • Backward pass

Point-Voxel CNN for Efficient 3D Deep Learning, 2019 NIPS
Code: https://github.com/mit-han-lab/pvcnn

For an interpretation of the paper itself, see my other blog post.

The concrete implementation of PVConv is in pvcnn-master/modules/pvconv.py:

voxel_features, voxel_coords = self.voxelization(features, coords)
voxel_features = self.voxel_layers(voxel_features)
voxel_features = F.trilinear_devoxelize(voxel_features, voxel_coords, self.resolution, self.training)
fused_features = voxel_features + self.point_features(features)

Voxelization

python

First, as the code above shows, the point-wise features and coords are passed in, so let's follow them into pvcnn-master/modules/voxelization.py:

class Voxelization(nn.Module):
    def __init__(self, resolution, normalize=True, eps=0):
        super().__init__()
        self.r = int(resolution)
        self.normalize = normalize
        self.eps = eps

    def forward(self, features, coords):
        coords = coords.detach()
        # move coords into a local coordinate frame by subtracting the mean
        norm_coords = coords - coords.mean(2, keepdim=True)
        if self.normalize:
            # take the farthest point's distance as the radius, divide by 2 * radius
            # to map coordinates into [-0.5, 0.5], then add 0.5
            norm_coords = norm_coords / (norm_coords.norm(dim=1, keepdim=True).max(dim=2, keepdim=True).values * 2.0 + self.eps) + 0.5
        else:
            norm_coords = (norm_coords + 1) / 2.0
        # resolution is a positive integer; scale norm_coords from [0, 1] to [0, r], then clamp to [0, r-1]
        norm_coords = torch.clamp(norm_coords * self.r, 0, self.r - 1)
        # round to get vox_coords: integers in [0, r-1], i.e. r possible values per axis
        vox_coords = torch.round(norm_coords).to(torch.int32)
        # forward pass: voxelize
        return F.avg_voxelize(features, vox_coords, self.r), norm_coords
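
To make the mapping concrete, here is a minimal sketch with made-up numbers (not from the repo) showing how two points' coordinates become voxel coordinates:

import torch

r = 8
coords = torch.tensor([[[0.0, 2.0],      # x of two points
                        [0.0, 0.0],      # y
                        [0.0, 0.0]]])    # z  ->  shape [B=1, 3, N=2]
norm = coords - coords.mean(2, keepdim=True)                      # center at the origin
radius = norm.norm(dim=1, keepdim=True).max(dim=2, keepdim=True).values
norm = norm / (radius * 2.0) + 0.5                                 # now inside [0, 1]
norm = torch.clamp(norm * r, 0, r - 1)                             # continuous coords in [0, r-1]
vox = torch.round(norm).to(torch.int32)                            # integer voxel coordinates
print(vox)   # [[[0, 7], [4, 4], [4, 4]]]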

The features and vox_coords are then passed on, so let's follow them into pvcnn-master/modules/functional/voxelization.py:

class AvgVoxelization(Function):
    @staticmethod
    def forward(ctx, features, coords, resolution):
        """
        :param ctx:
        :param features: Features of the point cloud, FloatTensor[B, C, N]
        :param coords: Voxelized Coordinates of each point, IntTensor[B, 3, N]
        :param resolution: Voxel resolution
        :return:
            Voxelized Features, FloatTensor[B, C, R, R, R]
        """
        features = features.contiguous()
        coords = coords.int().contiguous()
        b, c, _ = features.shape
        # forward pass in the CUDA backend
        out, indices, counts = _backend.avg_voxelize_forward(features, coords, resolution)
        ctx.save_for_backward(indices, counts)
        return out.view(b, c, resolution, resolution, resolution)

    @staticmethod
    def backward(ctx, grad_output):
        """
        :param ctx:
        :param grad_output: gradient of output, FloatTensor[B, C, R, R, R]
        :return:
            gradient of inputs, FloatTensor[B, C, N]
        """
        b, c = grad_output.shape[:2]
        indices, counts = ctx.saved_tensors
        # backward pass in the CUDA backend
        grad_features = _backend.avg_voxelize_backward(grad_output.contiguous().view(b, c, -1), indices, counts)
        return grad_features, None, None

cpp

The coords passed to the forward pass here is actually vox_coords. Let's follow it into the C++ code, pvcnn-master/modules/functional/src/voxelization/vox.cpp:

/*
  Function: average pool voxelization (forward)
  Args:
    features: features, FloatTensor[b, c, n]
    coords  : coords of each point, IntTensor[b, 3, n]
    resolution : voxel resolution
  Return:
    out : outputs, FloatTensor[b, c, s], s = r ** 3
    ind : voxel index of each point, IntTensor[b, n]
    cnt : #points in each voxel index, IntTensor[b, s]
*/
std::vector<at::Tensor> avg_voxelize_forward(const at::Tensor features,
                                             const at::Tensor coords,
                                             const int resolution) {
  CHECK_CUDA(features);
  CHECK_CUDA(coords);
  CHECK_CONTIGUOUS(features);
  CHECK_CONTIGUOUS(coords);
  CHECK_IS_FLOAT(features);
  CHECK_IS_INT(coords);

  int b = features.size(0);
  int c = features.size(1);
  int n = features.size(2);
  int r = resolution;
  int r2 = r * r;
  int r3 = r2 * r;
  // allocate the output tensors on the GPU
  at::Tensor ind = torch::zeros(
      {b, n}, at::device(features.device()).dtype(at::ScalarType::Int));
  at::Tensor out = torch::zeros(
      {b, c, r3}, at::device(features.device()).dtype(at::ScalarType::Float));
  at::Tensor cnt = torch::zeros(
      {b, r3}, at::device(features.device()).dtype(at::ScalarType::Int));
  // call the CUDA implementation
  avg_voxelize(b, c, n, r, r2, r3, coords.data_ptr<int>(),
               features.data_ptr<float>(), ind.data_ptr<int>(),
               cnt.data_ptr<int>(), out.data_ptr<float>());
  return {out, ind, cnt};
}

cu

Forward pass

Next, we follow into the GPU code, pvcnn-master/modules/functional/src/voxelization/vox.cu:

void avg_voxelize(int b, int c, int n, int r, int r2, int r3, const int *coords,
                  const float *feat, int *ind, int *cnt, float *out) {
  // first count how many points fall into each voxel; this is needed to compute the mean
  grid_stats_kernel<<<b, optimal_num_threads(n)>>>(b, n, r, r2, r3, coords, ind,
                                                   cnt);
  // compute each voxel's averaged feature
  avg_voxelize_kernel<<<b, optimal_num_threads(n)>>>(b, c, n, r3, ind, cnt,
                                                     feat, out);
  CUDA_CHECK_ERRORS();
}

Let's look at the first kernel:

/*
  Function: get how many points in each voxel grid
  Args:
    b      : batch size
    n      : number of points
    r      : voxel resolution
    r2     : = r * r
    r3     : s, voxel cube size = r ** 3
    coords : coords of each point, IntTensor[b, 3, n]
    ind    : voxel index of each point, IntTensor[b, n]
    cnt    : #points in each voxel index, IntTensor[b, s]
*/
__global__ void grid_stats_kernel(int b, int n, int r, int r2, int r3,
                                  const int *__restrict__ coords,
                                  int *__restrict__ ind, int *cnt) {
  int batch_index = blockIdx.x;
  // stride = number of threads processing this batch in parallel
  int stride = blockDim.x;
  int index = threadIdx.x;
  // coords is a pointer; advance it to the current batch
  coords += batch_index * n * 3;
  ind += batch_index * n;
  cnt += batch_index * r3;

  for (int i = index; i < n; i += stride) {
    // if (ind[i] == -1)
    //   continue;
    // compute which voxel this point falls into, i.e. its flat index
    ind[i] = coords[i] * r2 + coords[i + n] * r + coords[i + n + n];
    // increment that voxel's point count
    atomicAdd(cnt + ind[i], 1);
  }
}
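
The flat index ind[i] = x*r2 + y*r + z is just a row-major flattening of the 3-D voxel coordinate. A tiny sketch with hypothetical numbers:

# row-major flattening of a voxel coordinate, mirroring grid_stats_kernel
r = 4
x, y, z = 1, 2, 3
flat = x * r * r + y * r + z                         # 1*16 + 2*4 + 3 = 27
# and the inverse mapping, back to (x, y, z)
print(flat // (r * r), (flat // r) % r, flat % r)    # 1 2 3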

Now the second kernel:

/*
  Function: average pool voxelization (forward)
  Args:
    b   : batch size
    c   : #channels
    n   : number of points
    s   : voxel cube size = voxel resolution ** 3
    ind : voxel index of each point, IntTensor[b, n]
    cnt : #points in each voxel index, IntTensor[b, s]
    feat: features, FloatTensor[b, c, n]
    out : outputs, FloatTensor[b, c, s]
*/
__global__ void avg_voxelize_kernel(int b, int c, int n, int s,
                                    const int *__restrict__ ind,
                                    const int *__restrict__ cnt,
                                    const float *__restrict__ feat,
                                    float *__restrict__ out) {
  int batch_index = blockIdx.x;
  int stride = blockDim.x;
  int index = threadIdx.x;
  ind += batch_index * n;
  feat += batch_index * c * n;
  out += batch_index * c * s;
  cnt += batch_index * s;
  for (int i = index; i < n; i += stride) {
    // flat index of the voxel this point falls into
    int pos = ind[i];
    // if (pos == -1)
    //   continue;
    // number of points in that voxel
    int cur_cnt = cnt[pos];
    if (cur_cnt > 0) {
      float div_cur_cnt = 1.0 / static_cast<float>(cur_cnt);
      for (int j = 0; j < c; j++) {
        // accumulate each channel of the voxel's feature
        atomicAdd(out + j * s + pos, feat[j * n + i] * div_cur_cnt);
      }
    }
  }
}
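
To see what the two kernels compute end to end, here is a rough PyTorch equivalent (an illustrative sketch only; the real implementation stays in CUDA and avoids these loops):

import torch

def avg_voxelize_ref(feat, ind, r3):
    """feat: FloatTensor[B, C, N]; ind: LongTensor[B, N], flat voxel index of each point."""
    B, C, N = feat.shape
    out = torch.zeros(B, C, r3)
    cnt = torch.zeros(B, r3)
    for b in range(B):
        # count how many points fall into each voxel (grid_stats_kernel)
        cnt[b].index_add_(0, ind[b], torch.ones(N))
        # each point contributes feat / (#points in its voxel); summing gives the voxel mean (avg_voxelize_kernel)
        out[b].index_add_(1, ind[b], feat[b] / cnt[b, ind[b]].clamp(min=1))
    return out, cnt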

Backward pass

Next, the backward pass. Given each point's voxel index, it is easy to understand:

/*
  Function: average pool voxelization (backward)
  Args:
    b      : batch size
    c      : #channels
    n      : number of points
    r3     : voxel cube size = voxel resolution ** 3
    ind    : voxel index of each point, IntTensor[b, n]
    cnt    : #points in each voxel index, IntTensor[b, s]
    grad_y : grad outputs, FloatTensor[b, c, s]
    grad_x : grad inputs, FloatTensor[b, c, n]
*/
__global__ void avg_voxelize_grad_kernel(int b, int c, int n, int r3,
                                         const int *__restrict__ ind,
                                         const int *__restrict__ cnt,
                                         const float *__restrict__ grad_y,
                                         float *__restrict__ grad_x) {
  int batch_index = blockIdx.x;
  int stride = blockDim.x;
  int index = threadIdx.x;
  ind += batch_index * n;
  grad_x += batch_index * c * n;
  grad_y += batch_index * c * r3;
  cnt += batch_index * r3;
  for (int i = index; i < n; i += stride) {
    int pos = ind[i];
    // if (pos == -1)
    //   continue;
    int cur_cnt = cnt[pos];
    if (cur_cnt > 0) {
      float div_cur_cnt = 1.0 / static_cast<float>(cur_cnt);
      for (int j = 0; j < c; j++) {
        // note this line
        atomicAdd(grad_x + j * n + i, grad_y[j * r3 + pos] * div_cur_cnt);
      }
    }
  }
}

I marked "note this line" in the code above; it is simply the chain rule. For example, if $c = 0.5(a+b)$ and the gradient of the loss w.r.t. $c$ is $dL/dc$, then $dL/da = dL/dc \times dc/da = dL/dc \times 0.5$.
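
A quick check of that chain rule with PyTorch autograd (illustrative only):

import torch

a = torch.tensor(1.0, requires_grad=True)
b = torch.tensor(3.0, requires_grad=True)
c = 0.5 * (a + b)
c.backward()           # dL/dc = 1
print(a.grad, b.grad)  # tensor(0.5) tensor(0.5): each input receives grad_output * 0.5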

devoxelization

Devoxelization uses trilinear interpolation: the value at a query point is interpolated from its eight neighboring grid corners across the three dimensions. For the algorithm itself you can refer to that blog post; here we go straight to the code (a small weight sketch follows below).
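
As a warm-up, here is a minimal NumPy sketch of the trilinear weights (a standalone illustration, not code from the repo): each of the 8 corners gets the product of one-minus-distance factors along the three axes, and the weights sum to 1.

import numpy as np

def trilinear_weights(x, y, z):
    """Weights of the 8 integer corners surrounding the point (x, y, z)."""
    xd, yd, zd = x - np.floor(x), y - np.floor(y), z - np.floor(z)
    w = {}
    for ix in (0, 1):            # 0 = low corner, 1 = high corner along x
        for iy in (0, 1):
            for iz in (0, 1):
                w[(ix, iy, iz)] = ((xd if ix else 1 - xd) *
                                   (yd if iy else 1 - yd) *
                                   (zd if iz else 1 - zd))
    return w

w = trilinear_weights(1.25, 2.5, 0.75)
print(round(sum(w.values()), 6))   # 1.0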

python

First, look at pvcnn-master/modules/pvconv.py: it calls the trilinear_devoxelize function directly, without wrapping it in a module.

def forward(self, inputs):
    features, coords = inputs
    voxel_features, voxel_coords = self.voxelization(features, coords)
    voxel_features = self.voxel_layers(voxel_features)
    # voxel_features: FloatTensor[B, C, R, R, R], produced by the 3D conv layers
    # voxel_coords: FloatTensor[B, 3, N], coordinates normalized and clamped to [0, r-1] (see the previous section)
    voxel_features = F.trilinear_devoxelize(voxel_features, voxel_coords, self.resolution, self.training)
    fused_features = voxel_features + self.point_features(features)
    return fused_features, coords

Following into pvcnn-master/modules/functional/devoxelization.py, there is not much here; it simply calls into the C++ backend.

class TrilinearDevoxelization(Function):
    @staticmethod
    def forward(ctx, features, coords, resolution, is_training=True):
        """
        :param ctx:
        :param coords: the coordinates of points, FloatTensor[B, 3, N]
        :param features: FloatTensor[B, C, R, R, R]
        :param resolution: int, the voxel resolution
        :param is_training: bool, training mode
        :return:
            FloatTensor[B, C, N]
        """
        B, C = features.shape[:2]
        features = features.contiguous().view(B, C, -1)
        coords = coords.contiguous()
        outs, inds, wgts = _backend.trilinear_devoxelize_forward(resolution, is_training, coords, features)
        if is_training:
            ctx.save_for_backward(inds, wgts)
            ctx.r = resolution
        return outs

    @staticmethod
    def backward(ctx, grad_output):
        """
        :param ctx: 
        :param grad_output: gradient of outputs, FloatTensor[B, C, N]
        :return:
            gradient of inputs, FloatTensor[B, C, R, R, R]
        """
        inds, wgts = ctx.saved_tensors
        grad_inputs = _backend.trilinear_devoxelize_backward(grad_output.contiguous(), inds, wgts, ctx.r)
        return grad_inputs.view(grad_output.size(0), grad_output.size(1), ctx.r, ctx.r, ctx.r), None, None, None


trilinear_devoxelize = TrilinearDevoxelization.apply

cpp

Next, the C++ code, pvcnn-master/modules/functional/src/interpolate/trilinear_devox.cpp:

/*
  Function: trilinear devoxelization (forward)
  Args:
    r        : voxel resolution
    training : whether in training mode
    coords   : the coordinates of points, FloatTensor[b, 3, n]
    features : features, FloatTensor[b, c, s], s = r ** 3
  Return:
    outs : outputs, FloatTensor[b, c, n]
    inds : the voxel coordinates of point cube, IntTensor[b, 8, n]
    wgts : weight for trilinear interpolation, FloatTensor[b, 8, n]
*/
std::vector<at::Tensor>
trilinear_devoxelize_forward(const int r, const bool is_training,
                             const at::Tensor coords,
                             const at::Tensor features) {
  CHECK_CUDA(features);
  CHECK_CUDA(coords);
  CHECK_CONTIGUOUS(features);
  CHECK_CONTIGUOUS(coords);
  CHECK_IS_FLOAT(features);
  CHECK_IS_FLOAT(coords);

  int b = features.size(0);
  int c = features.size(1);
  int n = coords.size(2);
  int r2 = r * r;
  int r3 = r2 * r;
  // allocate the output tensor
  at::Tensor outs = torch::zeros(
      {b, c, n}, at::device(features.device()).dtype(at::ScalarType::Float));
  if (is_training) {
    // in training mode, also return wgts and inds; the backward pass needs them
    at::Tensor inds = torch::zeros(
        {b, 8, n}, at::device(features.device()).dtype(at::ScalarType::Int));
    at::Tensor wgts = torch::zeros(
        {b, 8, n}, at::device(features.device()).dtype(at::ScalarType::Float));
    trilinear_devoxelize(b, c, n, r, r2, r3, true, coords.data_ptr<float>(),
                         features.data_ptr<float>(), inds.data_ptr<int>(),
                         wgts.data_ptr<float>(), outs.data_ptr<float>());
    return {outs, inds, wgts};
  } else {
    // in eval mode, wgts and inds are just 1-element placeholders
    at::Tensor inds = torch::zeros(
        {1}, at::device(features.device()).dtype(at::ScalarType::Int));
    at::Tensor wgts = torch::zeros(
        {1}, at::device(features.device()).dtype(at::ScalarType::Float));
    trilinear_devoxelize(b, c, n, r, r2, r3, false, coords.data_ptr<float>(),
                         features.data_ptr<float>(), inds.data_ptr<int>(),
                         wgts.data_ptr<float>(), outs.data_ptr<float>());
    return {outs, inds, wgts};
  }
}

cu

Forward pass

pvcnn-master/modules/functional/src/interpolate/trilinear_devox.cu

void trilinear_devoxelize(int b, int c, int n, int r, int r2, int r3,
                          bool training, const float *coords, const float *feat,
                          int *inds, float *wgts, float *outs) {
  trilinear_devoxelize_kernel<<<b, optimal_num_threads(n)>>>(
      b, c, n, r, r2, r3, training, coords, feat, inds, wgts, outs);
  CUDA_CHECK_ERRORS();
}

Now we reach the core of the code. First, something that must be understood: voxel_features has shape [R, R, R], corresponding to [R, R, R] voxels. But which point inside each voxel does a feature correspond to? From the code, it is the corner with the smallest coordinates along all three dimensions. For example, the first voxel covers ([0,1), [0,1), [0,1)) and the last voxel covers ([r-1,r), [r-1,r), [r-1,r)), so the first voxel feature corresponds to the point (0,0,0) and the last one to (r-1,r-1,r-1). This raises a question: how are boundary points handled, i.e. points falling into the R-th voxel along some dimension? Such a voxel only has features at some of its corners; the very last voxel, for instance, has a feature at only one corner. This is exactly the problem the code has to solve.

The special case is handled as follows: the point coordinates were already clamped to [0, r-1] when they were computed, so whenever a point's coordinate equals r-1, the point lies in the last voxel along that dimension. Conversely, if a point falls into the R-th voxel along some dimension, its coordinate along that dimension must be exactly r-1.
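
A tiny sketch (with a hypothetical r) of the two facts the kernel below relies on: a clamped boundary coordinate makes x_d_1 == 0, and the x_hi & r2 trick then turns the neighbour offset into 0 so nothing reads out of bounds:

r = 8
r2 = r * r
for x in (2.3, float(r - 1)):             # an interior point vs. a clamped boundary point
    x_lo = int(x)                          # floor for non-negative x
    x_d_1 = x - x_lo                       # fractional part = weight of the "high" corner
    x_hi = -1 if x_d_1 > 0 else 0          # same trick as in the CUDA kernel
    offset = x_hi & r2                     # -1 & r2 == r2 (interior), 0 & r2 == 0 (boundary)
    print(x, x_d_1, offset)
# 2.3 -> offset 64: the +x neighbour voxel is read with a nonzero weight
# 7.0 -> offset 0:  idx100 falls back to idx000, whose weight x_d_1 is 0 anyway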

/*
  Function: trilinear devoxelization (forward)
  Args:
    b   : batch size
    c   : #channels
    n   : number of points
    r   : voxel resolution
    r2  : r ** 2
    r3  : r ** 3
    coords : the coordinates of points, FloatTensor[b, 3, n]
    feat   : features, FloatTensor[b, c, r3]
    inds   : the voxel indices of point cube, IntTensor[b, 8, n]
    wgts   : weight for trilinear interpolation, FloatTensor[b, 8, n]
    outs   : outputs, FloatTensor[b, c, n]
*/
__global__ void trilinear_devoxelize_kernel(int b, int c, int n, int r, int r2,
                                            int r3, bool is_training,
                                            const float *__restrict__ coords,
                                            const float *__restrict__ feat,
                                            int *__restrict__ inds,
                                            float *__restrict__ wgts,
                                            float *__restrict__ outs) {
  int batch_index = blockIdx.x;
  int stride = blockDim.x;
  int index = threadIdx.x;
  // advance the pointers to the current batch
  coords += batch_index * n * 3;
  // in eval mode these offsets point to meaningless locations, but the memory behind wgts and inds is never written
  inds += batch_index * n * 8;
  wgts += batch_index * n * 8;
  feat += batch_index * c * r3;
  outs += batch_index * c * n;

  for (int i = index; i < n; i += stride) {
    // coordinates of the i-th point
    float x = coords[i];
    float y = coords[i + n];
    float z = coords[i + n + n];
    // the lowest interpolation corner, i.e. (floor(x), floor(y), floor(z))
    float x_lo_f = floorf(x);
    float y_lo_f = floorf(y);
    float z_lo_f = floorf(z);
	
    // each grid cell has edge length 1, so these are the point's distances to its surrounding corners
    // note: thanks to point cloud sparsity, points almost never land exactly on a cell boundary.
    // however, voxel_coords were clamped from [0, r] down to [0, r-1],
    // so anything above r-1 becomes r-1, and points inside the last voxel all sit on its lower face,
    // i.e. when x_d_1 == 0 the point is (in practice) inside the R-th voxel along x.
    float x_d_1 = x - x_lo_f; // / (x_hi_f - x_lo_f + 1e-8f)
    float y_d_1 = y - y_lo_f;
    float z_d_1 = z - z_lo_f;
    float x_d_0 = 1.0f - x_d_1;
    float y_d_0 = 1.0f - y_d_1;
    float z_d_0 = 1.0f - z_d_1;
	
    // compute the interpolation weights
    // e.g. in 1-D, linear interpolation is f(x) = (ceil(x)-x) * f(floor(x)) + (x-floor(x)) * f(ceil(x))
    // wgt000 = (ceil(x)-x) * (ceil(y)-y) * (ceil(z)-z)
    // so wgt000 is the weight of the corner (floor(x), floor(y), floor(z))
    float wgt000 = x_d_0 * y_d_0 * z_d_0;
    float wgt001 = x_d_0 * y_d_0 * z_d_1;
    float wgt010 = x_d_0 * y_d_1 * z_d_0;
    float wgt011 = x_d_0 * y_d_1 * z_d_1;
    float wgt100 = x_d_1 * y_d_0 * z_d_0;
    float wgt101 = x_d_1 * y_d_0 * z_d_1;
    float wgt110 = x_d_1 * y_d_1 * z_d_0;
    float wgt111 = x_d_1 * y_d_1 * z_d_1;
	
    // integer coordinates of (floor(x), floor(y), floor(z))
    int x_lo = static_cast<int>(x_lo_f);
    int y_lo = static_cast<int>(y_lo_f);
    int z_lo = static_cast<int>(z_lo_f);

    // note: in the very last voxel, x_d_1 == y_d_1 == z_d_1 == 0
    int x_hi = (x_d_1 > 0) ? -1 : 0;
    int y_hi = (y_d_1 > 0) ? -1 : 0;
    // if z_d_1 == 0 the point is on the boundary and there is no next corner feature in z, so z_hi is 0;
    // otherwise z_hi is 1 (the z stride in the flat index is 1, so no bitmask trick is needed)
    int z_hi = (z_d_1 > 0) ? 1 : 0;

    int idx000 = x_lo * r2 + y_lo * r + z_lo;
    int idx001 = idx000 + z_hi;      // x_lo * r2 + y_lo * r + z_hi;
    // when y_hi == 0, y_d_1 == 0, so the point lies in the R-th voxel along y and there is no next voxel in y;
    // then y_hi & r == 0, which keeps the index in bounds
    // when y_hi == -1, the point is not on the boundary; since -1 has all bits set, y_hi & r == r
    int idx010 = idx000 + (y_hi & r);  // x_lo * r2 + y_hi * r + z_lo;
    int idx011 = idx010 + z_hi;      // x_lo * r2 + y_hi * r + z_hi;
    int idx100 = idx000 + (x_hi & r2); // x_hi * r2 + y_lo * r + z_lo;
    int idx101 = idx100 + z_hi;      // x_hi * r2 + y_lo * r + z_hi;
    int idx110 = idx100 + (y_hi & r);  // x_hi * r2 + y_hi * r + z_lo;
    int idx111 = idx110 + z_hi;      // x_hi * r2 + y_hi * r + z_hi;

    if (is_training) {
      // wgts and inds are written only in training mode; in eval mode their memory is left untouched
      wgts[i] = wgt000;
      wgts[i + n] = wgt001;
      wgts[i + n * 2] = wgt010;
      wgts[i + n * 3] = wgt011;
      wgts[i + n * 4] = wgt100;
      wgts[i + n * 5] = wgt101;
      wgts[i + n * 6] = wgt110;
      wgts[i + n * 7] = wgt111;
      inds[i] = idx000;
      inds[i + n] = idx001;
      inds[i + n * 2] = idx010;
      inds[i + n * 3] = idx011;
      inds[i + n * 4] = idx100;
      inds[i + n * 5] = idx101;
      inds[i + n * 6] = idx110;
      inds[i + n * 7] = idx111;
    }

    for (int j = 0; j < c; j++) {
      int jr3 = j * r3;
      // compute each channel of this point's output feature
      outs[j * n + i] =
          wgt000 * feat[jr3 + idx000] + wgt001 * feat[jr3 + idx001] +
          wgt010 * feat[jr3 + idx010] + wgt011 * feat[jr3 + idx011] +
          wgt100 * feat[jr3 + idx100] + wgt101 * feat[jr3 + idx101] +
          wgt110 * feat[jr3 + idx110] + wgt111 * feat[jr3 + idx111];
    }
  }
}

Backward pass

/*
  Function: trilinear devoxelization (backward)
  Args:
    b   : batch size
    c   : #channels
    n   : number of points
    r3  : voxel cube size = voxel resolution ** 3
    inds   : the voxel indices of point cube, IntTensor[b, 8, n]
    wgts   : weight for trilinear interpolation, FloatTensor[b, 8, n]
    grad_y : grad outputs, FloatTensor[b, c, n]
    grad_x : grad inputs, FloatTensor[b, c, r3]
*/
__global__ void trilinear_devoxelize_grad_kernel(
    int b, int c, int n, int r3, const int *__restrict__ inds,
    const float *__restrict__ wgts, const float *__restrict__ grad_y,
    float *__restrict__ grad_x) {
  int batch_index = blockIdx.x;
  int stride = blockDim.x;
  int index = threadIdx.x;
  // wgts and inds were computed and saved in the forward pass
  inds += batch_index * n * 8;
  wgts += batch_index * n * 8;
  grad_x += batch_index * c * r3;
  grad_y += batch_index * c * n;

  for (int i = index; i < n; i += stride) {
    // for each point, the flat indices of its 8 corner voxels
    int idx000 = inds[i];
    int idx001 = inds[i + n];
    int idx010 = inds[i + n * 2];
    int idx011 = inds[i + n * 3];
    int idx100 = inds[i + n * 4];
    int idx101 = inds[i + n * 5];
    int idx110 = inds[i + n * 6];
    int idx111 = inds[i + n * 7];
    // the 8 interpolation weights of those corners
    float wgt000 = wgts[i];
    float wgt001 = wgts[i + n];
    float wgt010 = wgts[i + n * 2];
    float wgt011 = wgts[i + n * 3];
    float wgt100 = wgts[i + n * 4];
    float wgt101 = wgts[i + n * 5];
    float wgt110 = wgts[i + n * 6];
    float wgt111 = wgts[i + n * 7];

    for (int j = 0; j < c; j++) {
      int jr3 = j * r3;
      // back-propagate the gradient of each channel to the 8 corner voxels
      float g = grad_y[j * n + i];
      atomicAdd(grad_x + jr3 + idx000, wgt000 * g);
      atomicAdd(grad_x + jr3 + idx001, wgt001 * g);
      atomicAdd(grad_x + jr3 + idx010, wgt010 * g);
      atomicAdd(grad_x + jr3 + idx011, wgt011 * g);
      atomicAdd(grad_x + jr3 + idx100, wgt100 * g);
      atomicAdd(grad_x + jr3 + idx101, wgt101 * g);
      atomicAdd(grad_x + jr3 + idx110, wgt110 * g);
      atomicAdd(grad_x + jr3 + idx111, wgt111 * g);
    }
  }
}
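
In other words, the backward pass scatters each point's incoming gradient back to its 8 corner voxels with the same trilinear weights. A rough NumPy equivalent for a single batch (illustration only):

import numpy as np

def devox_backward_ref(grad_y, inds, wgts, r3):
    """grad_y: [C, N] gradient at the points; inds, wgts: [8, N] saved in the forward pass."""
    C, N = grad_y.shape
    grad_x = np.zeros((C, r3), dtype=grad_y.dtype)
    for k in range(8):
        # unbuffered accumulation, the NumPy counterpart of atomicAdd
        np.add.at(grad_x, (slice(None), inds[k]), wgts[k] * grad_y)
    return grad_x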
