【OpenPCDet】稀疏卷积SPConv-v1.2代码解读(5)

【普通稀疏卷积】

        了解完子流形3D稀疏卷积之后,我们再来看spconv对普通3D稀疏卷积的处理过程。这需要回到spconv_ops.cc文件,从getIndicePairs中处理普通3D稀疏卷积的分支讲起。

摘自:src/spconv/spconv_ops.cc

std::vector
getIndicePairs(torch::Tensor indices, int64_t batchSize,
               std::vector outSpatialShape,
               std::vector spatialShape,
               std::vector kernelSize, std::vector stride,
               std::vector padding, std::vector dilation,
               std::vector outPadding, int64_t _subM,
               int64_t _transpose, int64_t _useHash) {

    //...省略....
    auto indicePairUnique = torch::full(
        {indicePairs.numel() / 2 + 1}, std::numeric_limits::max(),
        torch::dtype(torch::kInt32).device(indices.device()));
    torch::Tensor outInds =
        //e.g. torch.Size([N*27,3+1])
        torch::zeros({numAct * kernelVolume, coorDim + 1},
                     torch::dtype(torch::kInt32).device(indices.device()));
    if (indices.device().type() == torch::kCPU) {
      numActOut = create_conv_indice_pair_cpu(
          indices, outInds, gridOut, indicePairs, indiceNum, kernelSize, stride,
          padding, dilation, outSpatialShape, transpose, false, useHash);
    }
    #ifdef TV_CUDA
    else if (indices.device().type() == torch::kCUDA) {
        numActOut = create_conv_indice_pair_p1_cuda(
            indices, indicePairs, indiceNum, indicePairUnique, kernelSize, stride,
            padding, dilation, outSpatialShape, transpose);
        if (numActOut > 0) {
          auto res = torch::_unique(indicePairUnique);
          indicePairUnique = std::get<0>(res);
          numActOut = create_conv_indice_pair_p2_cuda(
              indices, outInds, gridOut, indicePairs, indiceNum, indicePairUnique,
              outSpatialShape, transpose, false, useHash);
          if (numActOut == -1) {
            auto device = indices.device();
            outInds = outInds.to({torch::kCPU});
            indicePairs = indicePairs.to({torch::kCPU});
            indiceNum = indiceNum.to({torch::kCPU});
            indices = indices.to({torch::kCPU});
            numActOut = create_conv_indice_pair_cpu(
                indices, outInds, gridOut, indicePairs, indiceNum, kernelSize,
                stride, padding, dilation, outSpatialShape, transpose, false,
                useHash);
  
            return {outInds.to(device).slice(0, 0, numActOut),
                    indicePairs.to(device), indiceNum.to(device)};
          }
        }
    }      
    #endif
    //...省略...
}

CUDA部分的计算逻辑分成两步:先调用create_conv_indice_pair_p1_cuda(...)函数,再调用create_conv_indice_pair_p2_cuda(...)函数。在create_conv_indice_pair_p1_cuda(...)函数中,我们重点关注prepareIndicePairsKernel核函数。

摘自:include/spconv/indice.cu.h

  1 template 
  3 __global__ void prepareIndicePairsKernel(
  4     tv::TensorView indicesIn, tv::TensorView indicePairs,
  5     tv::TensorView indiceNum, tv::TensorView indicePairUnique,
  6     const tv::SimpleVector kernelSize,
  7     const tv::SimpleVector stride,
  8     const tv::SimpleVector padding,
  9     const tv::SimpleVector dilation,
 10     const tv::SimpleVector outSpatialShape) {
 11   auto numActIn = indicesIn.dim(0);
 12   Index spatialVolume = 1;
 13 #pragma unroll
 14   for (int i = 0; i < NDim; ++i) {
 15     spatialVolume *= outSpatialShape[i];
 16   }
 17   Index kernelVolume = 1;
 18 #pragma unroll
 19   for (int i = 0; i < NDim; ++i) {
 20     kernelVolume *= kernelSize[i];
 21   }
 22   Index numValidPoints = 0;
 23   Index validPoints[KernelMaxVolume * (NDim + 1)]; //kernelMaxVolume??
 24   Index *pointPtr = nullptr;
 25   auto indicePairsDim2 = indicePairs.dim(2);
 26   Index index;
 27   for (int ix : tv::KernelLoopX(numActIn)) {
 28     numValidPoints = getValidOutPos(
 29         indicesIn.data() + ix * (NDim + 1) + 1, kernelSize.data(),
 30         stride.data(), padding.data(), dilation.data(), outSpatialShape.data(),
 31         validPoints);
 32     for (Index i = 0; i < numValidPoints; ++i) {
 33       pointPtr = validPoints + i * (NDim + 1);
 34       auto offset = pointPtr[NDim];
 35       Index oldNum = atomicAdd(indiceNum.data() + offset, Index(1));
 36       indicePairs(0, offset, oldNum) = ix;
 37       index = tv::ArrayIndexRowMajor::runPtrs(
 38                   pointPtr, outSpatialShape.data(), 0) +
 39               spatialVolume * indicesIn(ix, 0);
 40       indicePairs(1, offset, oldNum) = index;
 41       indicePairUnique[offset * indicePairsDim2 + oldNum] = index;
 42     }
 43   }
 44 }

        第17行定义的这个kernelVolume在核函数中并没有用到,不知何意。

 1 template                                                                                                                                                                 
  2 TV_HOST_DEVICE Index getValidOutPos(const Index *input_pos,
  3                                     const Index *kernelSize,
  4                                     const Index *stride, 
  5                                     const Index *padding,
  6                                     const Index *dilation,
  7                                     const Index *outSpatialShape, 
  8                                     Index *out) {
  9   Index lowers[NDim];
 10   Index uppers[NDim];
 11   Index counter[NDim];
 12   Index counterSize[NDim];
 13   Index pointCounter = 0;
 14   Index val,m,offset;
 15   Index numPoints = 1;
 16   bool valid = false;
 17 #pragma unroll
 18   for (int i = 0; i < NDim; ++i) {
 19     lowers[i] = (input_pos[i] - (kernelSize[i] - 1) * dilation[i] - 1 +
 20                  stride[i] + padding[i]) / stride[i];
 21     uppers[i] = (input_pos[i] + padding[i]) / stride[i];
 22   }  
 23 #pragma unroll
 24   for (unsigned i = 0; i < NDim; ++i) {
 25     counterSize[i] = ((uppers[i] - lowers[i]) / dilation[i] + 1);
 26     numPoints *= counterSize[i];
 27   }  
 28 #pragma unroll
 29   for (int i = 0; i < NDim; ++i) {
 30     counter[i] = 0;
 31   }  
 32   for (int i = 0; i < numPoints; ++i) {
 33     valid = true;
 34     m = 1;
 35     offset = 0;
 36 #pragma unroll
 37     for (int j = NDim - 1; j >= 0; --j) { 
 38       val = uppers[j] - counter[j] * dilation[j]; 
 39       out[pointCounter * (NDim + 1) + j] = val;
 40       if (val < 0 || (val > outSpatialShape[j] - 1)) {
 41         valid = false; 
 42         // break;
 43       }
 44       offset += m * (input_pos[j] - val * stride[j] + padding[j]) / dilation[j];
 45       m *= kernelSize[j];
 46     }
 47     out[pointCounter * (NDim + 1) + NDim] = offset;
 48     if (valid) ++pointCounter;
 49     counter[NDim - 1] += 1;
 50 #pragma unroll
 51     for (int c = NDim - 1; c >= 0; --c) {
 52       if (counter[c] == counterSize[c] && c > 0) {
 53         counter[c - 1] += 1;
 54         counter[c] = 0;
 55       }
 56     }
 57   }  
 58   return pointCounter;
 59 }    

        第18~22行针对一个给定的输入位置,求出它在各个维度上对应的输出位置范围[lower,upper]。spconv的GitHub项目中有网友对此做过解释,这里特地摘录如下。

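这里计算的是各个维度上输出位置的边界值[lower,upper],它们是根据给定的参数用公式推导出来的理论值。设某一维上输入坐标为i、输出坐标为o、卷积核内的偏移为k(0 <= k <= kernelSize-1),三者满足 i = o*stride - padding + k*dilation。取k=0得到上界 upper = floor((i + padding) / stride);取k=kernelSize-1得到下界 lower = ceil((i + padding - (kernelSize-1)*dilation) / stride),代码中通过先加上(stride-1)再做整数除法来实现向上取整。这里还没有做越界检查,后面的代码会进一步处理。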

【OpenPCDet】稀疏卷积SPConv-v1.2代码解读(5)_第1张图片

        第32~57行要做的就是对输出数组(out)做有效的填充。可以把out理解为一个[N][NDim+1]的二维数组,其中每一行表示一个输出位置:out[i][0]...out[i][NDim-1]存储第i个输出位置在各个维度上的索引,out[i][NDim]存储与该输入相作用的kernel元素的偏移(offset)。

        完成getValidOutPos的计算后返回到prepareIndicePairsKernel函数中,依靠getValidOutPos中计算得到的out数组完成rulebook的建立。重点在下面这几行代码:

for (Index i = 0; i < numValidPoints; ++i) {
      pointPtr = validPoints + i * (NDim + 1);
      auto offset = pointPtr[NDim];
      Index oldNum = atomicAdd(indiceNum.data() + offset, Index(1));
      //offset偏移处,第oldNum次运算,输入索引为ix,输出索引为index
      indicePairs(0, offset, oldNum) = ix;
      index = tv::ArrayIndexRowMajor::runPtrs(
                  pointPtr, outSpatialShape.data(), 0) + spatialVolume * indicesIn(ix, 0);
      indicePairs(1, offset, oldNum) = index;
      //off0.....|off1....|off2.....|off3....|...{numActIn}..|....
      indicePairUnique[offset * indicePairsDim2 + oldNum] = index;
}

【参考文献】

What's the meaning of function "getValidOutPos"? · Issue #224 · traveller59/spconv · GitHub

你可能感兴趣的:(3D目标检测,深度学习,人工智能)