cuda ptx 汇编语言示例:读寄存器

编译(Ampere 架构显卡,如 RTX 3060、RTX 3070 等):

nvcc -arch=sm_86 -o hello hello_ptx.cu

或写成Makefile:

# Build the demo for Ampere GPUs (compute capability 8.6).
# The prerequisite is the source file shown in this post, hello_ptx.cu.
# NOTE: the recipe line below must start with a TAB character, not spaces.
hello: hello_ptx.cu
	nvcc -arch=sm_86 -o $@ $^
# Equivalent explicit command: nvcc -arch=sm_86 -o hello hello_ptx.cu

$@  是指目标

$^  是指所有依赖(去重后的完整依赖列表);如果只想引用第一个依赖,应使用 $<

hello_ptx.cu

#include <stdint.h>
#include <stdio.h>

// Returns the ID of the streaming multiprocessor (SM) the calling thread is
// currently running on, read from the PTX special register %smid.
// The value is a snapshot taken at the moment of the read.
static __device__ __inline__ uint32_t __mysmid()
{
  uint32_t sm_index;
  // "%%" escapes a literal '%' in inline PTX; "=r" binds a 32-bit output register.
  asm volatile("mov.u32 %0, %%smid;" : "=r"(sm_index));
  return sm_index;
}

// Returns the calling thread's warp identifier, read from the PTX special
// register %warpid. The value is a snapshot taken at the moment of the read.
static __device__ __inline__ uint32_t __mywarpid()
{
  uint32_t warp_index;
  // "%%" escapes a literal '%' in inline PTX; "=r" binds a 32-bit output register.
  asm volatile("mov.u32 %0, %%warpid;" : "=r"(warp_index));
  return warp_index;
}

// Returns the calling thread's lane index within its warp, read from the PTX
// special register %laneid.
static __device__ __inline__ uint32_t __mylaneid()
{
  uint32_t lane_index;
  // "%%" escapes a literal '%' in inline PTX; "=r" binds a 32-bit output register.
  asm volatile("mov.u32 %0, %%laneid;" : "=r"(lane_index));
  return lane_index;
}


// Reports where the first thread of each block is running: its global thread
// index, SM ID, warp ID and lane ID.
//
// Expected launch layout: 1D grid, 1D block (only the .x components are used).
// Only thread 0 of blocks 0..33 prints, so the output is at most 34 lines.
// Device-side printf is a debugging facility; it is used here purely to
// display the register values.
__global__ void mykernel(){

        int idx = threadIdx.x+blockDim.x*blockIdx.x;
        unsigned thx = threadIdx.x;
        // Alternative thread filters kept from earlier experiments:
//      if(threadIdx.x==1023)// && blockIdx.x<3)
//      if(threadIdx.x==1)
//      if((thx==0 || thx==32 || thx==64 || thx==96 || thx==128 || thx==160 || thx==192 || thx==224 || thx==256) && blockIdx.x==0)
        if(blockIdx.x<=33 && thx==0)
                // The three ID helpers return uint32_t, so they are printed with %u.
                printf("I am thread %d, my SM ID is %u, my warp ID is %u, and my warp lane is %u\n", idx, __mysmid(), __mywarpid(), __mylaneid());
}

// Host entry point: launches mykernel with 34 blocks of 1024 threads (1D) and
// waits for it to finish so the device-side printf output is emitted.
// Returns 0 on success, 1 if the launch or the execution reports a CUDA error.
int main(){

        dim3 grid_;
        dim3 block_;
        grid_.x=34;
        block_.x=1024;

        // The execution configuration <<<grid, block>>> is mandatory for a kernel launch.
        mykernel<<<grid_, block_>>>();

        // A kernel launch returns no status itself; configuration errors are
        // retrieved with cudaGetLastError().
        cudaError_t err = cudaGetLastError();
        if (err != cudaSuccess) {
                fprintf(stderr, "kernel launch failed: %s\n", cudaGetErrorString(err));
                return 1;
        }

        // Blocks until the kernel completes; errors raised during execution surface here.
        err = cudaDeviceSynchronize();
        if (err != cudaSuccess) {
                fprintf(stderr, "kernel execution failed: %s\n", cudaGetErrorString(err));
                return 1;
        }
        return 0;
}
// Build: nvcc -arch=sm_86 -o hello hello_ptx.cu   (the sm_20 target from the original example is no longer supported by current nvcc)

运行:

cuda ptx 汇编语言示例:读寄存器_第1张图片

结果分析:

rtx3060中,

当 block 的个数从一个增加到两个,3个,...

smid的值为 0,  2,  4, ...直到偶数最大  max_even(smid), 然后是  1,3,5,... 直到奇数最大 max_odd(smid).

RTX 3060 中,每个 TPC 包含 2 个 SM(2 SM/TPC)。

smid 奇偶交替的分配顺序,跟这里的 2(每个 TPC 有 2 个 SM)是否有关系呢?如果有,又是什么关系呢?

你可能感兴趣的:(gpc,tpc,SM)