cuda线程的索引index怎么算呢??

#include "cuda_runtime.h" 
#include"device_launch_parameters.h"
#include
#include"device_functions.h"
#include
#define	 N  4
using namespace cv;
/**
 * Minimal base class that stores a display name.
 *
 * Both constructors are protected, so a Base can only be created as the
 * base sub-object of a derived class.
 */
class Base {
protected:
	std::string name{ "Base" };  // default used when no name is supplied
	Base() = default;
	// Initializes this class's own member from the given name.
	Base(std::string n) :name(n) {}
public:
	// Returns a copy of the stored name. Declared const so it can also be
	// called on const objects and through const references.
	std::string getName() const { return this->name; }
};
/**
 * Derived class whose only job is to forward a name to Base.
 *
 * `name` is a member of Base, so it cannot appear in son's own
 * member-initializer list; it has to be set through a Base constructor.
 * Note this is a base-class constructor call, not a delegating constructor
 * (delegation would mean calling another constructor of son itself).
 */
class son : public Base {
public:
	// Hand the name straight to the protected Base(std::string) constructor.
	son(std::string n)
		: Base{ n }
	{
	}
};

/**
 * Debug kernel: every thread prints the launch geometry it sees
 * (blockDim, blockIdx, threadIdx, gridDim) plus the indices derived from it.
 *
 * Takes no arguments and writes no memory; all output goes through device
 * printf, so it is only meant for small launches.
 */
__global__ void MatAdd() {
	// Number of threads in one block (all three dimensions).
	int blockSize = blockDim.x * blockDim.y * blockDim.z;
	printf("blockSize= %d\n",blockSize);

	// Linear block index within the grid, x varying fastest. The original
	// computed this value but then printed a hard-coded 0 instead.
	int bid = gridDim.x * gridDim.y * blockIdx.z +
			  gridDim.x * blockIdx.y +
			  blockIdx.x;

	// Global x / y coordinates of this thread across the whole grid.
	int i = threadIdx.x + blockIdx.x * blockDim.x;
	int j = threadIdx.y + blockDim.y * blockIdx.y;

	printf("blockDim.x=%d\n", blockDim.x);
	printf("blockDim.y=%d\n", blockDim.y);
	printf("blockDim.z=%d\n", blockDim.z);

	printf("blockIdx.x=%d\n", blockIdx.x);
	printf("blockIdx.y=%d\n", blockIdx.y);
	printf("blockIdx.z=%d\n", blockIdx.z);

	printf("threadIdx.x=%d\n", threadIdx.x);
	printf("threadIdx.y=%d\n", threadIdx.y);
	printf("threadIdx.z=%d\n", threadIdx.z);

	printf("gridDim.x=%d\n", gridDim.x);
	printf("gridDim.y=%d\n", gridDim.y);
	printf("gridDim.z=%d\n", gridDim.z);

	// Linear thread index inside the block, x varying fastest.
	int tid = threadIdx.z * blockDim.x * blockDim.y +
			  threadIdx.y * blockDim.x +
			  threadIdx.x;
	printf("from block:%d , thread %d\n", i, j);
	printf("from bid :%d , tid  %d\n", bid, tid);

	//if (i < N && j < N) {
	//	printf("from block:%d , thread %d",i,j);
	//	//C[i][j] = A[i][j] + B[i][j];
	//}
}

/**
 * Prints the block index, global thread index and in-block thread index
 * for a launch labelled "1D grid, 1D block" (only the x components are used).
 */
__global__ void getIdx_1D_1D() {
	int bid = blockIdx.x;
	int threadID = threadIdx.x;
	// Global index = position inside the block + threads in all preceding
	// blocks. The original added blockIdx.x here instead of threadIdx.x, so
	// every thread of a block reported the same value.
	int tid = threadID +
		      bid * blockDim.x;
	printf("1D grid,1D block. bid = %d, tid = %d , threadID = %d\n",bid,tid,threadID );
}
/**
 * Prints linear block / thread indices for a launch labelled
 * "2D grid, 2D block". Only the x and y components enter the formulas;
 * the z components are printed as-is.
 */
__global__ void getIdx_2D_2D() {
	// Threads per block, counting the two dimensions used here.
	const unsigned int threadsPerBlock = blockDim.x * blockDim.y;

	// Linear block index in the grid, x varying fastest.
	const int bid = blockIdx.y * gridDim.x + blockIdx.x;

	// Linear thread index inside its block, x varying fastest.
	const int threadID = threadIdx.y * blockDim.x + threadIdx.x;

	// Global index: skip all threads of the preceding blocks.
	const int tid = threadID + bid * threadsPerBlock;

	printf("2D grid,2D block.  bid = %d, tid = %d, threadID=%d,thread-(%d,%d,%d), block-(%d,%d,%d) \n",
		bid, tid, threadID,
		threadIdx.x, threadIdx.y, threadIdx.z,
		blockIdx.x, blockIdx.y, blockIdx.z);
}
/**
 * Prints linear block / thread indices for a launch labelled
 * "3D grid, 3D block", using all three components of blockIdx and threadIdx.
 */
__global__ void getIdx_3D_3D() {
	// Linear block index in the grid, x varying fastest.
	int bid =	blockIdx.x +
				blockIdx.y * gridDim.x +
				blockIdx.z * gridDim.x * gridDim.y;
	// Linear thread index inside the block. One z-slice holds
	// blockDim.x * blockDim.y threads, so that is the z stride; the original
	// used blockDim.y * blockDim.z, which is only right when
	// blockDim.x == blockDim.z.
	int threadID =	threadIdx.x +
					threadIdx.y * blockDim.x +
					threadIdx.z * blockDim.x * blockDim.y;
	// Global index: in-block index plus all threads of the preceding blocks.
	int tid =	threadID +
				bid * blockDim.x * blockDim.y * blockDim.z;
	printf("3D grid, 3D block. bid = %d, tid = %d, threadID = %d, thread-(%d,%d,%d),block-(%d,%d,%d)\n",
					bid, tid, threadID,
					threadIdx.x,threadIdx.y,threadIdx.z,
					blockIdx.x,blockIdx.y,blockIdx.z);
}
/**
 * Prints linear block / thread indices for a launch labelled
 * "3D grid, 2D block": all three blockIdx components are used, but only the
 * x and y components of threadIdx.
 */
__global__ void getIdx_3D_2D() {
	// Blocks in one z-slice of the grid.
	const unsigned int blocksPerSlice = gridDim.x * gridDim.y;
	// Threads per block, counting the two dimensions used here.
	const unsigned int threadsPerBlock = blockDim.x * blockDim.y;

	// Linear block index in the grid, x varying fastest.
	const int bid = blockIdx.z * blocksPerSlice
	              + blockIdx.y * gridDim.x
	              + blockIdx.x;

	// Linear thread index inside its block.
	const int threadID = threadIdx.y * blockDim.x + threadIdx.x;

	// Global index: skip all threads of the preceding blocks.
	const int tid = threadID + bid * threadsPerBlock;

	printf("3D grid,2D block.  bid = %d, tid = %d, threadID = %d,  thread-(%d,%d,%d),block-(%d,%d,%d)\n",
		bid, tid, threadID,
		threadIdx.x, threadIdx.y, threadIdx.z,
		blockIdx.x, blockIdx.y, blockIdx.z);
}
/**
 * Prints linear block / thread indices for a launch labelled
 * "3D grid, 1D block": all three blockIdx components are used, but only
 * threadIdx.x.
 */
__global__ void getIdx_3D_1D() {
	// Blocks in one z-slice of the grid.
	const unsigned int blocksPerSlice = gridDim.x * gridDim.y;

	// Linear block index in the grid, x varying fastest.
	const int bid = blockIdx.z * blocksPerSlice
	              + blockIdx.y * gridDim.x
	              + blockIdx.x;

	// With a 1D block the in-block index is just threadIdx.x.
	const int threadID = threadIdx.x;

	// Global index: skip blockDim.x threads for every preceding block.
	const int tid = bid * blockDim.x + threadIdx.x;

	printf("3D grid,1D block.  bid = %d,tid = %d, threadID = %d\n", bid, tid, threadID);
}
/**
 * Prints the linear block index and global thread index for a launch
 * labelled "2D grid, 1D block" (blockIdx.x/y and threadIdx.x are used).
 */
__global__ void getIdx_2D_1D() {
	// Linear block index in the grid, x varying fastest.
	const int bid = blockIdx.y * gridDim.x + blockIdx.x;

	// Global index: skip blockDim.x threads for every preceding block.
	const int tid = bid * blockDim.x + threadIdx.x;

	printf("2D grid,1D block.  bid = %d, tid = %d\n", bid, tid);
}
/**
 * Prints linear block / thread indices for a launch labelled
 * "2D grid, 3D block": blockIdx.x/y and all three threadIdx components
 * are used.
 */
__global__ void getIdx_2D_3D() {
	// Linear block index in the grid, x varying fastest.
	int bid =	blockIdx.x +
				blockIdx.y * gridDim.x;
	// Linear thread index inside the block. One z-slice holds
	// blockDim.x * blockDim.y threads, so that is the z stride; the original
	// used blockDim.y * blockDim.z, which is only right when
	// blockDim.x == blockDim.z.
	int threadID =	threadIdx.x +
					threadIdx.y * blockDim.x +
					threadIdx.z * blockDim.x * blockDim.y;
	// Global index: in-block index plus all threads of the preceding blocks.
	int tid =	threadID +
				bid * blockDim.x * blockDim.y * blockDim.z;

	printf("2D grid,3D block.  bid = %d, tid = %d, threadID = %d,  thread-(%d,%d,%d),block-(%d,%d,%d)\n",
								bid, tid, threadID,
								threadIdx.x, threadIdx.y, threadIdx.z,
								blockIdx.x, blockIdx.y, blockIdx.z);
}
/**
 * Prints the block index and global thread index for a launch labelled
 * "1D grid, 3D block" (blockIdx.x and all three threadIdx components are used).
 */
__global__ void getIdx_1D_3D() {
	// Threads in one z-slice of the block, and in the whole block.
	const unsigned int threadsPerSlice = blockDim.x * blockDim.y;
	const unsigned int threadsPerBlock = threadsPerSlice * blockDim.z;

	// With a 1D grid the block index is just blockIdx.x.
	const int bid = blockIdx.x;

	// In-block linear index (x fastest, then y, then z) plus the threads of
	// all preceding blocks.
	const int tid = threadIdx.z * threadsPerSlice
	              + threadIdx.y * blockDim.x
	              + threadIdx.x
	              + bid * threadsPerBlock;

	printf("1D grid,3D block.  bid = %d, tid = %d\n", bid, tid);
}
/**
 * Prints the block index, global thread index and in-block thread index for
 * a launch labelled "1D grid, 2D block" (blockIdx.x and threadIdx.x/y are used).
 */
__global__ void getIdx_1D_2D() {
	int bid =	blockIdx.x;
	// Linear thread index inside the block: each row holds blockDim.x
	// threads, so the row number is multiplied by blockDim.x. The original
	// added blockDim.x instead ("threadIdx.y + blockDim.x"), which produced
	// duplicate, shifted indices.
	int threadID =  threadIdx.x +
					threadIdx.y * blockDim.x;
	// Global index: in-block index plus all threads of the preceding blocks.
	int tid =	threadID +
				bid * blockDim.x * blockDim.y;
	printf("1D grid,2D block.  bid = %d, tid = %d, threadID=%d \n", bid, tid, threadID);
}
/**
 * Launches getIdx_3D_3D once with a 2 x 2 grid of 3 x 2-thread blocks and
 * waits for the device printf output to be flushed.
 *
 * Returns 0 on success, 1 if the launch or the kernel execution reports a
 * CUDA error.
 */
int main() {
	// Geometry matching the banner printed below.
	const dim3 grid_size(2, 2);   // blocks per grid
	const dim3 block_size(3, 2);  // threads per block
	std::cout << "2 * 2 blocks," << " 3 * 2 threads" << std::endl;
	std::cout << "*********" << std::endl;

	// The execution configuration is <<<grid, block>>>: grid dimensions
	// first, block dimensions second. The original passed them the other way
	// round, launching 3 x 2 blocks of 2 x 2 threads.
	// Note: writing "(2, 3)" directly inside <<< >>> is the comma operator and
	// evaluates to the single value 3; use dim3 for multi-dimensional sizes.
	getIdx_3D_3D<<<grid_size, block_size>>>();

	// A kernel launch returns nothing; configuration errors are reported by
	// cudaGetLastError().
	cudaError_t err = cudaGetLastError();
	if (err != cudaSuccess) {
		std::cerr << "kernel launch failed: " << cudaGetErrorString(err) << std::endl;
		return 1;
	}

	// Block until the kernel has finished so its printf output is flushed
	// and any execution error surfaces here.
	err = cudaDeviceSynchronize();
	if (err != cudaSuccess) {
		std::cerr << "kernel execution failed: " << cudaGetErrorString(err) << std::endl;
		return 1;
	}

	return 0;
}

cuda线程的索引index怎么算呢??_第1张图片
cuda线程的索引index怎么算呢??_第2张图片

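总结:全局线程索引 tid = 线程在块内的线性索引 threadID + 该线程所在块的偏移量。块的偏移量 = bid × 每个块的线程数(即 bid × blockDim.x × blockDim.y × blockDim.z),也就是说偏移来自"前面所有块的线程总数",而不是"块内的偏移"。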

你可能感兴趣的:(cuda,c++,开发语言)