#include "cuda_runtime.h"
#include"device_launch_parameters.h"
#include
#include"device_functions.h"
#include
#define N 4
using namespace cv;
/**
 * Minimal base type that stores a display name.
 *
 * Both constructors are protected, so a Base can only be created through a
 * derived class.
 */
class Base {
protected:
    // Name reported by getName(); stays "Base" when default-constructed.
    std::string name{ "Base" };

    /** Leaves `name` at its default value "Base". */
    Base() {}

    /** Stores `n` as the name. */
    Base(std::string n) : name(n) {}

public:
    /**
     * Returns a copy of the stored name.
     *
     * Declared const because it only reads `name`; this lets it be called on
     * const objects and through const references.
     */
    std::string getName() const { return this->name; }
};
/**
 * Publicly derives from Base and exposes a public constructor that forwards
 * the given name to Base's protected constructor.
 */
class son : public Base {
public:
    /** Creates a son whose stored name is `n`. */
    son(std::string n)
        : Base{ n }
    {
    }
};
/**
 * Debug kernel: every thread prints the launch-geometry built-ins
 * (blockDim, blockIdx, threadIdx, gridDim) and the indices derived from them.
 *
 * Takes no arguments and writes no memory; its only effect is device-side
 * printf output, so it is meant for inspection, not for computation.
 */
__global__ void MatAdd() {
    // Number of threads in one block.
    int blockSize = blockDim.x * blockDim.y * blockDim.z;
    printf("blockSize= %d\n", blockSize);

    // Linear index of this block inside the grid (x fastest, then y, then z).
    // The original computed this value but then printed a hard-coded 0.
    int bid = gridDim.x * gridDim.y * blockIdx.z +
              gridDim.x * blockIdx.y +
              blockIdx.x;

    // Global x / y coordinates of this thread across the whole grid.
    int i = threadIdx.x + blockIdx.x * blockDim.x;
    int j = threadIdx.y + blockDim.y * blockIdx.y;

    printf("blockDim.x=%d\n", blockDim.x);
    printf("blockDim.y=%d\n", blockDim.y);
    printf("blockDim.z=%d\n", blockDim.z);
    printf("blockIdx.x=%d\n", blockIdx.x);
    printf("blockIdx.y=%d\n", blockIdx.y);
    printf("blockIdx.z=%d\n", blockIdx.z);
    printf("threadIdx.x=%d\n", threadIdx.x);
    printf("threadIdx.y=%d\n", threadIdx.y);
    printf("threadIdx.z=%d\n", threadIdx.z);
    printf("gridDim.x=%d\n", gridDim.x);
    printf("gridDim.y=%d\n", gridDim.y);
    printf("gridDim.z=%d\n", gridDim.z);

    // Linear index of this thread inside its block (x fastest, then y, then z).
    int tid = threadIdx.z * blockDim.x * blockDim.y +
              threadIdx.y * blockDim.x +
              threadIdx.x;

    // NOTE(review): the label says "block"/"thread", but the values printed
    // are the global x and y coordinates i and j — confirm the intended label.
    printf("from block:%d , thread %d\n", i, j);
    printf("from bid :%d , tid %d\n", bid, tid);
}
/**
 * Prints the block index, the global thread index and the in-block thread
 * index, using only the x components of blockIdx / threadIdx / blockDim.
 *
 * Takes no arguments; the only effect is device-side printf output.
 */
__global__ void getIdx_1D_1D() {
    // Block index along x.
    int bid = blockIdx.x;
    // Thread index inside the block.
    int threadID = threadIdx.x;
    // Global index = in-block index + threads contributed by preceding blocks.
    // (The original added blockIdx.x here instead of threadIdx.x, so every
    // thread of a block reported the same tid.)
    int tid = threadIdx.x +
              bid * blockDim.x;
    printf("1D grid,1D block. bid = %d, tid = %d , threadID = %d\n",bid,tid,threadID );
}
/**
 * Prints linearized block and thread indices computed from the x and y
 * components of blockIdx / threadIdx, followed by the raw (x,y,z) triples.
 *
 * The z components are printed but do not take part in the index math.
 * Takes no arguments; the only effect is device-side printf output.
 */
__global__ void getIdx_2D_2D() {
    // Row-major block index inside the grid: one grid row holds gridDim.x blocks.
    int bid = blockIdx.y * gridDim.x + blockIdx.x;

    // Row-major thread index inside the block: one block row holds blockDim.x threads.
    int threadID = threadIdx.y * blockDim.x + threadIdx.x;

    // Global index: skip all threads of the preceding blocks, then add the
    // position inside this block.
    int tid = bid * blockDim.x * blockDim.y
            + threadIdx.y * blockDim.x
            + threadIdx.x;

    printf("2D grid,2D block. bid = %d, tid = %d, threadID=%d,thread-(%d,%d,%d), block-(%d,%d,%d) \n",
           bid, tid, threadID,
           threadIdx.x, threadIdx.y, threadIdx.z,
           blockIdx.x, blockIdx.y, blockIdx.z);
}
/**
 * Prints linearized block and thread indices computed from all three
 * components of blockIdx / threadIdx, followed by the raw (x,y,z) triples.
 *
 * Linearization is x fastest, then y, then z, for both blocks in the grid and
 * threads in the block. Takes no arguments; the only effect is device-side
 * printf output.
 */
__global__ void getIdx_3D_3D() {
    // Linear block index inside the grid.
    int bid = blockIdx.x +
              blockIdx.y * gridDim.x +
              blockIdx.z * gridDim.x * gridDim.y;

    // Linear thread index inside the block. One z-slice of a block holds
    // blockDim.x * blockDim.y threads, so that is the z stride (the original
    // used blockDim.y * blockDim.z, which is only right when x == z extent).
    int threadID = threadIdx.x +
                   threadIdx.y * blockDim.x +
                   threadIdx.z * blockDim.x * blockDim.y;

    // Global index: threads of all preceding blocks plus the in-block index.
    int tid = threadID +
              bid * blockDim.x * blockDim.y * blockDim.z;

    printf("3D grid, 3D block. bid = %d, tid = %d, threadID = %d, thread-(%d,%d,%d),block-(%d,%d,%d)\n",
           bid, tid, threadID,
           threadIdx.x, threadIdx.y, threadIdx.z,
           blockIdx.x, blockIdx.y, blockIdx.z);
}
/**
 * Prints a block index linearized over all three blockIdx components and a
 * thread index linearized over threadIdx.x / threadIdx.y, followed by the raw
 * (x,y,z) triples.
 *
 * threadIdx.z is printed but does not take part in the index math.
 * Takes no arguments; the only effect is device-side printf output.
 */
__global__ void getIdx_3D_2D() {
    // Linear block index, written in nested form:
    // (z * gridDim.y + y) * gridDim.x + x.
    int bid = (blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;

    // Row-major thread index inside the block.
    int threadID = threadIdx.y * blockDim.x + threadIdx.x;

    // Global index: threads of all preceding blocks, then the position inside
    // this block.
    int tid = bid * blockDim.x * blockDim.y
            + threadIdx.y * blockDim.x
            + threadIdx.x;

    printf("3D grid,2D block. bid = %d, tid = %d, threadID = %d, thread-(%d,%d,%d),block-(%d,%d,%d)\n",
           bid, tid, threadID,
           threadIdx.x, threadIdx.y, threadIdx.z,
           blockIdx.x, blockIdx.y, blockIdx.z);
}
/**
 * Prints a block index linearized over all three blockIdx components together
 * with global and in-block thread indices derived from threadIdx.x only.
 *
 * Takes no arguments; the only effect is device-side printf output.
 */
__global__ void getIdx_3D_1D() {
    // In-block index is just the x component.
    int threadID = threadIdx.x;

    // Linear block index, written in nested form:
    // (z * gridDim.y + y) * gridDim.x + x.
    int bid = (blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;

    // Global index: blockDim.x threads for each preceding block, plus the
    // in-block position.
    int tid = bid * blockDim.x + threadIdx.x;

    printf("3D grid,1D block. bid = %d,tid = %d, threadID = %d\n", bid, tid, threadID);
}
/**
 * Prints a block index linearized over blockIdx.x / blockIdx.y and a global
 * thread index derived from it and threadIdx.x.
 *
 * Takes no arguments; the only effect is device-side printf output.
 */
__global__ void getIdx_2D_1D() {
    // Row-major block index: one grid row holds gridDim.x blocks.
    int bid = blockIdx.y * gridDim.x + blockIdx.x;

    // Global index: blockDim.x threads for each preceding block, plus the
    // in-block position.
    int tid = bid * blockDim.x + threadIdx.x;

    printf("2D grid,1D block. bid = %d, tid = %d\n", bid, tid);
}
/**
 * Prints a block index linearized over blockIdx.x / blockIdx.y and thread
 * indices linearized over all three threadIdx components, followed by the raw
 * (x,y,z) triples.
 *
 * blockIdx.z is printed but does not take part in the index math.
 * Takes no arguments; the only effect is device-side printf output.
 */
__global__ void getIdx_2D_3D() {
    // Row-major block index inside the grid.
    int bid = blockIdx.x +
              blockIdx.y * gridDim.x;

    // Linear thread index inside the block (x fastest, then y, then z). One
    // z-slice holds blockDim.x * blockDim.y threads, so that is the z stride
    // (the original used blockDim.y * blockDim.z).
    int threadID = threadIdx.x +
                   threadIdx.y * blockDim.x +
                   threadIdx.z * blockDim.x * blockDim.y;

    // Global index: threads of all preceding blocks plus the in-block index.
    int tid = threadID +
              bid * blockDim.x * blockDim.y * blockDim.z;

    printf("2D grid,3D block. bid = %d, tid = %d, threadID = %d, thread-(%d,%d,%d),block-(%d,%d,%d)\n",
           bid, tid, threadID,
           threadIdx.x, threadIdx.y, threadIdx.z,
           blockIdx.x, blockIdx.y, blockIdx.z);
}
/**
 * Prints the x block index and a global thread index built from all three
 * threadIdx components.
 *
 * Takes no arguments; the only effect is device-side printf output.
 */
__global__ void getIdx_1D_3D() {
    // Block index along x.
    int bid = blockIdx.x;

    // Global index. The in-block part is written in nested form,
    // (z * blockDim.y + y) * blockDim.x + x, and is preceded by the threads
    // of all earlier blocks.
    int tid = bid * blockDim.x * blockDim.y * blockDim.z
            + (threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x
            + threadIdx.x;

    printf("1D grid,3D block. bid = %d, tid = %d\n", bid, tid);
}
/**
 * Prints the x block index together with global and in-block thread indices
 * linearized over threadIdx.x / threadIdx.y.
 *
 * Takes no arguments; the only effect is device-side printf output.
 */
__global__ void getIdx_1D_2D() {
    // Block index along x.
    int bid = blockIdx.x;

    // Row-major thread index inside the block. The y coordinate must be
    // multiplied by the row length blockDim.x (the original added blockDim.x,
    // so distinct threads could report the same threadID).
    int threadID = threadIdx.x +
                   threadIdx.y * blockDim.x;

    // Global index: threads of all preceding blocks plus the in-block index.
    int tid = threadID +
              bid * blockDim.x * blockDim.y;

    printf("1D grid,2D block. bid = %d, tid = %d, threadID=%d \n", bid, tid, threadID);
}
/**
 * Launches getIdx_3D_3D on a 2 x 2 grid of 3 x 2-thread blocks and waits for
 * the device to finish so the kernel's printf output is emitted before exit.
 *
 * Returns 0 on success, 1 if the launch or the synchronization reports a
 * CUDA error.
 */
int main() {
    std::cout << "2 * 2 blocks," << " 3 * 2 threads" << std::endl;
    std::cout << "*********" << std::endl;

    const dim3 block_size(3, 2);  // threads per block
    const dim3 grid_size(2, 2);   // blocks per grid

    // The execution configuration is <<<grid, block>>>. The original passed
    // the two arguments in the opposite order, which launched 3 x 2 blocks of
    // 2 x 2 threads and contradicted the banner printed above.
    // The z extents default to 1, so the 3D index formulas in the kernel
    // reduce to their 2D form for this launch.
    getIdx_3D_3D<<<grid_size, block_size>>>();

    // A kernel launch returns nothing; configuration errors are reported here.
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        std::cerr << "kernel launch failed: " << cudaGetErrorString(err) << std::endl;
        return 1;
    }

    // Errors raised while the kernel runs are reported by the next
    // synchronizing call.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        std::cerr << "kernel execution failed: " << cudaGetErrorString(err) << std::endl;
        return 1;
    }
    return 0;
}
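// Note: assuming the code is correct, the global index tid is the in-block thread index (threadID) plus the offset of the block, i.e. bid * (number of threads per block).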