1
2
3
4
5
6
7
|
// Texture-reference style: the 2D float texture is a file-scope object that
// the kernel names directly rather than receiving it as an argument.
texture<float, 2, cudaReadModeElementType> cuda_tex;

// Illustrative kernel: the parameter list and the fetch coordinates are
// elided ("...") in this snippet. It fetches one float texel through the
// file-scope texture reference declared above.
__global__ void Kernel(...)
{
    float sample = tex2D(cuda_tex, ...);
}
|
1
2
3
4
|
// One file-scope 2D float texture reference per texture; each one is its own
// separately named declaration.
texture<float, 2, cudaReadModeElementType> cuda_tex1;
texture<float, 2, cudaReadModeElementType> cuda_tex2;
texture<float, 2, cudaReadModeElementType> cuda_tex3;
...
|
1
2
3
4
5
6
7
8
9
10
11
12
13
|
// Layout of the texture descriptor: the set of sampling parameters that
// describe how a texture is addressed, filtered and read.
// NOTE(review): per-field meanings below are inferred from the field names;
// confirm against the CUDA Runtime API reference before relying on them.
struct cudaTextureDesc
{
    // --- Addressing, filtering and read behaviour ---
    enum cudaTextureAddressMode addressMode[3];   // one entry per coordinate axis (3 slots)
    enum cudaTextureFilterMode  filterMode;       // texel filtering mode
    enum cudaTextureReadMode    readMode;         // how fetched values are returned
    int                         sRGB;             // integer flag
    int                         normalizedCoords; // integer flag: normalized coordinates on/off

    // --- Anisotropy and mipmap settings ---
    unsigned int                maxAnisotropy;
    enum cudaTextureFilterMode  mipmapFilterMode;
    float                       mipmapLevelBias;
    float                       minMipmapLevelClamp;
    float                       maxMipmapLevelClamp;
};
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
|
// Simple texture transformation kernel.
//
// For every output pixel (x, y) this converts the pixel index to a texture
// coordinate by dividing by the image size, rotates that coordinate by
// `theta` about the centre (0.5, 0.5), samples `texObj` there and stores the
// sample in row-major order in `output`.
//
// Parameters:
//   output        - device buffer holding at least width * height floats
//   texObj        - texture object sampled with tex2D<float>; the coordinates
//                   computed here lie around [0, 1], so this assumes the
//                   object was created with normalized coordinates enabled
//   width, height - output image size in pixels
//   theta         - rotation angle handed to sinf/cosf
//
// Launch layout: a 2D grid of 2D blocks, one thread per output pixel. The
// grid may be rounded up past the image size; surplus threads return without
// touching memory.
__global__ void transformKernel(float* output,
                                cudaTextureObject_t texObj,
                                int width,
                                int height,
                                float theta)
{
    // Nothing to do for an empty or invalid image. This also keeps the
    // unsigned comparisons below meaningful.
    if (width <= 0 || height <= 0)
        return;

    // Pixel handled by this thread.
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Bounds guard: with a ceil-div grid the last blocks overhang the image,
    // and an unguarded write would land outside `output`.
    if (x >= static_cast<unsigned int>(width) ||
        y >= static_cast<unsigned int>(height))
        return;

    // Texture coordinates, shifted so the rotation is about the centre.
    const float u = x / (float)width - 0.5f;
    const float v = y / (float)height - 0.5f;

    // Rotate by theta, then shift back. sin/cos are evaluated once.
    const float c = cosf(theta);
    const float s = sinf(theta);
    const float tu = u * c - v * s + 0.5f;
    const float tv = v * c + u * s + 0.5f;

    // Read from the texture and write to global memory. The index is widened
    // to size_t so y * width cannot wrap for very large images.
    output[static_cast<size_t>(y) * width + x] = tex2D<float>(texObj, tu, tv);
}
// Host-side example: copies host data into a CUDA array, wraps the array in a
// texture object, and launches transformKernel to write into a device buffer.
// NOTE(review): width, height, h_data, size and angle are not declared in the
// visible excerpt, and the closing brace of this function is not shown —
// presumably defined/continued elsewhere; confirm.
// NOTE(review): none of the CUDA API return codes below are checked.
int
main()
{
// Define the CUDA array: a single 32-bit float channel, width x height.
cudaChannelFormatDesc channelDesc =
cudaCreateChannelDesc(32, 0, 0, 0,
cudaChannelFormatKindFloat);
cudaArray* cuArray;
cudaMallocArray(&cuArray, &channelDesc, width, height);
// Copy the host data into the CUDA array.
// NOTE(review): cudaMemcpyToArray is flagged as deprecated in recent CUDA
// toolkits — confirm, and consider cudaMemcpy2DToArray.
cudaMemcpyToArray(cuArray, 0, 0, h_data, size,
cudaMemcpyHostToDevice);
// Define the resource descriptor: zero it, then point it at the CUDA array.
struct
cudaResourceDesc resDesc;
memset
(&resDesc, 0,
sizeof
(resDesc));
resDesc.resType = cudaResourceTypeArray;
resDesc.res.array.array = cuArray;
// Define the texture object parameters: wrap addressing on the first two
// axes, linear filtering, element-type reads, normalized coordinates.
struct
cudaTextureDesc texDesc;
memset
(&texDesc, 0,
sizeof
(texDesc));
texDesc.addressMode[0] = cudaAddressModeWrap;
texDesc.addressMode[1] = cudaAddressModeWrap;
texDesc.filterMode = cudaFilterModeLinear;
texDesc.readMode = cudaReadModeElementType;
texDesc.normalizedCoords = 1;
// Create the texture object from the resource and texture descriptors.
cudaTextureObject_t texObj = 0;
cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL);
// Allocate device memory to hold the result (width * height floats).
float
* output;
cudaMalloc(&output, width * height *
sizeof
(
float
));
// Launch the kernel with 16x16 blocks; the grid is rounded up so it covers
// the whole width x height image.
// NOTE(review): when width or height is not a multiple of 16 the grid is
// larger than the image, so the kernel has to guard its writes.
dim3 dimBlock(16, 16);
dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x,
(height + dimBlock.y - 1) / dimBlock.y);
transformKernel<<<dimGrid, dimBlock>>>(output,
texObj, width, height,
angle);
// Destroy the texture object.
// NOTE(review): no explicit synchronization, launch-error check or copy-back
// of `output` is visible between the launch and this teardown.
cudaDestroyTextureObject(texObj);
// Free the CUDA array.
// NOTE(review): `output` is not freed in the visible excerpt.
cudaFreeArray(cuArray);
|