
Texture Fetching
Texture Reference
Texture Functions
Surface Functions

texture<Type, Dimension, ReadMode> Name;
texture<float, cudaTextureType2D, cudaReadModeElementType> texRef;


纹理类型 内建函数
设备内存 tex1Dfetch(int)
1D tex1D(float x)
2D tex2D(float x, float y)
3D tex3D(float x, float y, float z)
立方图纹理 texCubemap(float x, float y, float z)
分层纹理(1D) tex1DLayered(float x, int layer)
分层纹理(2D) tex2DLayered(float x, float y, int layer)
分层纹理(立方图) texCubemapLayered(float x, float y, float z, int layer)


// 1D texture reference over float elements, read back without type conversion
// (cudaReadModeElementType). The attributes below are set from host code.
texture<float, 1, cudaReadModeElementType> tex1;
tex1.filterMode = cudaFilterModeLinear; // enable linear filtering
tex1.normalized = true; // texture coordinates will be normalized
内存 函数
1D设备内存 cudaBindTexture()
2D设备内存 cudaBindTexture2D()
CUDA数组 cudaBindTextureToArray()
解除绑定 cudaUnbindTexture()


// Low-level API: bind pitched 2D device memory to a texture reference.
// devPtr, width, height and pitch are assumed to come from an earlier
// pitched allocation that is not shown in this snippet.
texture<float, cudaTextureType2D,
        cudaReadModeElementType> texRef;
// cudaGetTextureReference() yields a pointer to a const textureReference.
const textureReference* texRefPtr;
cudaGetTextureReference(&texRefPtr, &texRef);
// The channel format has to describe the element type of the texture (float).
cudaChannelFormatDesc channelDesc =
                             cudaCreateChannelDesc<float>();
// Receives the byte offset that must be applied to texture fetches.
size_t offset;
cudaBindTexture2D(&offset, texRefPtr, devPtr, &channelDesc,
                  width, height, pitch);
    // High-level API: the texture<> object and the channel descriptor are
    // passed directly (by reference) instead of through pointers.
    // devPtr, width, height and pitch are assumed to come from an earlier
    // pitched allocation that is not shown in this snippet.
    texture<float, cudaTextureType2D,
            cudaReadModeElementType> texRef;
    // The channel format has to describe the element type of the texture (float).
    cudaChannelFormatDesc channelDesc =
                                 cudaCreateChannelDesc<float>();
    // Receives the byte offset that must be applied to texture fetches.
    size_t offset;
    cudaBindTexture2D(&offset, texRef, devPtr, channelDesc,
                      width, height, pitch);

// Allocate a width x height CUDA array whose elements are single floats
int width = 10;
int height = 10;
cudaArray* cuArray;
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
cudaMallocArray(&cuArray, &channelDesc, width, height);
// Copy `size` bytes from host memory (h_data) into the CUDA array,
// starting at offset (0, 0)
cudaMemcpyToArray(cuArray, 0, 0, h_data, size, cudaMemcpyHostToDevice);


// Copy h rows from host buffer src into CUDA array dst, starting at offset (0, 0).
// Both the source pitch and the copied row width are w * sizeof(src[0]) bytes,
// i.e. the host rows are treated as tightly packed.
cudaMemcpy2DToArray(dst, 0, 0, src, w*sizeof(src[0]) , w*sizeof(src[0]), h, cudaMemcpyHostToDevice);


// Low-level API: bind a CUDA array to a texture reference through pointers.
// cuArray is assumed to have been allocated with cudaMallocArray() beforehand.
texture<float, cudaTextureType2D,
        cudaReadModeElementType> texRef;
// cudaGetTextureReference() yields a pointer to a const textureReference.
const textureReference* texRefPtr;
cudaGetTextureReference(&texRefPtr, &texRef);
// Query the channel format the array was created with instead of restating it.
cudaChannelFormatDesc channelDesc;
cudaGetChannelDesc(&channelDesc, cuArray);
// The pointer-based overload takes the reference pointer, not the texture<> object.
cudaBindTextureToArray(texRefPtr, cuArray, &channelDesc);
// High-level API: bind the CUDA array straight to the texture<> object.
// No channel descriptor is passed in this form of the call.
texture<float, cudaTextureType2D,
        cudaReadModeElementType> texRef;
cudaBindTextureToArray(texRef, cuArray);
// Texture references have to be declared at file scope; a function-local
// texture<> object cannot be bound by the runtime.
texture<float, cudaTextureType2D, cudaReadModeElementType> test4TexRef;

/**
 * Texture-reference walkthrough: allocates a 10x10 float CUDA array, uploads a
 * host buffer into it, configures the texture reference (wrap addressing,
 * linear filtering, normalized coordinates), binds the array to it, and then
 * releases everything again.
 */
void test_4(){
    const int width = 10, height = 10;

    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
    cudaArray* cuArray;
    cudaMallocArray(&cuArray, &channelDesc, width, height);

    // Value-initialized so the upload does not read indeterminate host memory.
    float *hData = new float[width * height]();
    cudaMemcpy2DToArray(cuArray, 0, 0, hData, width*sizeof(float),
                        width*sizeof(float), height, cudaMemcpyHostToDevice);

    // Set texture reference parameters
    test4TexRef.addressMode[0] = cudaAddressModeWrap;
    test4TexRef.addressMode[1] = cudaAddressModeWrap;
    test4TexRef.filterMode     = cudaFilterModeLinear;
    test4TexRef.normalized     = true;
    cudaBindTextureToArray(test4TexRef, cuArray, channelDesc);
    //cudaBindTextureToArray(test4TexRef, cuArray);

    // Unbind before freeing the array the texture reference points at.
    cudaUnbindTexture(test4TexRef);
    cudaFreeArray(cuArray);
    delete[] hData;
}
// Simple transformation kernel
/**
 * Samples a 2D texture object at coordinates rotated by `theta` radians around
 * the texture centre (0.5, 0.5) and stores the result in row-major order.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per output element; the grid
 * may overhang the image, excess threads exit without writing.
 *
 * @param output  device buffer holding at least width * height floats
 * @param texObj  texture object sampled with normalized coordinates
 * @param width   output width in elements
 * @param height  output height in elements
 * @param theta   rotation angle in radians
 */
__global__ void transformKernel(float* output,
                                cudaTextureObject_t texObj,
                                int width, int height,
                                float theta)
{
    const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // The grid is rounded up to whole blocks, so threads beyond the image
    // edge must not write into `output`.
    if (x >= (unsigned int)width || y >= (unsigned int)height)
        return;

    // Calculate normalized texture coordinates, centred on the origin
    const float u = x / (float)width  - 0.5f;
    const float v = y / (float)height - 0.5f;

    // Transform coordinates: rotate, then shift back to the [0, 1] range
    const float c = cosf(theta);
    const float s = sinf(theta);
    const float tu = u * c - v * s + 0.5f;
    const float tv = v * c + u * s + 0.5f;

    // Read from texture and write to global memory
    output[y * width + x] = tex2D<float>(texObj, tu, tv);
}
// Allocate CUDA array in device memory
    // One 32-bit float channel (x); y, z and w are unused.
    cudaChannelFormatDesc channelDesc =
               cudaCreateChannelDesc(32, 0, 0, 0,
                                     cudaChannelFormatKindFloat);
    cudaArray* cuArray;
    int width = 10, height = 10;
    cudaMallocArray(&cuArray, &channelDesc, width, height);

    // Copy to device memory some data located at address hData
    // in host memory. The buffer is value-initialized so the upload does not
    // read indeterminate memory.
    float *hData = new float[width * height]();
    cudaMemcpy2DToArray(cuArray, 0, 0, hData, width*sizeof(float),
                        width*sizeof(float), height, cudaMemcpyHostToDevice);

    // Specify texture
    struct cudaResourceDesc resDesc;
    memset(&resDesc, 0, sizeof(resDesc));
    resDesc.resType = cudaResourceTypeArray;
    resDesc.res.array.array = cuArray;

    // Specify texture object parameters
    struct cudaTextureDesc texDesc;
    memset(&texDesc, 0, sizeof(texDesc));
    texDesc.addressMode[0]   = cudaAddressModeWrap;
    texDesc.addressMode[1]   = cudaAddressModeWrap;
    texDesc.filterMode       = cudaFilterModeLinear;
    texDesc.readMode         = cudaReadModeElementType;
    texDesc.normalizedCoords = 1;

    // Create texture object
    cudaTextureObject_t texObj = 0;
    cudaCreateTextureObject(&texObj, &resDesc, &texDesc, NULL);

    // Allocate result of transformation in device memory
    float* output;
    cudaMalloc(&output, width * height * sizeof(float));

    // Invoke kernel
    dim3 dimBlock(16, 16);
    dim3 dimGrid((width  + dimBlock.x - 1) / dimBlock.x,
                 (height + dimBlock.y - 1) / dimBlock.y);
    // The launch is left disabled: enabling it needs a rotation angle, and the
    // 16x16 block overhangs the 10x10 image, so the kernel must guard
    // x < width && y < height before writing to `output`.
    //transformKernel<<<dimGrid, dimBlock>>>(output,
    //                                       texObj, width, height,
    //                                       angle);

    // Destroy texture object
    cudaDestroyTextureObject(texObj);

    // Free device memory
    cudaFreeArray(cuArray);
    cudaFree(output);

    // Free the host staging buffer
    delete[] hData;




// Simple copy kernel
/**
 * Copies one uchar4 element per thread from the input surface to the same
 * position in the output surface.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per element; threads outside
 * width x height do nothing.
 *
 * @param inputSurfObj   surface object read from
 * @param outputSurfObj  surface object written to
 * @param width          surface width in elements
 * @param height         surface height in elements
 */
__global__ void copyKernel(cudaSurfaceObject_t inputSurfObj,
                           cudaSurfaceObject_t outputSurfObj,
                           int width, int height)
{
    // Calculate surface coordinates
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x < (unsigned int)width && y < (unsigned int)height) {
        uchar4 data;
        // Surface x coordinates are byte offsets, hence x * 4 for
        // four-byte uchar4 elements.
        // Read from input surface
        surf2Dread(&data,  inputSurfObj, x * 4, y);
        // Write to output surface
        surf2Dwrite(data, outputSurfObj, x * 4, y);
    }
}

// Host code
/**
 * Surface-object walkthrough: uploads a small 4x8-bit image into a CUDA array,
 * copies it to a second array with copyKernel via surface objects, and
 * releases all resources.
 *
 * @return 0 if the kernel launched and completed without error, 1 otherwise
 */
int main()
{
    const int width = 10, height = 10;

    // Host-side source image, value-initialized so the upload reads defined data.
    uchar4* hData = new uchar4[width * height]();

    // Allocate CUDA arrays in device memory
    // Four unsigned 8-bit channels per element, matching uchar4.
    cudaChannelFormatDesc channelDesc =
             cudaCreateChannelDesc(8, 8, 8, 8,
                                   cudaChannelFormatKindUnsigned);
    // cudaArraySurfaceLoadStore is required for arrays accessed through surfaces.
    cudaArray* cuInputArray;
    cudaMallocArray(&cuInputArray, &channelDesc, width, height,
                    cudaArraySurfaceLoadStore);
    cudaArray* cuOutputArray;
    cudaMallocArray(&cuOutputArray, &channelDesc, width, height,
                    cudaArraySurfaceLoadStore);

    // Copy the host image into the input array; host rows are tightly packed
    // uchar4 elements.
    cudaMemcpy2DToArray(cuInputArray, 0, 0, hData, width*sizeof(uchar4),
                        width*sizeof(uchar4), height, cudaMemcpyHostToDevice);

    // Specify surface
    struct cudaResourceDesc resDesc;
    memset(&resDesc, 0, sizeof(resDesc));
    resDesc.resType = cudaResourceTypeArray;

    // Create the surface objects (the descriptor is reused for both arrays)
    resDesc.res.array.array = cuInputArray;
    cudaSurfaceObject_t inputSurfObj = 0;
    cudaCreateSurfaceObject(&inputSurfObj, &resDesc);
    resDesc.res.array.array = cuOutputArray;
    cudaSurfaceObject_t outputSurfObj = 0;
    cudaCreateSurfaceObject(&outputSurfObj, &resDesc);

    // Invoke kernel
    dim3 dimBlock(16, 16);
    dim3 dimGrid((width  + dimBlock.x - 1) / dimBlock.x,
                 (height + dimBlock.y - 1) / dimBlock.y);
    copyKernel<<<dimGrid, dimBlock>>>(inputSurfObj,
                                      outputSurfObj,
                                      width, height);

    // Launch-configuration errors show up in cudaGetLastError(); faults during
    // execution show up at the next synchronizing call.
    cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess)
        status = cudaDeviceSynchronize();

    // Destroy surface objects
    cudaDestroySurfaceObject(inputSurfObj);
    cudaDestroySurfaceObject(outputSurfObj);

    // Free device memory
    cudaFreeArray(cuInputArray);
    cudaFreeArray(cuOutputArray);

    // Free the host image
    delete[] hData;

    return status == cudaSuccess ? 0 : 1;
}
// Two alternative channel descriptors (use one or the other):
// a single 32-bit floating-point channel in x ...
cudaChannelFormatDesc channelDesc =
               cudaCreateChannelDesc(32, 0, 0, 0,
                                     cudaChannelFormatKindFloat);
// ... or four unsigned 8-bit channels in x, y, z and w.
cudaChannelFormatDesc channelDesc =
             cudaCreateChannelDesc(8, 8, 8, 8,
                                   cudaChannelFormatKindUnsigned);

// Layout of the channel descriptor, shown for illustration only; the real
// definition is supplied by the CUDA runtime headers.
struct cudaChannelFormatDesc {
    int x, y, z, w;                // number of bits in each channel
    enum cudaChannelFormatKind f;  // how the channel bits are interpreted
};

// Channel format kinds, shown for illustration only; the real definition is
// supplied by the CUDA runtime headers.
enum cudaChannelFormatKind {
    cudaChannelFormatKindSigned = 0,
    cudaChannelFormatKindUnsigned = 1,
    cudaChannelFormatKindFloat = 2,
    cudaChannelFormatKindNone = 3
};




类型 耗时
纹理(读) 966.11ms
表面(读) 384.11ms
设备内存(读) 370.07ms
OpenGL缓存(读) 412.27ms
表面(写) 1229.29ms
设备内存(写) 1266.11ms
OpenGL缓存(写) 1229.65ms

