CUDA学习——CUDA代码常用编写技巧(转)

1. 声明 __shared__ 变量或数组:

// Fixed-size array of 256 floats placed in shared memory.
__shared__ float sh_farr[256];
// A single int placed in shared memory.
__shared__ int a;
2. 为结构体的指针成员分配设备内存:

// Example aggregate mixing a plain value with two pointer members.
// The pointer members are meant to receive device allocations (cudaMalloc),
// so they must be allocated before use.
typedef struct Teacher_t
{
    int a;                  // ordinary scalar member
    unsigned int *g_mem1;   // pointer to a buffer of unsigned ints
    float        *g_mem2;   // pointer to a buffer of floats
} Teacher;
// Allocates the buffers referenced by a Teacher's pointer members with
// cudaMalloc.
//
// @param t         structure whose g_mem1 / g_mem2 members receive the
//                  newly allocated pointers
// @param mat_size  number of elements (not bytes) to allocate per buffer
void initMem( Teacher& t, const unsigned int mat_size)
{
    // Byte counts are computed in size_t (the type cudaMalloc expects) and
    // from the element type each buffer actually holds.
    const size_t mat_size_ui = sizeof(unsigned int) * mat_size;
    const size_t mat_size_f  = sizeof(float) * mat_size;

    CUDA_SAFE_CALL( cudaMalloc((void**)&t.g_mem1, mat_size_ui) );
    // The float buffer belongs to g_mem2. Allocating into g_mem1 a second
    // time would leak the first allocation and leave g_mem2 unset.
    CUDA_SAFE_CALL( cudaMalloc((void**)&t.g_mem2, mat_size_f) );

    // ... (further initialization elided in the original example)
}
3.计时:

// Time a region of code with the cutil timer helpers.
unsigned int timer = 0;
CUT_SAFE_CALL( cutCreateTimer( &timer));
CUT_SAFE_CALL( cutStartTimer( timer));
{
    // ... kernel launch(es) to be timed ...
}
// Kernel launches return to the host before the device has finished, so wait
// for completion first; otherwise only the launch overhead is measured.
CUDA_SAFE_CALL( cudaThreadSynchronize());
CUT_SAFE_CALL( cutStopTimer( timer));
printf( "Total time: %f ms ", cutGetTimerValue( timer) );
CUT_SAFE_CALL( cutDeleteTimer( timer));
4. 获取输入命令行中包含的文件名:

////////////////////////////////////////////////////////////////////////////////
//! Check if a particular filename has to be used for the file where the result
//! is stored
//! @param argc number of command line arguments (from main(argc, argv))
//! @param argv pointers to command line arguments (from main(argc, argv))
//! @param filename filename of result file; replaced by a malloc'ed copy of
//!                 the "filename-result" argument if the user specified one,
//!                 otherwise left untouched
////////////////////////////////////////////////////////////////////////////////
void
getResultFilename( int argc, char** argv, char*& filename)
{
    char* temp = NULL;
    cutGetCmdLineArgumentstr( argc, (const char**) argv, "filename-result", &temp);
    if( NULL != temp)
    {
        // +1 for the terminating NUL that strcpy writes; without it the copy
        // overruns the buffer by one byte.
        char* copy = (char*) malloc( sizeof(char) * (strlen( temp) + 1));
        if( NULL != copy)
        {
            strcpy( copy, temp);
            // The previous value is not freed here: it may be a string
            // literal or otherwise owned by the caller.
            filename = copy;
        }
        cutFree( temp);
    }
    printf( "Result filename: '%s' ", filename);
}
类似地,获取命令行中指定的精度参数:

////////////////////////////////////////////////////////////////////////////////
//! Check if a specific precision of the eigenvalue has to be obtained
//! @param argc number of command line arguments (from main(argc, argv))
//! @param argv pointers to command line arguments (from main(argc, argv))
//! @param precision desired precision, updated only if a positive value is
//!                  given through the "precision" command line argument
////////////////////////////////////////////////////////////////////////////////
void
getPrecision( int argc, char** argv, float& precision)
{
    // Negative sentinel: stays negative unless the helper stores a value.
    float temp = -1.0f;
    cutGetCmdLineArgumentf( argc, (const char**) argv, "precision", &temp);
    if( temp > 0.0f)
    {
        precision = temp;
    }
    printf( "Precision: %f ", precision);
}
5. Host 端调用完 kernel 函数后需要进行同步(等待设备端执行完毕);而在 __global__ 或 __device__ 函数内部,只需在必要的地方调用 __syncthreads()(同步同一线程块内的线程)即可:

// Host-side synchronization after a kernel call: wait for the device work
// before continuing on the host.
// NOTE(review): cudaThreadSynchronize() is deprecated in later CUDA toolkits
// in favor of cudaDeviceSynchronize() — confirm against the toolkit in use.
CUDA_SAFE_CALL( cudaThreadSynchronize());

本文来自CSDN博客,转载请标明出处:http://blog.csdn.net/dvchn/archive/2008/02/25/2119590.aspx

你可能感兴趣的:(CUDA学习)