CUDA里有cudaEvent_t类型,它是通过 typedef struct CUevent_st *cudaEvent_t; 定义的。CUevent_st这个结构体的源代码并没有公开,我们也不需要仔细关注它。
cudaEventCreate()函数用来创建一个cudaEvent_t类型的事件。如果需要记录时间,第二个参数(flags)就不需要写,使用默认值即可;当然我们现在正需要记录时间。接着调用cudaEventRecord()函数,把事件记录到流中,分别作为计时的起点和终点。再调用cudaEventSynchronize()同步一下,确保cudaEventRecord()记录的事件已经真正完成,这样才能获取到准确的值。然后调用cudaEventElapsedTime()函数来获取两个事件之间经过的时间(单位为毫秒)。最后调用cudaEventDestroy()函数来销毁创建的事件。
下面演示一段代码:
#include "cuda_runtime.h"

#include <cstdio>    // printf
#include <cstdlib>   // exit, EXIT_FAILURE
#include <iostream>

using namespace std;

// Prints the CUDA error string together with the call site and terminates
// the process if `err` is anything other than cudaSuccess.
static void HandleError( cudaError_t err, const char *file, int line )
{
    if (err != cudaSuccess)
    {
        printf( "%s in %s at line %d\n", cudaGetErrorString( err ), file, line );
        exit( EXIT_FAILURE );
    }
}

// Wraps a CUDA runtime call so that a failure reports the file/line of the call.
#define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))

#define DIM 2046
#define N (DIM * DIM)

// Element-wise add: c[i] = a[i] + b[i].
//
// Launch layout used by main(): a 2D grid of DIM x DIM blocks with one thread
// per block, so each block handles exactly one element and the flat index is
// derived from blockIdx only. All three pointers must refer to device buffers
// holding at least N ints.
__global__ void add( int *c, const int* a, const int* b )
{
    int x = blockIdx.x;
    int y = blockIdx.y;
    int offset = x + y * gridDim.x;

    // Guard against a launch whose grid covers more than N elements.
    if (offset < N)
    {
        c[offset] = a[offset] + b[offset];
    }
}

// Times a complete "allocate, copy, add on the GPU, copy back, verify" run
// with a pair of CUDA events and prints the elapsed time in milliseconds.
int main()
{
    cudaEvent_t timeStartEvent, timeEndEvent;

    // Flags value 0 requests a default event.
    HANDLE_ERROR( cudaEventCreate( &timeStartEvent, 0 ) );
    HANDLE_ERROR( cudaEventCreate( &timeEndEvent, 0 ) );

    // Mark the start of the measured region on stream 0.
    HANDLE_ERROR( cudaEventRecord( timeStartEvent, 0 ) );

    // ---- start of the code being timed ----
    int *a, *b, *c;
    int *dev_a, *dev_b, *dev_c;

    // Allocate host memory.
    a = new int[N];
    b = new int[N];
    c = new int[N];

    // Allocate device memory.
    HANDLE_ERROR( cudaMalloc( (void**)&dev_a, N * sizeof(int) ) );
    HANDLE_ERROR( cudaMalloc( (void**)&dev_b, N * sizeof(int) ) );
    HANDLE_ERROR( cudaMalloc( (void**)&dev_c, N * sizeof(int) ) );

    // Fill the inputs.
    for (int i = 0; i < N; i++)
    {
        a[i] = i;
        b[i] = 2 * i;
    }

    HANDLE_ERROR( cudaMemcpy( dev_a, a, N * sizeof(int), cudaMemcpyHostToDevice ) );
    HANDLE_ERROR( cudaMemcpy( dev_b, b, N * sizeof(int), cudaMemcpyHostToDevice ) );

    dim3 grid(DIM, DIM);
    add<<<grid, 1>>>( dev_c, dev_a, dev_b );
    // A kernel launch returns no status itself; query it explicitly so an
    // invalid launch configuration is reported here instead of on a later call.
    HANDLE_ERROR( cudaGetLastError() );

    // Copy the result back to the host.
    HANDLE_ERROR( cudaMemcpy( c, dev_c, N * sizeof(int), cudaMemcpyDeviceToHost ) );

    // Verify the GPU result against the same sum computed on the host.
    bool success = true;
    for (int i = 0; i < N; i++)
    {
        if ((a[i] + b[i]) != c[i])
        {
            cout << "Error: " << a[i] << " + " << b[i] << " != " << c[i] << endl;
            success = false;
        }
    }
    if (success) cout << "We did it!\n";

    HANDLE_ERROR( cudaFree( dev_a ) );
    HANDLE_ERROR( cudaFree( dev_b ) );
    HANDLE_ERROR( cudaFree( dev_c ) );

    delete[] a;
    delete[] b;
    delete[] c;
    // ---- end of the code being timed ----

    // Mark the end of the measured region, then wait until the end event has
    // actually completed before reading the elapsed time.
    HANDLE_ERROR( cudaEventRecord( timeEndEvent, 0 ) );
    HANDLE_ERROR( cudaEventSynchronize( timeEndEvent ) );

    float elapsedTime = 0;
    HANDLE_ERROR( cudaEventElapsedTime( &elapsedTime, timeStartEvent, timeEndEvent ) );

    cout << "elapsedTime " << elapsedTime << " ms. ";

    HANDLE_ERROR( cudaEventDestroy( timeStartEvent ) );
    HANDLE_ERROR( cudaEventDestroy( timeEndEvent ) );

    return 0;
}
http://cuda.it168.com/a2011/0810/1230/000001230729.shtml