CUDA: 检验执行时间

CUDA里有cudaEvent_t类型,它的定义是 typedef struct CUevent_st *cudaEvent_t; 。CUevent_st这个结构体的源代码并没有公开,使用时也不需要关注它的内部细节。

  cudaEventCreate()函数能创建一个cudaEvent_t类型的事件。如果需要记录时间,参数2(flags)可以不写,也可以像下面的代码一样传0;当然我们现在就需要记录时间。接着调用cudaEventRecord()函数把事件记录到流中,分别标记计时的起点和终点。再调用cudaEventSynchronize()同步一下,等待结束事件真正记录完成,以确保我们能获取到准确的值。然后调用cudaEventElapsedTime()函数来获取两个事件之间流逝的时间(单位为毫秒)。最后调用cudaEventDestroy()函数来销毁创建的事件。

  下面用一段代码来演示:

#include "cuda_runtime.h"

#include <iostream>
using namespace std;

/**
 * Terminates the program with a diagnostic when a CUDA runtime call failed.
 *
 * @param err   status code returned by the CUDA runtime call
 * @param file  source file name reported in the diagnostic
 * @param line  source line number reported in the diagnostic
 *
 * Returns normally only when err equals cudaSuccess; otherwise prints the
 * runtime's error string together with the location and exits with
 * EXIT_FAILURE.
 */
static void HandleError( cudaError_t err, const char *file, int line )
{
	// Fast path: the call succeeded, nothing to report.
	if (err == cudaSuccess)
		return;

	const char *description = cudaGetErrorString( err );
	printf( "%s in %s at line %d\n", description, file, line );
	exit( EXIT_FAILURE );
}
// Wraps a CUDA runtime call so that a failure is reported with the file and
// line of the call site.
#define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))


// Side length of the 2D grid, and the total element count it covers.
#define DIM 2046
#define N (DIM * DIM)

/**
 * Element-wise integer addition: c[i] = a[i] + b[i].
 *
 * Each block handles one element; the element index is the block's 2D
 * coordinate flattened row-major (x + y * gridDim.x). threadIdx is not
 * consulted, so every thread of a block writes the same element.
 *
 * There is no bounds check: the buffers must hold at least
 * gridDim.x * gridDim.y elements.
 *
 * @param c  output buffer
 * @param a  first input buffer
 * @param b  second input buffer
 */
__global__ void add( int *c, const int* a, const int* b )
{
	const int idx = blockIdx.x + blockIdx.y * gridDim.x;
	c[idx] = a[idx] + b[idx];
}

/**
 * Times a complete vector-add round trip with CUDA events.
 *
 * A start event and an end event are recorded on the default stream (0)
 * around the test code. Everything between the two records (host and device
 * allocation, input initialisation, both copies, the kernel, result
 * verification and cleanup) therefore lies inside the measured interval.
 *
 * Any failing CUDA runtime call terminates the program through HANDLE_ERROR.
 *
 * @return 0 when all CUDA calls succeed (result mismatches are only printed).
 */
int main()
{
	cudaEvent_t timeStartEvent,timeEndEvent;
	HANDLE_ERROR ( cudaEventCreate( &timeStartEvent, 0) );
	HANDLE_ERROR ( cudaEventCreate( &timeEndEvent, 0) );

	HANDLE_ERROR ( cudaEventRecord( timeStartEvent, 0) );

	// ---- begin code under test ----
	int *a, *b, *c;
	int *dev_a, *dev_b, *dev_c;

	// Allocate host buffers.
	a =  new int[N];
	b =  new int[N];
	c =  new int[N];
	// Allocate device buffers.
	HANDLE_ERROR( cudaMalloc( (void**)&dev_a, N * sizeof(int) ) );
	HANDLE_ERROR( cudaMalloc( (void**)&dev_b, N * sizeof(int) ) );
	HANDLE_ERROR( cudaMalloc( (void**)&dev_c, N * sizeof(int) ) );

	// Fill the inputs.
	for (int i=0; i<N; i++) 
	{
		a[i] = i;
		b[i] = 2 * i;
	}

	HANDLE_ERROR( cudaMemcpy( dev_a, a, N * sizeof(int), cudaMemcpyHostToDevice ) );
	HANDLE_ERROR( cudaMemcpy( dev_b, b, N * sizeof(int), cudaMemcpyHostToDevice ) );

	// One block per element, one thread per block: DIM x DIM blocks cover
	// exactly N elements.
	dim3    grid(DIM,DIM);
	add<<<grid,1>>>( dev_c,dev_a,dev_b );
	// A kernel launch returns no status of its own; a rejected launch
	// (e.g. an invalid configuration) is only reported by cudaGetLastError().
	HANDLE_ERROR( cudaGetLastError() );

	// Blocking copy back to the host.
	HANDLE_ERROR( cudaMemcpy( c, dev_c, N * sizeof(int), cudaMemcpyDeviceToHost ) );

	// Verify every element against the host-side sum.
	bool success = true;
	for (int i=0; i<N; i++) 
	{
		if ((a[i] + b[i]) != c[i]) 
		{
			cout << "Error:  "<<a[i]<<" + "<<b[i]<<" != "<<c[i]<<endl;
			success = false;
		}
	}
	if (success)    cout << "We did it!\n" ;

	HANDLE_ERROR( cudaFree( dev_a ) );
	HANDLE_ERROR( cudaFree( dev_b ) );
	HANDLE_ERROR( cudaFree( dev_c ) );

	delete[] a;
	delete[] b;
	delete[] c;
	// ---- end code under test ----

	HANDLE_ERROR ( cudaEventRecord( timeEndEvent, 0) );
	// Wait for the end event to complete before querying the elapsed time.
	HANDLE_ERROR ( cudaEventSynchronize( timeEndEvent ) );
	float elapsedTime = 0 ;
	HANDLE_ERROR ( cudaEventElapsedTime( & elapsedTime, timeStartEvent, timeEndEvent ) );

	cout << "elapsedTime  " << elapsedTime << " ms. ";
	HANDLE_ERROR( cudaEventDestroy( timeStartEvent ) );
	HANDLE_ERROR( cudaEventDestroy( timeEndEvent ) );
	return 0;
}
http://cuda.it168.com/a2011/0810/1230/000001230729.shtml

你可能感兴趣的:(CUDA: 检验执行时间)