/* Test latency for clEnqueueNDRangeKernel
 * http://pastebin.com/fije3CKf */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/time.h>
#include <CL/opencl.h>
// Status of the most recent OpenCL call made by openclSimpleCopy().
cl_int cl_error; // OpenCL error code
// Device selected in main(); also read by OpenCL_error_to_string() when it fetches the build log.
cl_device_id device_id; // The chosen device
// Program created in main(); also read by OpenCL_error_to_string() when it fetches the build log.
cl_program program; // OpenCL program
/**
 * Formats the standard macros __FILE__ and __LINE__ for message print.
 *
 * AT expands to a "file:line" string literal; STRINGIFY/TOSTRING are the
 * two-step expansion needed to turn __LINE__ into a string.
 */
#define STRINGIFY(x) #x
#define TOSTRING(x) STRINGIFY(x)
#define AT __FILE__ ":" TOSTRING(__LINE__)

/* Size in bytes of the static buffer that receives the program build log. */
#define DEBUG_BUFFER_SIZE 4096

char * OpenCL_error_to_string(int error);

/**
 * Print a printf-style message on stderr, prefixed with the call site.
 * msg must be a string literal because it is concatenated with AT.
 * ##__VA_ARGS__ is the GNU form that drops the comma when no extra
 * argument is supplied.
 */
#define dump(msg, ...) \
    fprintf(stderr, AT msg, ##__VA_ARGS__)

/**
 * Abort the program if an OpenCL status is not CL_SUCCESS.
 *
 * msg names the step being checked and is printed with the decoded error.
 * error is evaluated exactly once, so passing a function call directly is
 * safe: it is not executed a second time while the message is built.
 */
#define OpenCL_test_execution(msg, error) \
    do { \
        cl_int opencl_status_ = (error); \
        if (CL_SUCCESS != opencl_status_) { \
            dump(" %s: The runtime error is %s\n", (msg), \
                 (char *)OpenCL_error_to_string(opencl_status_)); \
            exit(EXIT_FAILURE); \
        } \
    } while (0)
static double t_start, t_end; // Timing: start and stop timestamps, in seconds

/**
 * Return the current wall-clock time in seconds.
 *
 * Reads the clock with gettimeofday() (declared in <sys/time.h>) and
 * combines its seconds and microseconds fields into one double.
 * Terminates the process if the clock cannot be read.
 */
double timer_get_time(void)
{
    struct timeval now;

    if (gettimeofday(&now, NULL) != 0) {
        // perror() appends ": <errno text>" and a newline on its own.
        perror("Error gettimeofday");
        exit(EXIT_FAILURE);
    }
    return (double)now.tv_sec + (double)now.tv_usec * 1.0e-6;
}
/** Record the current time in t_start as the beginning of a measurement. */
void timer_start(void)
{
    t_start = timer_get_time();
}
/**
 * End the measurement begun by timer_start() and print it.
 *
 * Stores the current time in t_end, then prints "<msg> : <elapsed>" on
 * stdout, where elapsed is (t_end - t_start) multiplied by 1000, i.e.
 * seconds converted to milliseconds, shown with one decimal.
 *
 * @param msg Label printed before the elapsed time; it is only read.
 */
void timer_stop_display(const char *msg)
{
    t_end = timer_get_time();
    printf("%s : %0.1f\n", msg, (t_end - t_start) * 1000);
}
/**
 * Copy n ints through the device with the given kernel and time the enqueue.
 *
 * Fills a host array, uploads it with a blocking write, enqueues the
 * kernel (only that call is timed and reported by timer_stop_display),
 * reads the result back with a blocking read, compares it with the input
 * and releases everything created here. Any OpenCL failure or data
 * mismatch terminates the process.
 *
 * @param context Context in which the two device buffers are created.
 * @param queue   Command queue that receives the write, kernel and read.
 * @param kernel  Kernel whose arguments 0 and 1 are set to the source and
 *                destination buffers.
 * @param n       Number of ints to copy; also the 1-D global work size.
 */
void openclSimpleCopy(cl_context context, cl_command_queue queue, cl_kernel kernel, size_t n) {
    const size_t bytes = n * sizeof(int);

    // Host data is heap-allocated: main() calls this with n = 10000000,
    // and two arrays of that many ints do not fit on a typical stack.
    int *a = malloc(bytes);
    int *b = malloc(bytes);
    if (bytes != 0 && (a == NULL || b == NULL)) {
        fprintf(stderr, "Cannot allocate host buffers for %zu ints\n", n);
        exit(EXIT_FAILURE);
    }

    // Init
    for (size_t i = 0; i < n; i++) {
        a[i] = (int)(n - i);
        b[i] = 0;
    }

    // Each status is stored in cl_error before it is checked, so every
    // OpenCL call is made exactly once however the checking macro expands.

    // Buffers on the device
    cl_mem a_dev = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &cl_error);
    OpenCL_test_execution("Create Buffer", cl_error);
    cl_mem b_dev = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &cl_error);
    OpenCL_test_execution("Create Buffer", cl_error);

    // One event per enqueued command
    cl_event write_done;
    cl_event kernel_done;
    cl_event read_done;

    // Initialize buffer on the device (blocking write)
    cl_error = clEnqueueWriteBuffer(queue, a_dev, CL_TRUE, 0, bytes, a,
                                    0, NULL, &write_done);
    OpenCL_test_execution("Write to Buffer", cl_error);

    // Should be redundant after a blocking write; kept from the original
    // experiment, but failures are no longer ignored.
    cl_error = clFlush(queue);
    OpenCL_test_execution("clFlush", cl_error);
    cl_error = clWaitForEvents(1, &write_done);
    OpenCL_test_execution("clWaitForEvents", cl_error);

    // Arguments for the kernel
    cl_error = clSetKernelArg(kernel, 0, sizeof(a_dev), &a_dev);
    OpenCL_test_execution("Set argument 0 ", cl_error);
    cl_error = clSetKernelArg(kernel, 1, sizeof(b_dev), &b_dev);
    OpenCL_test_execution("Set argument 1", cl_error);

    // Only the enqueue call is between timer_start and timer_stop_display.
    timer_start();
    cl_error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &n, NULL,
                                      1, &write_done, &kernel_done);
    timer_stop_display("Time for Enqueue");
    OpenCL_test_execution("Enqueue kernel", cl_error);

    cl_error = clWaitForEvents(1, &kernel_done);
    OpenCL_test_execution("clWaitForEvents", cl_error);

    cl_error = clEnqueueReadBuffer(queue, b_dev, CL_TRUE, 0, bytes, b,
                                   1, &kernel_done, &read_done);
    OpenCL_test_execution("Read from buffer", cl_error);
    cl_error = clWaitForEvents(1, &read_done);
    OpenCL_test_execution("clWaitForEvents", cl_error);

    // Check result
    for (size_t i = 0; i < n; i++) {
        if (a[i] != b[i]) {
            printf("Error %zu : %d!=%d\n", i, a[i], b[i]);
            exit(-1);
        }
    }

    // Events returned by the enqueue calls must be released by the caller.
    cl_error = clReleaseEvent(write_done);
    OpenCL_test_execution("Release event", cl_error);
    cl_error = clReleaseEvent(kernel_done);
    OpenCL_test_execution("Release event", cl_error);
    cl_error = clReleaseEvent(read_done);
    OpenCL_test_execution("Release event", cl_error);

    cl_error = clReleaseMemObject(a_dev);
    OpenCL_test_execution("Release mem object", cl_error);
    cl_error = clReleaseMemObject(b_dev);
    OpenCL_test_execution("Release mem object", cl_error);

    free(a);
    free(b);
}
int main(int argc, char **argv) {
int platform_num = 0; // Platform number
int device_num = 0; // Device number
#define DEVICE_TYPE CL_DEVICE_TYPE_ALL
cl_int cl_error; // OpenCL error code
cl_kernel kernel = NULL;
// Chosing platform
cl_uint num_platforms;
clGetPlatformIDs(0, NULL, &num_platforms);
if(num_platforms <= 0) {
dump("No OpenCL platforms found :-(\n");
exit(-1);
}
cl_platform_id platform_ids[num_platforms];
clGetPlatformIDs(num_platforms, platform_ids, NULL);
if(platform_num < 0 || platform_num >= num_platforms) {
dump("Invalid platform: %d\n", platform_num);
exit(EXIT_FAILURE);
}
// platform_id hold the chosen platform
cl_platform_id platform_id = platform_ids[platform_num];
// Chosing the device
cl_uint num_devices;
OpenCL_test_execution("Get number of devices", clGetDeviceIDs(platform_id, DEVICE_TYPE, 0, NULL, &num_devices));
if(num_devices <= 0) {
dump("No devices found associated to this OpenCL platform :-(\n");
exit(-1);
}
// Allocate spaces for devices
cl_device_id devices[num_devices];
// Get devices list
OpenCL_test_execution("Get devices list", clGetDeviceIDs(platform_id, DEVICE_TYPE, num_devices, devices, NULL));
/**//* Create a context for all devices */
cl_context context = clCreateContext(0,
num_devices,
devices,
NULL,
"from 'context'",
&cl_error);
OpenCL_test_execution("Context creation",cl_error);
// Here is the device ID
device_id = devices[device_num];
/**//* Create an in-order queue for this device */
cl_command_queue queue = clCreateCommandQueue(context, device_id, 0, &cl_error);
OpenCL_test_execution("Create command queue",cl_error);
// END OF OPENCL INITIALIZATION
const char *kernel_str = " __kernel void copy(__global int *a, __global int *b) {"
" int i = get_global_id(0);"
" b[i]=a[i];"
"}";
program = clCreateProgramWithSource(context,
1,
&kernel_str,
NULL,
&cl_error);
OpenCL_test_execution("Create program with source",cl_error);
cl_error = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
OpenCL_test_execution("Build Program",cl_error);
kernel = clCreateKernel(program, "copy", &cl_error);
OpenCL_test_execution("Create kernel",cl_error);
// Size of problem, 10^5 will give me 1.5ms for the enqueue, 10^6 up to 30ms!
size_t n = 10000000;
int _i;
// Run the sequence many times
printf("Run with n = %zu\n",n);
for(_i=0;_i<10;_i++) {
openclSimpleCopy(context, queue, kernel,n);
}
// Run the same sequence with a smaller problem size
n = n/100;
printf("Run with n = %zu\n",n);
for(_i=0;_i<10;_i++) {
openclSimpleCopy(context, queue, kernel,n);
}
}
/**
 * Translate an OpenCL status code into a human-readable string.
 *
 * For CL_BUILD_PROGRAM_FAILURE the result is "Build Program Failure : "
 * followed by the build log of the global `program` for the global
 * `device_id`. It lives in a static buffer that the next such call
 * overwrites. Every other result is a string literal. Callers must not
 * modify or free the returned string.
 *
 * @param error OpenCL status code.
 * @return Description of the code, or "Unknown" if it is not listed.
 */
char * OpenCL_error_to_string(int error) {
    static const struct {
        int code;
        const char *text;
    } names[] = {
        { CL_SUCCESS, "Success" },
        { CL_DEVICE_NOT_FOUND, "Device Not Found" },
        { CL_DEVICE_NOT_AVAILABLE, "Device Not Available" },
        { CL_COMPILER_NOT_AVAILABLE, "Compiler Not Available" },
        { CL_MEM_OBJECT_ALLOCATION_FAILURE, "Mem Object Allocation Failure" },
        { CL_OUT_OF_RESOURCES, "Out Of Ressources" },
        { CL_OUT_OF_HOST_MEMORY, "Out Of Host Memory" },
        { CL_PROFILING_INFO_NOT_AVAILABLE, "Profiling Info Not Available" },
        { CL_MEM_COPY_OVERLAP, "Mem Copy Overlap" },
        { CL_IMAGE_FORMAT_MISMATCH, "Image Format Mismatch" },
        { CL_IMAGE_FORMAT_NOT_SUPPORTED, "Image Format Not Supported" },
        { CL_MAP_FAILURE, "Map Failure" },
        { CL_INVALID_VALUE, "Invalid Value" },
        { CL_INVALID_DEVICE_TYPE, "Invalid Device Type" },
        { CL_INVALID_PLATFORM, "Invalid Platform" },
        { CL_INVALID_DEVICE, "Invalid Device" },
        { CL_INVALID_CONTEXT, "Invalid Context" },
        { CL_INVALID_QUEUE_PROPERTIES, "Invalid Queue Properties" },
        { CL_INVALID_COMMAND_QUEUE, "Invalid Command Queue" },
        { CL_INVALID_HOST_PTR, "Invalid Host Ptr" },
        { CL_INVALID_MEM_OBJECT, "Invalid Mem Object" },
        { CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, "Invalid Image Format Descriptor" },
        { CL_INVALID_IMAGE_SIZE, "Invalid Image Size" },
        { CL_INVALID_SAMPLER, "Invalid Sampler" },
        { CL_INVALID_BINARY, "Invalid Binary" },
        { CL_INVALID_BUILD_OPTIONS, "Invalid Build Options" },
        { CL_INVALID_PROGRAM, "Invalid Program" },
        { CL_INVALID_PROGRAM_EXECUTABLE, "Invalid Program Executable" },
        { CL_INVALID_KERNEL_NAME, "Invalid Kernel Name" },
        { CL_INVALID_KERNEL_DEFINITION, "Invalid Kernel Definition" },
        { CL_INVALID_KERNEL, "Invalid Kernel" },
        { CL_INVALID_ARG_INDEX, "Invalid Arg Index" },
        { CL_INVALID_ARG_VALUE, "Invalid Arg Value" },
        { CL_INVALID_ARG_SIZE, "Invalid Arg Size" },
        { CL_INVALID_KERNEL_ARGS, "Invalid Kernel Args" },
        { CL_INVALID_WORK_DIMENSION, "Invalid Work Dimension" },
        { CL_INVALID_WORK_GROUP_SIZE, "Invalid Work Group Size" },
        { CL_INVALID_WORK_ITEM_SIZE, "Invalid Work Item Size" },
        { CL_INVALID_GLOBAL_OFFSET, "Invalid Global Offset" },
        { CL_INVALID_EVENT_WAIT_LIST, "Invalid Event Wait List" },
        { CL_INVALID_EVENT, "Invalid Event" },
        { CL_INVALID_OPERATION, "Invalid Operation" },
        { CL_INVALID_GL_OBJECT, "Invalid GL Object" },
        { CL_INVALID_BUFFER_SIZE, "Invalid Buffer Size" },
        { CL_INVALID_MIP_LEVEL, "Invalid Mip Level" },
        { CL_INVALID_GLOBAL_WORK_SIZE, "Invalid Global Work Size" },
    };

    if (error == CL_BUILD_PROGRAM_FAILURE) {
#define CL_BUILD_PROGRAM_FAILURE_MSG "Build Program Failure : "
        static char debug_buffer[DEBUG_BUFFER_SIZE]; // Static to be returned
        const size_t prefix_len = sizeof(CL_BUILD_PROGRAM_FAILURE_MSG) - 1;

        // Write the prefix from the start of the buffer on every call; the
        // buffer is static, so appending would pile up across calls.
        memcpy(debug_buffer, CL_BUILD_PROGRAM_FAILURE_MSG, prefix_len + 1);

        // The log is written after the prefix, so only the remaining
        // space may be offered to OpenCL.
        if (clGetProgramBuildInfo(program,
                                  device_id,
                                  CL_PROGRAM_BUILD_LOG,
                                  DEBUG_BUFFER_SIZE - prefix_len,
                                  debug_buffer + prefix_len,
                                  NULL) != CL_SUCCESS) {
            // No log could be fetched: return the prefix alone.
            debug_buffer[prefix_len] = '\0';
        }
        debug_buffer[DEBUG_BUFFER_SIZE - 1] = '\0';
        return debug_buffer;
    }

    for (size_t i = 0; i < sizeof names / sizeof names[0]; i++) {
        if (names[i].code == error) {
            // The declared return type is char *; the text is a literal.
            return (char *)names[i].text;
        }
    }
    return (char *)"Unknown";
}
-
#include <stdlib.h>
-
#include <stdio.h>
-
#include <string.h>
-
#include <CL/opencl.h>
-
-
cl_int cl_error ; // OpenCL error code
-
cl_device_id device_id ; // The chosen device
-
cl_program program ; // OpenCL program
-
-
-
/**
 * Formats the standard macros __FILE__ and __LINE__ for message print.
 *
 * AT expands to a "file:line" string literal; STRINGIFY/TOSTRING are the
 * two-step expansion needed to turn __LINE__ into a string.
 */
#define STRINGIFY(x) #x
#define TOSTRING(x) STRINGIFY(x)
#define AT __FILE__ ":" TOSTRING(__LINE__)

/* Size in bytes of the static buffer that receives the program build log. */
#define DEBUG_BUFFER_SIZE 4096

char * OpenCL_error_to_string(int error);

/**
 * Print a printf-style message on stderr, prefixed with the call site.
 * msg must be a string literal because it is concatenated with AT.
 * ##__VA_ARGS__ is the GNU form that drops the comma when no extra
 * argument is supplied.
 */
#define dump(msg, ...) \
    fprintf(stderr, AT msg, ##__VA_ARGS__)

/**
 * Abort the program if an OpenCL status is not CL_SUCCESS.
 *
 * msg names the step being checked and is printed with the decoded error.
 * error is evaluated exactly once, so passing a function call directly is
 * safe: it is not executed a second time while the message is built.
 */
#define OpenCL_test_execution(msg, error) \
    do { \
        cl_int opencl_status_ = (error); \
        if (CL_SUCCESS != opencl_status_) { \
            dump(" %s: The runtime error is %s\n", (msg), \
                 (char *)OpenCL_error_to_string(opencl_status_)); \
            exit(EXIT_FAILURE); \
        } \
    } while (0)
-
-
-
-
-
static double t_start, t_end; // Timing: start and stop timestamps, in seconds

/**
 * Return the current wall-clock time in seconds.
 *
 * Reads the clock with gettimeofday() (declared in <sys/time.h>) and
 * combines its seconds and microseconds fields into one double.
 * Terminates the process if the clock cannot be read.
 */
double timer_get_time(void)
{
    struct timeval now;

    if (gettimeofday(&now, NULL) != 0) {
        // perror() appends ": <errno text>" and a newline on its own.
        perror("Error gettimeofday");
        exit(EXIT_FAILURE);
    }
    return (double)now.tv_sec + (double)now.tv_usec * 1.0e-6;
}
-
-
/** Record the current time in t_start as the beginning of a measurement. */
void timer_start(void)
{
    t_start = timer_get_time();
}
-
-
/**
 * End the measurement begun by timer_start() and print it.
 *
 * Stores the current time in t_end, then prints "<msg> : <elapsed>" on
 * stdout, where elapsed is (t_end - t_start) multiplied by 1000, i.e.
 * seconds converted to milliseconds, shown with one decimal.
 *
 * @param msg Label printed before the elapsed time; it is only read.
 */
void timer_stop_display(const char *msg)
{
    t_end = timer_get_time();
    printf("%s : %0.1f\n", msg, (t_end - t_start) * 1000);
}
-
-
-
-
/**
 * Copy n ints through the device with the given kernel and time the enqueue.
 *
 * Fills a host array, uploads it with a blocking write, enqueues the
 * kernel (only that call is timed and reported by timer_stop_display),
 * reads the result back with a blocking read, compares it with the input
 * and releases everything created here. Any OpenCL failure or data
 * mismatch terminates the process.
 *
 * @param context Context in which the two device buffers are created.
 * @param queue   Command queue that receives the write, kernel and read.
 * @param kernel  Kernel whose arguments 0 and 1 are set to the source and
 *                destination buffers.
 * @param n       Number of ints to copy; also the 1-D global work size.
 */
void openclSimpleCopy(cl_context context, cl_command_queue queue, cl_kernel kernel, size_t n) {
    const size_t bytes = n * sizeof(int);

    // Host data is heap-allocated: main() calls this with n = 10000000,
    // and two arrays of that many ints do not fit on a typical stack.
    int *a = malloc(bytes);
    int *b = malloc(bytes);
    if (bytes != 0 && (a == NULL || b == NULL)) {
        fprintf(stderr, "Cannot allocate host buffers for %zu ints\n", n);
        exit(EXIT_FAILURE);
    }

    // Init
    for (size_t i = 0; i < n; i++) {
        a[i] = (int)(n - i);
        b[i] = 0;
    }

    // Each status is stored in cl_error before it is checked, so every
    // OpenCL call is made exactly once however the checking macro expands.

    // Buffers on the device
    cl_mem a_dev = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &cl_error);
    OpenCL_test_execution("Create Buffer", cl_error);
    cl_mem b_dev = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &cl_error);
    OpenCL_test_execution("Create Buffer", cl_error);

    // One event per enqueued command
    cl_event write_done;
    cl_event kernel_done;
    cl_event read_done;

    // Initialize buffer on the device (blocking write)
    cl_error = clEnqueueWriteBuffer(queue, a_dev, CL_TRUE, 0, bytes, a,
                                    0, NULL, &write_done);
    OpenCL_test_execution("Write to Buffer", cl_error);

    // Should be redundant after a blocking write; kept from the original
    // experiment, but failures are no longer ignored.
    cl_error = clFlush(queue);
    OpenCL_test_execution("clFlush", cl_error);
    cl_error = clWaitForEvents(1, &write_done);
    OpenCL_test_execution("clWaitForEvents", cl_error);

    // Arguments for the kernel
    cl_error = clSetKernelArg(kernel, 0, sizeof(a_dev), &a_dev);
    OpenCL_test_execution("Set argument 0 ", cl_error);
    cl_error = clSetKernelArg(kernel, 1, sizeof(b_dev), &b_dev);
    OpenCL_test_execution("Set argument 1", cl_error);

    // Only the enqueue call is between timer_start and timer_stop_display.
    timer_start();
    cl_error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &n, NULL,
                                      1, &write_done, &kernel_done);
    timer_stop_display("Time for Enqueue");
    OpenCL_test_execution("Enqueue kernel", cl_error);

    cl_error = clWaitForEvents(1, &kernel_done);
    OpenCL_test_execution("clWaitForEvents", cl_error);

    cl_error = clEnqueueReadBuffer(queue, b_dev, CL_TRUE, 0, bytes, b,
                                   1, &kernel_done, &read_done);
    OpenCL_test_execution("Read from buffer", cl_error);
    cl_error = clWaitForEvents(1, &read_done);
    OpenCL_test_execution("clWaitForEvents", cl_error);

    // Check result
    for (size_t i = 0; i < n; i++) {
        if (a[i] != b[i]) {
            printf("Error %zu : %d!=%d\n", i, a[i], b[i]);
            exit(-1);
        }
    }

    // Events returned by the enqueue calls must be released by the caller.
    cl_error = clReleaseEvent(write_done);
    OpenCL_test_execution("Release event", cl_error);
    cl_error = clReleaseEvent(kernel_done);
    OpenCL_test_execution("Release event", cl_error);
    cl_error = clReleaseEvent(read_done);
    OpenCL_test_execution("Release event", cl_error);

    cl_error = clReleaseMemObject(a_dev);
    OpenCL_test_execution("Release mem object", cl_error);
    cl_error = clReleaseMemObject(b_dev);
    OpenCL_test_execution("Release mem object", cl_error);

    free(a);
    free(b);
}
-
-
-
-
int main ( int argc , char **argv ) {
-
-
int platform_num = 0 ; // Platform number
-
int device_num = 0 ; // Device number
-
#define DEVICE_TYPE CL_DEVICE_TYPE_ALL
-
-
cl_int cl_error ; // OpenCL error code
-
cl_kernel kernel = NULL ;
-
-
-
// Chosing platform
-
cl_uint num_platforms ;
-
clGetPlatformIDs ( 0 , NULL , &num_platforms ) ;
-
if (num_platforms <= 0 ) {
-
dump ( "No OpenCL platforms found :-(\n" ) ;
-
exit ( - 1 ) ;
-
}
-
-
cl_platform_id platform_ids [num_platforms ] ;
-
clGetPlatformIDs (num_platforms , platform_ids , NULL ) ;
-
-
if (platform_num < 0 || platform_num >= num_platforms ) {
-
dump ( "Invalid platform: %d\n" , platform_num ) ;
-
exit (EXIT_FAILURE ) ;
-
}
-
-
// platform_id hold the chosen platform
-
cl_platform_id platform_id = platform_ids [platform_num ] ;
-
-
// Chosing the device
-
-
cl_uint num_devices ;
-
OpenCL_test_execution ( "Get number of devices" , clGetDeviceIDs (platform_id , DEVICE_TYPE , 0 , NULL , &num_devices ) ) ;
-
-
if (num_devices <= 0 ) {
-
dump ( "No devices found associated to this OpenCL platform :-(\n" ) ;
-
exit ( - 1 ) ;
-
}
-
-
// Allocate spaces for devices
-
cl_device_id devices [num_devices ] ;
-
-
// Get devices list
-
OpenCL_test_execution ( "Get devices list" , clGetDeviceIDs (platform_id , DEVICE_TYPE , num_devices , devices , NULL ) ) ;
-
-
/* Create a context for all devices */
-
cl_context context = clCreateContext ( 0 ,
-
num_devices ,
-
devices ,
-
NULL ,
-
"from 'context'" ,
-
&cl_error ) ;
-
OpenCL_test_execution ( "Context creation" ,cl_error ) ;
-
-
// Here is the device ID
-
device_id = devices [device_num ] ;
-
-
/* Create an in-order queue for this device */
-
cl_command_queue queue = clCreateCommandQueue (context , device_id , 0 , &cl_error ) ;
-
OpenCL_test_execution ( "Create command queue" ,cl_error ) ;
-
-
// END OF OPENCL INITIALIZATION
-
const char *kernel_str = " __kernel void copy(__global int *a, __global int *b) {"
-
" int i = get_global_id(0);"
-
" b[i]=a[i];"
-
"}" ;
-
-
program = clCreateProgramWithSource (context ,
-
1 ,
-
&kernel_str ,
-
NULL ,
-
&cl_error ) ;
-
OpenCL_test_execution ( "Create program with source" ,cl_error ) ;
-
-
cl_error = clBuildProgram (program , 0 , NULL , NULL , NULL , NULL ) ;
-
OpenCL_test_execution ( "Build Program" ,cl_error ) ;
-
-
kernel = clCreateKernel (program , "copy" , &cl_error ) ;
-
OpenCL_test_execution ( "Create kernel" ,cl_error ) ;
-
-
-
-
-
// Size of problem, 10^5 will give me 1.5ms for the enqueue, 10^6 up to 30ms!
-
size_t n = 10000000 ;
-
int _i ;
-
// Run the sequence many times
-
printf ( "Run with n = %zu\n" ,n ) ;
-
for (_i = 0 ;_i < 10 ;_i ++ ) {
-
openclSimpleCopy (context , queue , kernel ,n ) ;
-
}
-
-
// Run the same sequence with a smaller problem size
-
n = n / 100 ;
-
printf ( "Run with n = %zu\n" ,n ) ;
-
for (_i = 0 ;_i < 10 ;_i ++ ) {
-
openclSimpleCopy (context , queue , kernel ,n ) ;
-
}
-
-
-
}
-
-
-
-
char * OpenCL_error_to_string ( int error ) {
-
switch (error )
-
{
-
case CL_SUCCESS :
-
return ( char * ) "Success" ;
-
case CL_DEVICE_NOT_FOUND :
-
return ( char * ) "Device Not Found" ;
-
case CL_DEVICE_NOT_AVAILABLE :
-
return ( char * ) "Device Not Available" ;
-
case CL_COMPILER_NOT_AVAILABLE :
-
return ( char * ) "Compiler Not Available" ;
-
case CL_MEM_OBJECT_ALLOCATION_FAILURE :
-
return ( char * ) "Mem Object Allocation Failure" ;
-
case CL_OUT_OF_RESOURCES :
-
return ( char * ) "Out Of Ressources" ;
-
case CL_OUT_OF_HOST_MEMORY :
-
return ( char * ) "Out Of Host Memory" ;
-
case CL_PROFILING_INFO_NOT_AVAILABLE :
-
return ( char * ) "Profiling Info Not Available" ;
-
case CL_MEM_COPY_OVERLAP :
-
return ( char * ) "Mem Copy Overlap" ;
-
case CL_IMAGE_FORMAT_MISMATCH :
-
return ( char * ) "Image Format Mismatch" ;
-
case CL_IMAGE_FORMAT_NOT_SUPPORTED :
-
return ( char * ) "Image Format Not Supported" ;
-
case CL_BUILD_PROGRAM_FAILURE : {
-
#define CL_BUILD_PROGRAM_FAILURE_MSG "Build Program Failure : "
-
static char debug_buffer [DEBUG_BUFFER_SIZE ] ; // Static to be returned
-
strncat (debug_buffer ,CL_BUILD_PROGRAM_FAILURE_MSG ,DEBUG_BUFFER_SIZE ) ;
-
clGetProgramBuildInfo (program ,
-
device_id ,
-
CL_PROGRAM_BUILD_LOG ,
-
DEBUG_BUFFER_SIZE ,
-
debug_buffer + strlen (CL_BUILD_PROGRAM_FAILURE_MSG ) ,
-
NULL ) ;
-
return ( char * )debug_buffer ;
-
}
-
case CL_MAP_FAILURE :
-
return ( char * ) "Map Failure" ;
-
case CL_INVALID_VALUE :
-
return ( char * ) "Invalid Value" ;
-
case CL_INVALID_DEVICE_TYPE :
-
return ( char * ) "Invalid Device Type" ;
-
case CL_INVALID_PLATFORM :
-
return ( char * ) "Invalid Platform" ;
-
case CL_INVALID_DEVICE :
-
return ( char * ) "Invalid Device" ;
-
case CL_INVALID_CONTEXT :
-
return ( char * ) "Invalid Context" ;
-
case CL_INVALID_QUEUE_PROPERTIES :
-
return ( char * ) "Invalid Queue Properties" ;
-
case CL_INVALID_COMMAND_QUEUE :
-
return ( char * ) "Invalid Command Queue" ;
-
case CL_INVALID_HOST_PTR :
-
return ( char * ) "Invalid Host Ptr" ;
-
case CL_INVALID_MEM_OBJECT :
-
return ( char * ) "Invalid Mem Object" ;
-
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR :
-
return ( char * ) "Invalid Image Format Descriptor" ;
-
case CL_INVALID_IMAGE_SIZE :
-
return ( char * ) "Invalid Image Size" ;
-
case CL_INVALID_SAMPLER :
-
return ( char * ) "Invalid Sampler" ;
-
case CL_INVALID_BINARY :
-
return ( char * ) "Invalid Binary" ;
-
case CL_INVALID_BUILD_OPTIONS :
-
return ( char * ) "Invalid Build Options" ;
-
case CL_INVALID_PROGRAM :
-
return ( char * ) "Invalid Program" ;
-
case CL_INVALID_PROGRAM_EXECUTABLE :
-
return ( char * ) "Invalid Program Executable" ;
-
case CL_INVALID_KERNEL_NAME :
-
return ( char * ) "Invalid Kernel Name" ;
-
case CL_INVALID_KERNEL_DEFINITION :
-
return ( char * ) "Invalid Kernel Definition" ;
-
case CL_INVALID_KERNEL :
-
return ( char * ) "Invalid Kernel" ;
-
case CL_INVALID_ARG_INDEX :
-
return ( char * ) "Invalid Arg Index" ;
-
case CL_INVALID_ARG_VALUE :
-
return ( char * ) "Invalid Arg Value" ;
-
case CL_INVALID_ARG_SIZE :
-
return ( char * ) "Invalid Arg Size" ;
-
case CL_INVALID_KERNEL_ARGS :
-
return ( char * ) "Invalid Kernel Args" ;
-
case CL_INVALID_WORK_DIMENSION :
-
return ( char * ) "Invalid Work Dimension" ;
-
case CL_INVALID_WORK_GROUP_SIZE :
-
return ( char * ) "Invalid Work Group Size" ;
-
case CL_INVALID_WORK_ITEM_SIZE :
-
return ( char * ) "Invalid Work Item Size" ;
-
case CL_INVALID_GLOBAL_OFFSET :
-
return ( char * ) "Invalid Global Offset" ;
-
case CL_INVALID_EVENT_WAIT_LIST :
-
return ( char * ) "Invalid Event Wait List" ;
-
case CL_INVALID_EVENT :
-
return ( char * ) "Invalid Event" ;
-
case CL_INVALID_OPERATION :
-
return ( char * ) "Invalid Operation" ;
-
case CL_INVALID_GL_OBJECT :
-
return ( char * ) "Invalid GL Object" ;
-
case CL_INVALID_BUFFER_SIZE :
-
return ( char * ) "Invalid Buffer Size" ;
-
case CL_INVALID_MIP_LEVEL :
-
return ( char * ) "Invalid Mip Level" ;
-
case CL_INVALID_GLOBAL_WORK_SIZE :
-
return ( char * ) "Invalid Global Work Size" ;
-
default :
-
break ;
-
}
-
return "Unknown" ;
-
}
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <CL/opencl.h>
// Status of the most recent OpenCL call made by openclSimpleCopy().
cl_int cl_error; // OpenCL error code
// Device selected in main(); also read by OpenCL_error_to_string() when it fetches the build log.
cl_device_id device_id; // The chosen device
// Program created in main(); also read by OpenCL_error_to_string() when it fetches the build log.
cl_program program; // OpenCL program
/**
 * Formats the standard macros __FILE__ and __LINE__ for message print.
 *
 * AT expands to a "file:line" string literal; STRINGIFY/TOSTRING are the
 * two-step expansion needed to turn __LINE__ into a string.
 */
#define STRINGIFY(x) #x
#define TOSTRING(x) STRINGIFY(x)
#define AT __FILE__ ":" TOSTRING(__LINE__)

/* Size in bytes of the static buffer that receives the program build log. */
#define DEBUG_BUFFER_SIZE 4096

char * OpenCL_error_to_string(int error);

/**
 * Print a printf-style message on stderr, prefixed with the call site.
 * msg must be a string literal because it is concatenated with AT.
 * ##__VA_ARGS__ is the GNU form that drops the comma when no extra
 * argument is supplied.
 */
#define dump(msg, ...) \
    fprintf(stderr, AT msg, ##__VA_ARGS__)

/**
 * Abort the program if an OpenCL status is not CL_SUCCESS.
 *
 * msg names the step being checked and is printed with the decoded error.
 * error is evaluated exactly once, so passing a function call directly is
 * safe: it is not executed a second time while the message is built.
 */
#define OpenCL_test_execution(msg, error) \
    do { \
        cl_int opencl_status_ = (error); \
        if (CL_SUCCESS != opencl_status_) { \
            dump(" %s: The runtime error is %s\n", (msg), \
                 (char *)OpenCL_error_to_string(opencl_status_)); \
            exit(EXIT_FAILURE); \
        } \
    } while (0)
static double t_start, t_end; // Timing: start and stop timestamps, in seconds

/**
 * Return the current wall-clock time in seconds.
 *
 * Reads the clock with gettimeofday() (declared in <sys/time.h>) and
 * combines its seconds and microseconds fields into one double.
 * Terminates the process if the clock cannot be read.
 */
double timer_get_time(void)
{
    struct timeval now;

    if (gettimeofday(&now, NULL) != 0) {
        // perror() appends ": <errno text>" and a newline on its own.
        perror("Error gettimeofday");
        exit(EXIT_FAILURE);
    }
    return (double)now.tv_sec + (double)now.tv_usec * 1.0e-6;
}
/** Record the current time in t_start as the beginning of a measurement. */
void timer_start(void)
{
    t_start = timer_get_time();
}
/**
 * End the measurement begun by timer_start() and print it.
 *
 * Stores the current time in t_end, then prints "<msg> : <elapsed>" on
 * stdout, where elapsed is (t_end - t_start) multiplied by 1000, i.e.
 * seconds converted to milliseconds, shown with one decimal.
 *
 * @param msg Label printed before the elapsed time; it is only read.
 */
void timer_stop_display(const char *msg)
{
    t_end = timer_get_time();
    printf("%s : %0.1f\n", msg, (t_end - t_start) * 1000);
}
/**
 * Copy n ints through the device with the given kernel and time the enqueue.
 *
 * Fills a host array, uploads it with a blocking write, enqueues the
 * kernel (only that call is timed and reported by timer_stop_display),
 * reads the result back with a blocking read, compares it with the input
 * and releases everything created here. Any OpenCL failure or data
 * mismatch terminates the process.
 *
 * @param context Context in which the two device buffers are created.
 * @param queue   Command queue that receives the write, kernel and read.
 * @param kernel  Kernel whose arguments 0 and 1 are set to the source and
 *                destination buffers.
 * @param n       Number of ints to copy; also the 1-D global work size.
 */
void openclSimpleCopy(cl_context context, cl_command_queue queue, cl_kernel kernel, size_t n) {
    const size_t bytes = n * sizeof(int);

    // Host data is heap-allocated: main() calls this with n = 10000000,
    // and two arrays of that many ints do not fit on a typical stack.
    int *a = malloc(bytes);
    int *b = malloc(bytes);
    if (bytes != 0 && (a == NULL || b == NULL)) {
        fprintf(stderr, "Cannot allocate host buffers for %zu ints\n", n);
        exit(EXIT_FAILURE);
    }

    // Init
    for (size_t i = 0; i < n; i++) {
        a[i] = (int)(n - i);
        b[i] = 0;
    }

    // Each status is stored in cl_error before it is checked, so every
    // OpenCL call is made exactly once however the checking macro expands.

    // Buffers on the device
    cl_mem a_dev = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &cl_error);
    OpenCL_test_execution("Create Buffer", cl_error);
    cl_mem b_dev = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &cl_error);
    OpenCL_test_execution("Create Buffer", cl_error);

    // One event per enqueued command
    cl_event write_done;
    cl_event kernel_done;
    cl_event read_done;

    // Initialize buffer on the device (blocking write)
    cl_error = clEnqueueWriteBuffer(queue, a_dev, CL_TRUE, 0, bytes, a,
                                    0, NULL, &write_done);
    OpenCL_test_execution("Write to Buffer", cl_error);

    // Should be redundant after a blocking write; kept from the original
    // experiment, but failures are no longer ignored.
    cl_error = clFlush(queue);
    OpenCL_test_execution("clFlush", cl_error);
    cl_error = clWaitForEvents(1, &write_done);
    OpenCL_test_execution("clWaitForEvents", cl_error);

    // Arguments for the kernel
    cl_error = clSetKernelArg(kernel, 0, sizeof(a_dev), &a_dev);
    OpenCL_test_execution("Set argument 0 ", cl_error);
    cl_error = clSetKernelArg(kernel, 1, sizeof(b_dev), &b_dev);
    OpenCL_test_execution("Set argument 1", cl_error);

    // Only the enqueue call is between timer_start and timer_stop_display.
    timer_start();
    cl_error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &n, NULL,
                                      1, &write_done, &kernel_done);
    timer_stop_display("Time for Enqueue");
    OpenCL_test_execution("Enqueue kernel", cl_error);

    cl_error = clWaitForEvents(1, &kernel_done);
    OpenCL_test_execution("clWaitForEvents", cl_error);

    cl_error = clEnqueueReadBuffer(queue, b_dev, CL_TRUE, 0, bytes, b,
                                   1, &kernel_done, &read_done);
    OpenCL_test_execution("Read from buffer", cl_error);
    cl_error = clWaitForEvents(1, &read_done);
    OpenCL_test_execution("clWaitForEvents", cl_error);

    // Check result
    for (size_t i = 0; i < n; i++) {
        if (a[i] != b[i]) {
            printf("Error %zu : %d!=%d\n", i, a[i], b[i]);
            exit(-1);
        }
    }

    // Events returned by the enqueue calls must be released by the caller.
    cl_error = clReleaseEvent(write_done);
    OpenCL_test_execution("Release event", cl_error);
    cl_error = clReleaseEvent(kernel_done);
    OpenCL_test_execution("Release event", cl_error);
    cl_error = clReleaseEvent(read_done);
    OpenCL_test_execution("Release event", cl_error);

    cl_error = clReleaseMemObject(a_dev);
    OpenCL_test_execution("Release mem object", cl_error);
    cl_error = clReleaseMemObject(b_dev);
    OpenCL_test_execution("Release mem object", cl_error);

    free(a);
    free(b);
}
int main(int argc, char **argv) {
int platform_num = 0; // Platform number
int device_num = 0; // Device number
#define DEVICE_TYPE CL_DEVICE_TYPE_ALL
cl_int cl_error; // OpenCL error code
cl_kernel kernel = NULL;
// Chosing platform
cl_uint num_platforms;
clGetPlatformIDs(0, NULL, &num_platforms);
if(num_platforms <= 0) {
dump("No OpenCL platforms found :-(\n");
exit(-1);
}
cl_platform_id platform_ids[num_platforms];
clGetPlatformIDs(num_platforms, platform_ids, NULL);
if(platform_num < 0 || platform_num >= num_platforms) {
dump("Invalid platform: %d\n", platform_num);
exit(EXIT_FAILURE);
}
// platform_id hold the chosen platform
cl_platform_id platform_id = platform_ids[platform_num];
// Chosing the device
cl_uint num_devices;
OpenCL_test_execution("Get number of devices", clGetDeviceIDs(platform_id, DEVICE_TYPE, 0, NULL, &num_devices));
if(num_devices <= 0) {
dump("No devices found associated to this OpenCL platform :-(\n");
exit(-1);
}
// Allocate spaces for devices
cl_device_id devices[num_devices];
// Get devices list
OpenCL_test_execution("Get devices list", clGetDeviceIDs(platform_id, DEVICE_TYPE, num_devices, devices, NULL));
/**//* Create a context for all devices */
cl_context context = clCreateContext(0,
num_devices,
devices,
NULL,
"from 'context'",
&cl_error);
OpenCL_test_execution("Context creation",cl_error);
// Here is the device ID
device_id = devices[device_num];
/**//* Create an in-order queue for this device */
cl_command_queue queue = clCreateCommandQueue(context, device_id, 0, &cl_error);
OpenCL_test_execution("Create command queue",cl_error);
// END OF OPENCL INITIALIZATION
const char *kernel_str = " __kernel void copy(__global int *a, __global int *b) {"
" int i = get_global_id(0);"
" b[i]=a[i];"
"}";
program = clCreateProgramWithSource(context,
1,
&kernel_str,
NULL,
&cl_error);
OpenCL_test_execution("Create program with source",cl_error);
cl_error = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
OpenCL_test_execution("Build Program",cl_error);
kernel = clCreateKernel(program, "copy", &cl_error);
OpenCL_test_execution("Create kernel",cl_error);
// Size of problem, 10^5 will give me 1.5ms for the enqueue, 10^6 up to 30ms!
size_t n = 10000000;
int _i;
// Run the sequence many times
printf("Run with n = %zu\n",n);
for(_i=0;_i<10;_i++) {
openclSimpleCopy(context, queue, kernel,n);
}
// Run the same sequence with a smaller problem size
n = n/100;
printf("Run with n = %zu\n",n);
for(_i=0;_i<10;_i++) {
openclSimpleCopy(context, queue, kernel,n);
}
}
/**
 * Translate an OpenCL status code into a human-readable string.
 *
 * For CL_BUILD_PROGRAM_FAILURE the result is "Build Program Failure : "
 * followed by the build log of the global `program` for the global
 * `device_id`. It lives in a static buffer that the next such call
 * overwrites. Every other result is a string literal. Callers must not
 * modify or free the returned string.
 *
 * @param error OpenCL status code.
 * @return Description of the code, or "Unknown" if it is not listed.
 */
char * OpenCL_error_to_string(int error) {
    static const struct {
        int code;
        const char *text;
    } names[] = {
        { CL_SUCCESS, "Success" },
        { CL_DEVICE_NOT_FOUND, "Device Not Found" },
        { CL_DEVICE_NOT_AVAILABLE, "Device Not Available" },
        { CL_COMPILER_NOT_AVAILABLE, "Compiler Not Available" },
        { CL_MEM_OBJECT_ALLOCATION_FAILURE, "Mem Object Allocation Failure" },
        { CL_OUT_OF_RESOURCES, "Out Of Ressources" },
        { CL_OUT_OF_HOST_MEMORY, "Out Of Host Memory" },
        { CL_PROFILING_INFO_NOT_AVAILABLE, "Profiling Info Not Available" },
        { CL_MEM_COPY_OVERLAP, "Mem Copy Overlap" },
        { CL_IMAGE_FORMAT_MISMATCH, "Image Format Mismatch" },
        { CL_IMAGE_FORMAT_NOT_SUPPORTED, "Image Format Not Supported" },
        { CL_MAP_FAILURE, "Map Failure" },
        { CL_INVALID_VALUE, "Invalid Value" },
        { CL_INVALID_DEVICE_TYPE, "Invalid Device Type" },
        { CL_INVALID_PLATFORM, "Invalid Platform" },
        { CL_INVALID_DEVICE, "Invalid Device" },
        { CL_INVALID_CONTEXT, "Invalid Context" },
        { CL_INVALID_QUEUE_PROPERTIES, "Invalid Queue Properties" },
        { CL_INVALID_COMMAND_QUEUE, "Invalid Command Queue" },
        { CL_INVALID_HOST_PTR, "Invalid Host Ptr" },
        { CL_INVALID_MEM_OBJECT, "Invalid Mem Object" },
        { CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, "Invalid Image Format Descriptor" },
        { CL_INVALID_IMAGE_SIZE, "Invalid Image Size" },
        { CL_INVALID_SAMPLER, "Invalid Sampler" },
        { CL_INVALID_BINARY, "Invalid Binary" },
        { CL_INVALID_BUILD_OPTIONS, "Invalid Build Options" },
        { CL_INVALID_PROGRAM, "Invalid Program" },
        { CL_INVALID_PROGRAM_EXECUTABLE, "Invalid Program Executable" },
        { CL_INVALID_KERNEL_NAME, "Invalid Kernel Name" },
        { CL_INVALID_KERNEL_DEFINITION, "Invalid Kernel Definition" },
        { CL_INVALID_KERNEL, "Invalid Kernel" },
        { CL_INVALID_ARG_INDEX, "Invalid Arg Index" },
        { CL_INVALID_ARG_VALUE, "Invalid Arg Value" },
        { CL_INVALID_ARG_SIZE, "Invalid Arg Size" },
        { CL_INVALID_KERNEL_ARGS, "Invalid Kernel Args" },
        { CL_INVALID_WORK_DIMENSION, "Invalid Work Dimension" },
        { CL_INVALID_WORK_GROUP_SIZE, "Invalid Work Group Size" },
        { CL_INVALID_WORK_ITEM_SIZE, "Invalid Work Item Size" },
        { CL_INVALID_GLOBAL_OFFSET, "Invalid Global Offset" },
        { CL_INVALID_EVENT_WAIT_LIST, "Invalid Event Wait List" },
        { CL_INVALID_EVENT, "Invalid Event" },
        { CL_INVALID_OPERATION, "Invalid Operation" },
        { CL_INVALID_GL_OBJECT, "Invalid GL Object" },
        { CL_INVALID_BUFFER_SIZE, "Invalid Buffer Size" },
        { CL_INVALID_MIP_LEVEL, "Invalid Mip Level" },
        { CL_INVALID_GLOBAL_WORK_SIZE, "Invalid Global Work Size" },
    };

    if (error == CL_BUILD_PROGRAM_FAILURE) {
#define CL_BUILD_PROGRAM_FAILURE_MSG "Build Program Failure : "
        static char debug_buffer[DEBUG_BUFFER_SIZE]; // Static to be returned
        const size_t prefix_len = sizeof(CL_BUILD_PROGRAM_FAILURE_MSG) - 1;

        // Write the prefix from the start of the buffer on every call; the
        // buffer is static, so appending would pile up across calls.
        memcpy(debug_buffer, CL_BUILD_PROGRAM_FAILURE_MSG, prefix_len + 1);

        // The log is written after the prefix, so only the remaining
        // space may be offered to OpenCL.
        if (clGetProgramBuildInfo(program,
                                  device_id,
                                  CL_PROGRAM_BUILD_LOG,
                                  DEBUG_BUFFER_SIZE - prefix_len,
                                  debug_buffer + prefix_len,
                                  NULL) != CL_SUCCESS) {
            // No log could be fetched: return the prefix alone.
            debug_buffer[prefix_len] = '\0';
        }
        debug_buffer[DEBUG_BUFFER_SIZE - 1] = '\0';
        return debug_buffer;
    }

    for (size_t i = 0; i < sizeof names / sizeof names[0]; i++) {
        if (names[i].code == error) {
            // The declared return type is char *; the text is a literal.
            return (char *)names[i].text;
        }
    }
    return (char *)"Unknown";
}