Test latency for clEnqueueNDRangeKernel

Test latency for clEnqueueNDRangeKernel
http://pastebin.com/fije3CKf

  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>

  #include <sys/time.h>

  #include <CL/opencl.h>
  5.  
  6. cl_int cl_error ; // Status code returned by the OpenCL API calls; checked with OpenCL_test_execution
  7. cl_device_id device_id ; // The chosen device; assigned in main, read by OpenCL_error_to_string for the build log
  8. cl_program program ; // OpenCL program; assigned in main, read by OpenCL_error_to_string for the build log
  9.  
  10.  
  /** Formats the standard macros __FILE__ and __LINE__ for message print.
   *  AT expands to the string literal "<file>:<line>" of the place it is used.
   */
  #define STRINGIFY(x) #x
  #define TOSTRING(x) STRINGIFY(x)
  #define AT __FILE__ ":" TOSTRING(__LINE__)

  /* Size in bytes of the static buffer used to return a build log. */
  #define DEBUG_BUFFER_SIZE 4096
  char *OpenCL_error_to_string(int error);

  /**
   * printf-style diagnostic on stderr, prefixed with "<file>:<line>: ".
   * `msg` must be a string literal because it is concatenated with the prefix.
   * `##__VA_ARGS__` (GNU extension) lets the macro be used without arguments.
   */
  #define dump(msg, ...) \
    fprintf(stderr, AT ": " msg, ##__VA_ARGS__)

  /**
   * Terminates the program when `error` is not CL_SUCCESS.
   * `msg` names the step being checked and is part of the diagnostic.
   * `error` is evaluated exactly once, so passing an API call is safe.
   */
  #define OpenCL_test_execution(msg, error)                     \
    do {                                                        \
      const int cl_status_ = (error);                           \
      if (CL_SUCCESS != cl_status_) {                           \
        dump("%s: the runtime error is %s\n", (msg),            \
             OpenCL_error_to_string(cl_status_));               \
        exit(EXIT_FAILURE);                                     \
      }                                                         \
    } while (0)
  33.  
  34.  
  35.  
  36.  
  static double t_start, t_end; // Timing: interval bounds, in seconds

  /**
   * Returns the current wall-clock time in seconds, as reported by
   * gettimeofday(). Terminates the process if the call fails.
   */
  double timer_get_time(void)
  {
    struct timeval now;

    if (gettimeofday(&now, NULL) != 0) {
      /* No trailing newline: perror() appends ": <reason>\n" itself. */
      perror("Error gettimeofday");
      exit(EXIT_FAILURE);
    }
    return (double)now.tv_sec + (double)now.tv_usec * 1.0e-6;
  }

  /** Records the current time as the start of the measured interval. */
  void timer_start(void)
  {
    t_start = timer_get_time();
  }

  /**
   * Records the current time as the end of the interval and prints
   * "<msg> : <elapsed>" on stdout, with the elapsed time in milliseconds.
   *
   * @param msg  label printed in front of the measurement
   */
  void timer_stop_display(const char *msg)
  {
    t_end = timer_get_time();
    printf("%s : %0.1lf\n", msg, (t_end - t_start) * 1000);
  }
  56.  
  57.  
  58.  
  59. void openclSimpleCopy (cl_context context , cl_command_queue queue , cl_kernel kernel , size_t n ) {
  60.   // Host data
  61.   int a [n ] ,b [n ] ;
  62.   int _i ;
  63.  
  64.   // Init
  65.   for (_i = 0 ;_i <n ;_i ++ ) {
  66.     a [_i ] =n -_i ;
  67.     b [_i ] = 0 ;
  68.   }
  69.  
  70.   // Buffers on the device
  71.   cl_mem a_dev = clCreateBuffer (context ,
  72.                                 CL_MEM_READ_WRITE ,
  73.                                 n * sizeof ( int ) ,
  74.                                 NULL ,
  75.                                 &cl_error ) ;
  76.   OpenCL_test_execution ( "Create Buffer" ,cl_error ) ;
  77.  
  78.   cl_mem b_dev = clCreateBuffer (context ,
  79.                                 CL_MEM_READ_WRITE ,
  80.                                 n * sizeof ( int ) ,
  81.                                 NULL ,
  82.                                 &cl_error ) ;
  83.   OpenCL_test_execution ( "Create Buffer" ,cl_error ) ;
  84.  
  85.  
  86.   // 3 events is enough here
  87.   cl_event event1 ;
  88.   cl_event event2 ;
  89.   cl_event event3 ;
  90.  
  91.  
  92.  
  93.  
  94.   // Initialize buffer on the device
  95.   cl_error =  clEnqueueWriteBuffer (queue ,
  96.                        a_dev ,
  97.                        CL_TRUE ,
  98.                         0 ,
  99.                        n * sizeof ( int ) ,
  100.                        a ,
  101.                         0 ,
  102.                        NULL ,
  103.                         &event1 ) ;
  104.   OpenCL_test_execution ( "Write to Buffer" ,cl_error ) ;
  105.  
  106.   // Shouldn't be useful, I used a blocking write !
  107.   clFlush (queue ) ;
  108.   clWaitForEvents ( 1 ,&event1 ) ;
  109.  
  110.  
  111.   // Arguments for the kernel
  112.   cl_error = clSetKernelArg (kernel , 0 , sizeof (a_dev ) , &a_dev ) ;
  113.   OpenCL_test_execution ( "Set argument 0 " ,cl_error ) ;
  114.  
  115.   cl_error = clSetKernelArg (kernel , 1 , sizeof (b_dev ) , &b_dev ) ;
  116.   OpenCL_test_execution ( "Set argument 1" ,cl_error ) ;
  117.  
  118.   timer_start ( ) ;
  119.   cl_error = clEnqueueNDRangeKernel (queue ,
  120.                                     kernel ,
  121.                                     1 ,
  122.                                     NULL ,
  123.                                     &n ,
  124.                                     NULL ,
  125.                                     1 ,
  126.                                     &event1 ,
  127.                                     &event2 ) ;
  128.  
  129.   timer_stop_display ( "Time for Enqueue" ) ;
  130.   OpenCL_test_execution ( "Enqueue kernel" ,cl_error ) ;
  131.  
  132.   OpenCL_test_execution ( "clWaitForEvents" ,clWaitForEvents ( 1 ,&event2 ) ) ;
  133.  
  134.   cl_error =  clEnqueueReadBuffer (queue ,
  135.                        b_dev ,
  136.                        CL_TRUE ,
  137.                         0 ,
  138.                        n * sizeof ( int ) ,
  139.                        b ,
  140.                         1 ,
  141.                         &event2 ,
  142.                         &event3 ) ;
  143.   OpenCL_test_execution ( "Read from buffer" ,cl_error ) ;
  144.  
  145.   OpenCL_test_execution ( "clWaitForEvents" ,clWaitForEvents ( 1 ,&event3 ) ) ;
  146.  
  147.  
  148.   // Check result
  149.   for (_i = 0 ;_i <n ;_i ++ ) {
  150.     if (a [_i ] !=b [_i ] ) {
  151.       printf ( "Error %d : %d!=%d\n" ,_i ,a [_i ] ,b [_i ] ) ;
  152.       exit ( - 1 ) ;
  153.     }
  154.   }
  155.   OpenCL_test_execution ( "Release mem object" ,clReleaseMemObject (a_dev ) ) ;
  156.   OpenCL_test_execution ( "Release mem object" ,clReleaseMemObject (b_dev ) ) ;
  157.  
  158. }
  159.  
  160.  
  161.  
  162. int main ( int argc , char **argv ) {
  163.  
  164.   int platform_num = 0 ; // Platform number
  165.   int device_num = 0 ; // Device number
  166. #define DEVICE_TYPE CL_DEVICE_TYPE_ALL
  167.  
  168.   cl_int cl_error ; // OpenCL error code
  169.   cl_kernel kernel = NULL ;
  170.  
  171.  
  172.   // Chosing platform
  173.   cl_uint num_platforms ;
  174.   clGetPlatformIDs ( 0 , NULL , &num_platforms ) ;
  175.   if (num_platforms <= 0 ) {
  176.     dump ( "No OpenCL platforms found :-(\n" ) ;
  177.     exit ( - 1 ) ;
  178.   }
  179.  
  180.   cl_platform_id platform_ids [num_platforms ] ;
  181.   clGetPlatformIDs (num_platforms , platform_ids , NULL ) ;
  182.  
  183.   if (platform_num < 0 || platform_num >= num_platforms ) {
  184.     dump ( "Invalid platform: %d\n" , platform_num ) ;
  185.     exit (EXIT_FAILURE ) ;
  186.   }
  187.  
  188.   // platform_id hold the chosen platform
  189.   cl_platform_id platform_id = platform_ids [platform_num ] ;
  190.  
  191.   // Chosing the device
  192.  
  193.   cl_uint num_devices ;
  194.   OpenCL_test_execution ( "Get number of devices" , clGetDeviceIDs (platform_id , DEVICE_TYPE , 0 , NULL , &num_devices ) ) ;
  195.  
  196.   if (num_devices <= 0 ) {
  197.     dump ( "No devices found associated to this OpenCL platform :-(\n" ) ;
  198.     exit ( - 1 ) ;
  199.   }
  200.  
  201.   // Allocate spaces for devices
  202.   cl_device_id devices [num_devices ] ;
  203.  
  204.   // Get devices list
  205.   OpenCL_test_execution ( "Get devices list" , clGetDeviceIDs (platform_id , DEVICE_TYPE , num_devices , devices , NULL ) ) ;
  206.  
  207.   /* Create a context for all devices */
  208.   cl_context context = clCreateContext ( 0 ,
  209.                                        num_devices ,
  210.                                        devices ,
  211.                                        NULL ,
  212.                                         "from 'context'" ,
  213.                                         &cl_error ) ;
  214.   OpenCL_test_execution ( "Context creation" ,cl_error ) ;
  215.  
  216.   // Here is the device ID
  217.   device_id = devices [device_num ] ;
  218.  
  219.   /* Create an in-order queue for this device */
  220.   cl_command_queue queue = clCreateCommandQueue (context , device_id , 0 , &cl_error ) ;
  221.   OpenCL_test_execution ( "Create command queue" ,cl_error ) ;
  222.  
  223.   // END OF OPENCL INITIALIZATION
  224.   const char *kernel_str = " __kernel void copy(__global int *a, __global int *b) {"
  225.                             " int i = get_global_id(0);"
  226.                             " b[i]=a[i];"
  227.                             "}" ;
  228.  
  229.   program = clCreateProgramWithSource (context ,
  230.                                       1 ,
  231.                                       &kernel_str ,
  232.                                       NULL ,
  233.                                       &cl_error ) ;
  234.   OpenCL_test_execution ( "Create program with source" ,cl_error ) ;
  235.  
  236.   cl_error = clBuildProgram (program , 0 , NULL , NULL , NULL , NULL ) ;
  237.   OpenCL_test_execution ( "Build Program" ,cl_error ) ;
  238.  
  239.   kernel = clCreateKernel (program , "copy" , &cl_error ) ;
  240.   OpenCL_test_execution ( "Create kernel" ,cl_error ) ;
  241.  
  242.  
  243.  
  244.  
  245.   // Size of problem, 10^5 will give me 1.5ms for the enqueue, 10^6 up to 30ms!
  246.   size_t n = 10000000 ;
  247.   int _i ;
  248.   // Run the sequence many times
  249.   printf ( "Run with n = %zu\n" ,n ) ;
  250.   for (_i = 0 ;_i < 10 ;_i ++ ) {
  251.     openclSimpleCopy (context , queue , kernel ,n ) ;
  252.   }
  253.  
  254.   // Run the same sequence with a smaller problem size
  255.   n = n / 100 ;
  256.   printf ( "Run with n = %zu\n" ,n ) ;
  257.   for (_i = 0 ;_i < 10 ;_i ++ ) {
  258.     openclSimpleCopy (context , queue , kernel ,n ) ;
  259.   }
  260.  
  261.  
  262. }
  263.  
  264.  
  265.  
  266. char * OpenCL_error_to_string ( int error ) {
  267.   switch (error )
  268.     {
  269.     case CL_SUCCESS :
  270.       return ( char * ) "Success" ;
  271.     case CL_DEVICE_NOT_FOUND :
  272.       return ( char * ) "Device Not Found" ;
  273.     case CL_DEVICE_NOT_AVAILABLE :
  274.       return ( char * ) "Device Not Available" ;
  275.     case CL_COMPILER_NOT_AVAILABLE :
  276.       return ( char * ) "Compiler Not Available" ;
  277.     case CL_MEM_OBJECT_ALLOCATION_FAILURE :
  278.       return ( char * ) "Mem Object Allocation Failure" ;
  279.     case CL_OUT_OF_RESOURCES :
  280.       return ( char * ) "Out Of Ressources" ;
  281.     case CL_OUT_OF_HOST_MEMORY :
  282.       return ( char * ) "Out Of Host Memory" ;
  283.     case CL_PROFILING_INFO_NOT_AVAILABLE :
  284.       return ( char * ) "Profiling Info Not Available" ;
  285.     case CL_MEM_COPY_OVERLAP :
  286.       return ( char * ) "Mem Copy Overlap" ;
  287.     case CL_IMAGE_FORMAT_MISMATCH :
  288.       return ( char * ) "Image Format Mismatch" ;
  289.     case CL_IMAGE_FORMAT_NOT_SUPPORTED :
  290.       return ( char * ) "Image Format Not Supported" ;
  291.     case CL_BUILD_PROGRAM_FAILURE : {
  292.   #define CL_BUILD_PROGRAM_FAILURE_MSG "Build Program Failure : "
  293.   static char debug_buffer [DEBUG_BUFFER_SIZE ] ; // Static to be returned
  294.   strncat (debug_buffer ,CL_BUILD_PROGRAM_FAILURE_MSG ,DEBUG_BUFFER_SIZE ) ;
  295.   clGetProgramBuildInfo (program ,
  296.           device_id ,
  297.           CL_PROGRAM_BUILD_LOG ,
  298.           DEBUG_BUFFER_SIZE ,
  299.           debug_buffer + strlen (CL_BUILD_PROGRAM_FAILURE_MSG ) ,
  300.           NULL ) ;
  301.       return ( char * )debug_buffer ;
  302.     }
  303.     case CL_MAP_FAILURE :
  304.       return ( char * ) "Map Failure" ;
  305.     case CL_INVALID_VALUE :
  306.       return ( char * ) "Invalid Value" ;
  307.     case CL_INVALID_DEVICE_TYPE :
  308.       return ( char * ) "Invalid Device Type" ;
  309.     case CL_INVALID_PLATFORM :
  310.       return ( char * ) "Invalid Platform" ;
  311.     case CL_INVALID_DEVICE :
  312.       return ( char * ) "Invalid Device" ;
  313.     case CL_INVALID_CONTEXT :
  314.       return ( char * ) "Invalid Context" ;
  315.     case CL_INVALID_QUEUE_PROPERTIES :
  316.       return ( char * ) "Invalid Queue Properties" ;
  317.     case CL_INVALID_COMMAND_QUEUE :
  318.       return ( char * ) "Invalid Command Queue" ;
  319.     case CL_INVALID_HOST_PTR :
  320.       return ( char * ) "Invalid Host Ptr" ;
  321.     case CL_INVALID_MEM_OBJECT :
  322.       return ( char * ) "Invalid Mem Object" ;
  323.     case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR :
  324.       return ( char * ) "Invalid Image Format Descriptor" ;
  325.     case CL_INVALID_IMAGE_SIZE :
  326.       return ( char * ) "Invalid Image Size" ;
  327.     case CL_INVALID_SAMPLER :
  328.       return ( char * ) "Invalid Sampler" ;
  329.     case CL_INVALID_BINARY :
  330.       return ( char * ) "Invalid Binary" ;
  331.     case CL_INVALID_BUILD_OPTIONS :
  332.       return ( char * ) "Invalid Build Options" ;
  333.     case CL_INVALID_PROGRAM :
  334.       return ( char * ) "Invalid Program" ;
  335.     case CL_INVALID_PROGRAM_EXECUTABLE :
  336.       return ( char * ) "Invalid Program Executable" ;
  337.     case CL_INVALID_KERNEL_NAME :
  338.       return ( char * ) "Invalid Kernel Name" ;
  339.     case CL_INVALID_KERNEL_DEFINITION :
  340.       return ( char * ) "Invalid Kernel Definition" ;
  341.     case CL_INVALID_KERNEL :
  342.       return ( char * ) "Invalid Kernel" ;
  343.     case CL_INVALID_ARG_INDEX :
  344.       return ( char * ) "Invalid Arg Index" ;
  345.     case CL_INVALID_ARG_VALUE :
  346.       return ( char * ) "Invalid Arg Value" ;
  347.     case CL_INVALID_ARG_SIZE :
  348.       return ( char * ) "Invalid Arg Size" ;
  349.     case CL_INVALID_KERNEL_ARGS :
  350.       return ( char * ) "Invalid Kernel Args" ;
  351.     case CL_INVALID_WORK_DIMENSION :
  352.       return ( char * ) "Invalid Work Dimension" ;
  353.     case CL_INVALID_WORK_GROUP_SIZE :
  354.       return ( char * ) "Invalid Work Group Size" ;
  355.     case CL_INVALID_WORK_ITEM_SIZE :
  356.       return ( char * ) "Invalid Work Item Size" ;
  357.     case CL_INVALID_GLOBAL_OFFSET :
  358.       return ( char * ) "Invalid Global Offset" ;
  359.     case CL_INVALID_EVENT_WAIT_LIST :
  360.       return ( char * ) "Invalid Event Wait List" ;
  361.     case CL_INVALID_EVENT :
  362.       return ( char * ) "Invalid Event" ;
  363.     case CL_INVALID_OPERATION :
  364.       return ( char * ) "Invalid Operation" ;
  365.     case CL_INVALID_GL_OBJECT :
  366.       return ( char * ) "Invalid GL Object" ;
  367.     case CL_INVALID_BUFFER_SIZE :
  368.       return ( char * ) "Invalid Buffer Size" ;
  369.     case CL_INVALID_MIP_LEVEL :
  370.       return ( char * ) "Invalid Mip Level" ;
  371.     case CL_INVALID_GLOBAL_WORK_SIZE :
  372.       return ( char * ) "Invalid Global Work Size" ;
  373.     default :
  374.       break ;
  375.     }
  376.   return "Unknown" ;
  377. }



#include <stdlib.h>
#include 
<stdio.h>
#include 
<string.h>
#include 
<CL/opencl.h>

cl_int cl_error; 
// OpenCL error code
cl_device_id device_id; // The chosen device
cl_program program; // OpenCL program


/** Formats the standard macros __FILE__ and __LINE__ for message print.
 *  AT expands to the string literal "<file>:<line>" of the place it is used.
 */
#define STRINGIFY(x) #x
#define TOSTRING(x) STRINGIFY(x)
#define AT __FILE__ ":" TOSTRING(__LINE__)

/* Size in bytes of the static buffer used to return a build log. */
#define DEBUG_BUFFER_SIZE 4096
char *OpenCL_error_to_string(int error);

/**
 * printf-style diagnostic on stderr, prefixed with "<file>:<line>: ".
 * `msg` must be a string literal because it is concatenated with the prefix.
 * `##__VA_ARGS__` (GNU extension) lets the macro be used without arguments.
 */
#define dump(msg, ...) \
  fprintf(stderr, AT ": " msg, ##__VA_ARGS__)

/**
 * Terminates the program when `error` is not CL_SUCCESS.
 * `msg` names the step being checked and is part of the diagnostic.
 * `error` is evaluated exactly once, so passing an API call is safe.
 */
#define OpenCL_test_execution(msg, error)                     \
  do {                                                        \
    const int cl_status_ = (error);                           \
    if (CL_SUCCESS != cl_status_) {                           \
      dump("%s: the runtime error is %s\n", (msg),            \
           OpenCL_error_to_string(cl_status_));               \
      exit(EXIT_FAILURE);                                     \
    }                                                         \
  } while (0)



static double t_start, t_end; // Timing: interval bounds, in seconds

/**
 * Returns the current wall-clock time in seconds, as reported by
 * gettimeofday(). Terminates the process if the call fails.
 */
double timer_get_time(void)
{
  struct timeval now;

  if (gettimeofday(&now, NULL) != 0) {
    /* No trailing newline: perror() appends ": <reason>\n" itself. */
    perror("Error gettimeofday");
    exit(EXIT_FAILURE);
  }
  return (double)now.tv_sec + (double)now.tv_usec * 1.0e-6;
}

/** Records the current time as the start of the measured interval. */
void timer_start(void)
{
  t_start = timer_get_time();
}

/**
 * Records the current time as the end of the interval and prints
 * "<msg> : <elapsed>" on stdout, with the elapsed time in milliseconds.
 *
 * @param msg  label printed in front of the measurement
 */
void timer_stop_display(const char *msg)
{
  t_end = timer_get_time();
  printf("%s : %0.1lf\n", msg, (t_end - t_start) * 1000);
}




/**
 * Copies `n` integers through the device with `kernel` and prints how long
 * the clEnqueueNDRangeKernel() call itself takes on the host.
 *
 * Steps: fill host array `a` with n, n-1, ..., 1; upload it with a blocking
 * write; launch `kernel` over `n` work-items; read the second device buffer
 * back with a blocking read; check that it equals `a`.
 *
 * Any OpenCL error, allocation failure or result mismatch terminates the
 * process.
 *
 * @param context  context used to create the two device buffers
 * @param queue    command queue every command is enqueued on
 * @param kernel   kernel whose arguments 0 and 1 are the source and
 *                 destination buffers
 * @param n        number of int elements to copy
 */
void openclSimpleCopy(cl_context context, cl_command_queue queue, cl_kernel kernel, size_t n)
{
  const size_t bytes = n * sizeof(int);

  // Host data lives on the heap: two automatic arrays of n ints overflow
  // the stack for the problem sizes used by main
  int *a = malloc(bytes);
  int *b = malloc(bytes);
  if (a == NULL || b == NULL) {
    dump("Cannot allocate %zu bytes of host memory\n", bytes);
    exit(EXIT_FAILURE);
  }

  // Init
  for (size_t i = 0; i < n; i++) {
    a[i] = (int)(n - i);
    b[i] = 0;
  }

  // Buffers on the device
  cl_mem a_dev = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &cl_error);
  OpenCL_test_execution("Create Buffer", cl_error);

  cl_mem b_dev = clCreateBuffer(context, CL_MEM_READ_WRITE, bytes, NULL, &cl_error);
  OpenCL_test_execution("Create Buffer", cl_error);

  // 3 events is enough here: write, kernel, read
  cl_event event1 = NULL;
  cl_event event2 = NULL;
  cl_event event3 = NULL;

  // Initialize buffer on the device (blocking write)
  cl_error = clEnqueueWriteBuffer(queue, a_dev, CL_TRUE, 0, bytes, a, 0, NULL, &event1);
  OpenCL_test_execution("Write to Buffer", cl_error);

  // Shouldn't be useful after a blocking write; kept so that the timed
  // enqueue below never includes time spent waiting for the upload
  OpenCL_test_execution("clFlush", clFlush(queue));
  OpenCL_test_execution("clWaitForEvents", clWaitForEvents(1, &event1));

  // Arguments for the kernel
  cl_error = clSetKernelArg(kernel, 0, sizeof(a_dev), &a_dev);
  OpenCL_test_execution("Set argument 0 ", cl_error);

  cl_error = clSetKernelArg(kernel, 1, sizeof(b_dev), &b_dev);
  OpenCL_test_execution("Set argument 1", cl_error);

  // Only the enqueue call sits between timer_start and timer_stop_display
  timer_start();
  cl_error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &n, NULL, 1, &event1, &event2);
  timer_stop_display("Time for Enqueue");
  OpenCL_test_execution("Enqueue kernel", cl_error);

  OpenCL_test_execution("clWaitForEvents", clWaitForEvents(1, &event2));

  cl_error = clEnqueueReadBuffer(queue, b_dev, CL_TRUE, 0, bytes, b, 1, &event2, &event3);
  OpenCL_test_execution("Read from buffer", cl_error);

  OpenCL_test_execution("clWaitForEvents", clWaitForEvents(1, &event3));

  // Check result
  for (size_t i = 0; i < n; i++) {
    if (a[i] != b[i]) {
      printf("Error %zu : %d!=%d\n", i, a[i], b[i]);
      exit(-1);
    }
  }

  // Every event returned by an enqueue call holds a reference we own
  OpenCL_test_execution("Release event", clReleaseEvent(event1));
  OpenCL_test_execution("Release event", clReleaseEvent(event2));
  OpenCL_test_execution("Release event", clReleaseEvent(event3));

  OpenCL_test_execution("Release mem object", clReleaseMemObject(a_dev));
  OpenCL_test_execution("Release mem object", clReleaseMemObject(b_dev));

  free(a);
  free(b);
}




int main(int argc, char **argv) {

  
int platform_num = 0// Platform number
  int device_num = 0// Device number
#define DEVICE_TYPE CL_DEVICE_TYPE_ALL

  cl_int cl_error; 
// OpenCL error code
  cl_kernel kernel = NULL;


  
// Chosing platform
  cl_uint num_platforms;
  clGetPlatformIDs(
0, NULL, &num_platforms);
  
if(num_platforms <= 0{
    dump(
"No OpenCL platforms found :-(\n");
    exit(
-1);
  }


  cl_platform_id platform_ids[num_platforms];
  clGetPlatformIDs(num_platforms, platform_ids, NULL);

  
if(platform_num < 0 || platform_num >= num_platforms) {
    dump(
"Invalid platform: %d\n", platform_num);
    exit(EXIT_FAILURE);
  }


  
// platform_id hold the chosen platform
  cl_platform_id platform_id = platform_ids[platform_num];

  
// Chosing the device

  cl_uint num_devices;
  OpenCL_test_execution(
"Get number of devices", clGetDeviceIDs(platform_id, DEVICE_TYPE, 0, NULL, &num_devices));

  
if(num_devices <= 0{
    dump(
"No devices found associated to this OpenCL platform :-(\n");
    exit(
-1);
  }


  
// Allocate spaces for devices
  cl_device_id devices[num_devices];

  
// Get devices list
  OpenCL_test_execution("Get devices list", clGetDeviceIDs(platform_id, DEVICE_TYPE, num_devices, devices, NULL));

  
/**//* Create a context for all devices */
  cl_context context 
= clCreateContext(0,
                                       num_devices,
                                       devices,
                                       NULL,
                                       
"from 'context'",
                                       
&cl_error);
  OpenCL_test_execution(
"Context creation",cl_error);

  
// Here is the device ID
  device_id = devices[device_num];

  
/**//* Create an in-order queue for this device */
  cl_command_queue queue 
= clCreateCommandQueue(context, device_id, 0&cl_error);
  OpenCL_test_execution(
"Create command queue",cl_error);

  
// END OF OPENCL INITIALIZATION
  const char *kernel_str = " __kernel void copy(__global int *a, __global int *b) {"
                           
" int i = get_global_id(0);"
                           
" b[i]=a[i];"
                           
"}";

  program 
= clCreateProgramWithSource(context,
                                      
1,
                                      
&kernel_str,
                                      NULL,
                                      
&cl_error);
  OpenCL_test_execution(
"Create program with source",cl_error);

  cl_error 
= clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
  OpenCL_test_execution(
"Build Program",cl_error);

  kernel 
= clCreateKernel(program, "copy"&cl_error);
  OpenCL_test_execution(
"Create kernel",cl_error);




  
// Size of problem, 10^5 will give me 1.5ms for the enqueue, 10^6 up to 30ms!
  size_t n = 10000000;
  
int _i;
  
// Run the sequence many times
  printf("Run with n = %zu\n",n);
  
for(_i=0;_i<10;_i++{
    openclSimpleCopy(context, queue, kernel,n);
  }


  
// Run the same sequence with a smaller problem size
  n = n/100;
  printf(
"Run with n = %zu\n",n);
  
for(_i=0;_i<10;_i++{
    openclSimpleCopy(context, queue, kernel,n);
  }



}




char * OpenCL_error_to_string(int error) {
  
switch (error)
    
{
    
case CL_SUCCESS:
      
return (char *)"Success";
    
case CL_DEVICE_NOT_FOUND:
      
return (char *)"Device Not Found";
    
case CL_DEVICE_NOT_AVAILABLE:
      
return (char *)"Device Not Available";
    
case CL_COMPILER_NOT_AVAILABLE:
      
return (char *)"Compiler Not Available";
    
case CL_MEM_OBJECT_ALLOCATION_FAILURE:
      
return (char *)"Mem Object Allocation Failure";
    
case CL_OUT_OF_RESOURCES:
      
return (char *)"Out Of Ressources";
    
case CL_OUT_OF_HOST_MEMORY:
      
return (char *)"Out Of Host Memory";
    
case CL_PROFILING_INFO_NOT_AVAILABLE:
      
return (char *)"Profiling Info Not Available";
    
case CL_MEM_COPY_OVERLAP:
      
return (char *)"Mem Copy Overlap";
    
case CL_IMAGE_FORMAT_MISMATCH:
      
return (char *)"Image Format Mismatch";
    
case CL_IMAGE_FORMAT_NOT_SUPPORTED:
      
return (char *)"Image Format Not Supported";
    
case CL_BUILD_PROGRAM_FAILURE: {
  
#define CL_BUILD_PROGRAM_FAILURE_MSG "Build Program Failure : "
  
static char debug_buffer[DEBUG_BUFFER_SIZE]; // Static to be returned
  strncat(debug_buffer,CL_BUILD_PROGRAM_FAILURE_MSG,DEBUG_BUFFER_SIZE);
  clGetProgramBuildInfo(program,
          device_id,
          CL_PROGRAM_BUILD_LOG ,
          DEBUG_BUFFER_SIZE,
          debug_buffer
+strlen(CL_BUILD_PROGRAM_FAILURE_MSG),
          NULL);
      
return (char *)debug_buffer;
    }

    
case CL_MAP_FAILURE:
      
return (char *)"Map Failure";
    
case CL_INVALID_VALUE:
      
return (char *)"Invalid Value";
    
case CL_INVALID_DEVICE_TYPE:
      
return (char *)"Invalid Device Type";
    
case CL_INVALID_PLATFORM:
      
return (char *)"Invalid Platform";
    
case CL_INVALID_DEVICE:
      
return (char *)"Invalid Device";
    
case CL_INVALID_CONTEXT:
      
return (char *)"Invalid Context";
    
case CL_INVALID_QUEUE_PROPERTIES:
      
return (char *)"Invalid Queue Properties";
    
case CL_INVALID_COMMAND_QUEUE:
      
return (char *)"Invalid Command Queue";
    
case CL_INVALID_HOST_PTR:
      
return (char *)"Invalid Host Ptr";
    
case CL_INVALID_MEM_OBJECT:
      
return (char *)"Invalid Mem Object";
    
case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
      
return (char *)"Invalid Image Format Descriptor";
    
case CL_INVALID_IMAGE_SIZE:
      
return (char *)"Invalid Image Size";
    
case CL_INVALID_SAMPLER:
      
return (char *)"Invalid Sampler";
    
case CL_INVALID_BINARY:
      
return (char *)"Invalid Binary";
    
case CL_INVALID_BUILD_OPTIONS:
      
return (char *)"Invalid Build Options";
    
case CL_INVALID_PROGRAM:
      
return (char *)"Invalid Program";
    
case CL_INVALID_PROGRAM_EXECUTABLE:
      
return (char *)"Invalid Program Executable";
    
case CL_INVALID_KERNEL_NAME:
      
return (char *)"Invalid Kernel Name";
    
case CL_INVALID_KERNEL_DEFINITION:
      
return (char *)"Invalid Kernel Definition";
    
case CL_INVALID_KERNEL:
      
return (char *)"Invalid Kernel";
    
case CL_INVALID_ARG_INDEX:
      
return (char *)"Invalid Arg Index";
    
case CL_INVALID_ARG_VALUE:
      
return (char *)"Invalid Arg Value";
    
case CL_INVALID_ARG_SIZE:
      
return (char *)"Invalid Arg Size";
    
case CL_INVALID_KERNEL_ARGS:
      
return (char *)"Invalid Kernel Args";
    
case CL_INVALID_WORK_DIMENSION:
      
return (char *)"Invalid Work Dimension";
    
case CL_INVALID_WORK_GROUP_SIZE:
      
return (char *)"Invalid Work Group Size";
    
case CL_INVALID_WORK_ITEM_SIZE:
      
return (char *)"Invalid Work Item Size";
    
case CL_INVALID_GLOBAL_OFFSET:
      
return (char *)"Invalid Global Offset";
    
case CL_INVALID_EVENT_WAIT_LIST:
      
return (char *)"Invalid Event Wait List";
    
case CL_INVALID_EVENT:
      
return (char *)"Invalid Event";
    
case CL_INVALID_OPERATION:
      
return (char *)"Invalid Operation";
    
case CL_INVALID_GL_OBJECT:
      
return (char *)"Invalid GL Object";
    
case CL_INVALID_BUFFER_SIZE:
      
return (char *)"Invalid Buffer Size";
    
case CL_INVALID_MIP_LEVEL:
      
return (char *)"Invalid Mip Level";
    
case CL_INVALID_GLOBAL_WORK_SIZE:
      
return (char *)"Invalid Global Work Size";
    
default:
      
break;
    }

  
return "Unknown";
}

你可能感兴趣的:(Test latency for clEnqueueNDRangeKernel)