cuda cuPrintf用法示例

需要下载两个文件:

 

cuPrintf.cu【求猛击】

cuPrintf.cuh【同求】


然后将它们置于你的 project 下,和你的 xxoo.cu 放在同一目录即可。

给出一个简单示例,下面附带一些官方解释,不准偷走!

 

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <cuda.h>
#include "cuPrintf.cu"

/**
 * Enumerates the CUDA devices and prints each device's name to stdout
 * as "GPU: <name>", one device per line.
 *
 * Each device is made current in turn via cudaSetDevice(), so on success
 * the last enumerated device is left as the current device.
 *
 * @return true if at least one device was found and every runtime call
 *         succeeded; false otherwise.
 */
bool InitGPUSet()
{
    int num = 0;
    if (cudaSuccess != cudaGetDeviceCount(&num) || num <= 0)
        return false;

    for (int i = 0; i < num; ++i)
    {
        cudaDeviceProp tCard;
        if (cudaSuccess != cudaSetDevice(i) ||
            cudaSuccess != cudaGetDeviceProperties(&tCard, i))
            return false;

        // Print directly instead of strcat-ing into a fixed-size buffer:
        // appending on every iteration ran the names together and could
        // write past the end of the buffer.
        printf("GPU: %s\n", tCard.name);
    }
    return true;
}

/**
 * Initialises the cuPrintf system with its default buffer size.
 *
 * @return true if cudaPrintfInit() reported cudaSuccess, false otherwise.
 */
bool cuPrintInit()
{
    // Compare the status code itself rather than its human-readable text.
    return cudaSuccess == cudaPrintfInit();
}

/**
 * Demo kernel: every thread logs its block index, its index within the
 * block and its global index through cuPrintf.
 *
 * Only the x dimension of the launch configuration is used to compute the
 * indices. The output is stored in the cuPrintf buffer and only appears
 * once the host calls cudaPrintfDisplay().
 */
__global__ void displayGPU_demo()
{
    int bsize = blockDim.x;
    int bid = blockIdx.x;
    int tid = bid * bsize + threadIdx.x;

    cuPrintf("当前执行kernel的 block 编号:\t%d\n", bid);
    cuPrintf("当前执行kernel的 thread 在当前块中编号:\t%d\n", threadIdx.x);
    cuPrintf("当前执行kernel的 thread 全局编号:\t%d\n", tid);
    cuPrintf("thread over\n\n");
}

/**
 * Lists the GPUs, initialises cuPrintf, launches the demo kernel with
 * 2 blocks of 3 threads and dumps the cuPrintf buffer to stdout.
 *
 * @return EXIT_SUCCESS on success, EXIT_FAILURE if any step failed.
 */
int main(void)
{
    if (!InitGPUSet())
    {
        puts("device is not ready!");
        return EXIT_FAILURE;
    }
    if (!cuPrintInit())
    {
        puts("cuPrintf initialisation failed!");
        return EXIT_FAILURE;
    }

    int status = EXIT_SUCCESS;

    displayGPU_demo<<<2, 3>>>();

    // A kernel launch returns no status: launch errors are read with
    // cudaGetLastError(), execution errors surface at the next sync.
    cudaError_t err = cudaGetLastError();
    if (cudaSuccess == err)
        err = cudaDeviceSynchronize();

    // true: prefix every line with the [blockID, threadID] of the thread
    // that issued it; false: print the message only.
    if (cudaSuccess == err)
        err = cudaPrintfDisplay(stdout, true);

    if (cudaSuccess != err)
    {
        fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));
        status = EXIT_FAILURE;
    }

    // Free the buffer allocated by cudaPrintfInit() on every path.
    cudaPrintfEnd();
    return status;
}

/**
 * cudaPrintfInit and cudaPrintfEnd only need be called once throughout your
 * entire project. Output is not automatically displayed on the screen, but
 * stored in a buffer which is cleared and displayed when cudaPrintfDisplay is
 * called. The size of the buffer can be specified with the optional argument
 * cudaPrintfInit(size_t bufferLen). cudaPrintfEnd simply frees the memory
 * allocated by cudaPrintfInit. When cudaPrintfDisplay is called, output stored
 * in the buffer is displayed to the console. The second argument in this call
 * either displays the current thread (true) or doesn't (false). The first
 * argument, specified by stdout in this example, simply defines the descriptor
 * where the cuPrintf log is sent.
 *
 * On another note, I've found that using cuPrintf impacts on the performance
 * of my kernels, presumably due to the data transfer performed every time
 * cudaPrintfDisplay() is called.
 */

// Official documentation, excerpted from "cuPrintf.cuh"

// DEVICE SIDE
// External function definitions for device-side code
//
//  cuPrintfRestrict
//
//      Called to restrict output to a given thread/block. Pass
//      the constant CUPRINTF_UNRESTRICTED to unrestrict output
//      for thread/block IDs. Note you can therefore allow
//      "all printfs from block 3" or "printfs from thread 2
//      on all blocks", or "printfs only from block 1, thread 5".
//
//      Arguments:
//          threadid - Thread ID to allow printfs from
//          blockid  - Block ID to allow printfs from
//
//      NOTE: Restrictions last between invocations of
//      kernels unless cudaPrintfInit() is called again.
//
/////////////////#define CUPRINTF_UNRESTRICTED -1
////////////////__device__ void cuPrintfRestrict(int threadid, int blockid);

// HOST SIDE
// External function definitions for host-side code
//
//  cudaPrintfInit
//
//      Call this once to initialise the printf system. If the output
//      file or buffer size needs to be changed, call cudaPrintfEnd()
//      before re-calling cudaPrintfInit().
//
//      The default size for the buffer is 1 megabyte. For CUDA
//      architecture 1.1 and above, the buffer is filled linearly and
//      is completely used; however for architecture 1.0, the buffer
//      is divided into as many segments as there are threads, even
//      if some threads do not call cuPrintf().
//
//      Arguments:
//          bufferLen - Length, in bytes, of total space to reserve
//                      (in device global memory) for output.
//
//      Returns:
//          cudaSuccess if all is well.
//
// NOTE(review): the default quoted below is 8*1048576 bytes, which does not
// match the "1 megabyte" / "1-meg" wording around it - confirm against the
// cuPrintf.cuh actually in use.
/////////////////extern "C" cudaError_t cudaPrintfInit(size_t bufferLen=8*1048576); // 1-meg - that's enough for 4096 printfs by all threads put together

//
//  cudaPrintfEnd
//
//      Cleans up all memories allocated by cudaPrintfInit().
//      Call this at exit, or before calling cudaPrintfInit() again.
//
/////////////////////extern "C" void cudaPrintfEnd();

//
//  cudaPrintfDisplay
//
//      Dumps the contents of the output buffer to the specified
//      file pointer. If the output pointer is not specified,
//      the default "stdout" is used.
//
//      Arguments:
//          outputFP     - A file pointer to an output stream.
//          showThreadID - If "true", output strings are prefixed
//                         by "[blockid, threadid] " at output.
//
//      Returns:
//          cudaSuccess if all is well.
//
//////////////extern "C" cudaError_t cudaPrintfDisplay(void *outputFP=NULL, bool showThreadID=false);

//////////////#endif // CUPRINTF_H

你可能感兴趣的:(thread,CUDA,buffer,performance,Descriptor,output)