玩转gpgpu-sim 04记—— __cudaRegisterFatBinary() of gpgpu-sim 到底做了什么

官方文档:

GPGPU-Sim 3.x Manual

__cudaRegisterFatBinary(void*) 被执行到的代码逻辑如下:

/*
 * Registers a fat binary with the simulator and returns a handle for it.
 *
 * On the cuobjdump path (the only path shown in this excerpt) the function:
 *   1. reinterprets fatCubin as a __fatDeviceText record,
 *   2. digs the originating source file name out of the fatbin payload using
 *      fixed byte offsets,
 *   3. hands out the next sequential handle (starting at 1),
 *   4. calls cuobjdumpInit() on the first registration only, and
 *   5. calls cuobjdumpRegisterFatBinary() with the handle and file name.
 *
 * fatCubin: opaque pointer that is cast to the local __fatDeviceText struct
 *           declared below.
 * returns:  on the cuobjdump path, the integer handle cast to void** -- it is
 *           an identifier, not a dereferenceable pointer.
 *
 * NOTE(review): the non-cuobjdump branch is elided ("...") in this excerpt,
 * so its behavior and return value are not documented here.
 */
void** CUDARTAPI __cudaRegisterFatBinary( void *fatCubin )
{
#if (CUDART_VERSION < 2010)
	// Compiled against a CUDA runtime older than 2.1: report the error and terminate.
	printf("GPGPU-Sim PTX: ERROR ** this version of GPGPU-Sim requires CUDA 2.1 or higher\n");
	exit(1);
#endif
	CUctx_st *context = GPGPUSim_Context();
	// Function-local static counter: persists across calls, first handle issued is 1.
	static unsigned next_fat_bin_handle = 1;
	// Take this path only when the simulator configuration enables cuobjdump.
	if(context->get_device()->get_gpgpu()->get_config().use_cuobjdump()) {
		// The following workaround has only been verified on 64-bit systems. 
		// On a 32-bit build only a warning is printed; execution continues.
		if (sizeof(void*) == 4) 
			printf("GPGPU-Sim PTX: FatBin file name extraction has not been tested on 32-bit system.\n"); 

		// FatBin handle from the .fatbin.c file (one of the intermediate files generated by NVCC)
		// Only the 'd' field of this struct is read below.
		typedef struct {int m; int v; const unsigned long long* d; char* f;} __fatDeviceText __attribute__ ((aligned (8))); 
		__fatDeviceText * fatDeviceText = (__fatDeviceText *) fatCubin;

		// Extract the source code file name that generate the given FatBin. 
		// - Obtains the pointer to the actual fatbin structure from the FatBin handle (fatCubin).
		// - An integer inside the fatbin structure contains the relative offset to the source code file name.
		// - This offset differs among different CUDA and GCC versions. 
		// The int at byte 48 of the payload is read as the offset; the name is
		// then taken to start at byte (16 + offset).
		// NOTE(review): the constants 48 and 16 are hard-coded here; confirm they
		// match the fatbin layout of the CUDA toolkit in use.
		char * pfatbin = (char*) fatDeviceText->d; 
		int offset = *((int*)(pfatbin+48)); 
		char * filename = (pfatbin+16+offset); 

		// The extracted file name is associated with a fat_cubin_handle passed
		// into cudaLaunch().  Inside cudaLaunch(), the associated file name is
		// used to find the PTX/SASS section from cuobjdump, which contains the
		// PTX/SASS code for the launched kernel function.  
		// This allows us to work around the fact that cuobjdump only outputs the
		// file name associated with each section. 
		// Take the current counter value as this binary's handle, then advance the counter.
		unsigned long long fat_cubin_handle = next_fat_bin_handle;
		next_fat_bin_handle++;
		printf("GPGPU-Sim PTX: __cudaRegisterFatBinary, fat_cubin_handle = %llu, filename=%s\n", fat_cubin_handle, filename);
		/*!
		 * This function extracts all data from all files in first call
		 * then for next calls, only returns the appropriate number
		 */
		assert(fat_cubin_handle >= 1);
		// One-time initialization: performed only for the very first registered binary.
		if (fat_cubin_handle==1) cuobjdumpInit();
		cuobjdumpRegisterFatBinary(fat_cubin_handle, filename);

		// The numeric handle is returned disguised as a void**.
		return (void**)fat_cubin_handle;
	}else{ ... }

}

1. 调用关系

刚开始一波的调用关系如下:

玩转gpgpu-sim 04记—— __cudaRegisterBinary() of gpgpu-sim 到底做了什么_第1张图片

为方便按代码索引,下面按"被调用者在前、调用者在后"的顺序整理:排在下面的函数会调用排在上面、先给出定义的函数:

// Excerpt of the functional-simulation configuration class; members other than
// the one relevant to this walkthrough are elided ("...").
class gpgpu_functional_sim_config 
{	...
	// Storage for the "-gpgpu_ptx_use_cuobjdump" option; reg_options() passes
	// the address of this field to option_parser_register().
	int m_ptx_use_cuobjdump;
	...
}

// Excerpt: registers this class's command-line/config options with the option
// parser. Only the cuobjdump option is shown; the rest is elided ("...").
void gpgpu_functional_sim_config::reg_options(class OptionParser * opp)
{	...
	// Registers "-gpgpu_ptx_use_cuobjdump" as an OPT_BOOL option stored in
	// m_ptx_use_cuobjdump.
	// NOTE(review): the trailing "1" is presumably the default value string --
	// confirm against option_parser_register()'s signature.
	option_parser_register(opp, 
						   "-gpgpu_ptx_use_cuobjdump", OPT_BOOL,
						   &m_ptx_use_cuobjdump,
						   "Use cuobjdump to extract ptx and sass from binaries",
						   "1");//CUDART_VERSION >= 4000
	...
}

// Excerpt: simulator initialization routine returning a gpgpu_sim pointer.
// Only the step relevant to this walkthrough is shown; the rest is elided ("...").
gpgpu_sim *gpgpu_ptx_sim_init_perf()
{	...
	// Registers the global GPU configuration's options with the option parser.
	g_the_gpu_config.reg_options(opp);
	...
}

// Excerpt: builds the simulated device object. Surrounding code (including
// where 'prop' and 'the_device' are declared and what is returned) is elided ("...").
class _cuda_device_id *GPGPUSim_Init()
{	...
	// Obtain the simulator instance from the init routine.
	gpgpu_sim *the_gpu = gpgpu_ptx_sim_init_perf();
	// Attach the device properties held in 'prop' to the simulator.
	the_gpu->set_prop(prop);
	// Wrap the simulator in a _cuda_device_id object.
	the_device = new _cuda_device_id(the_gpu);
	start_sim_thread(1);
	...
}

// Call-tree sketch (not compilable code): indentation shows nesting, i.e.
// __cudaRegisterFatBinary() reaches GPGPUSim_Context(), which in turn involves
// GPGPUSim_Init() and the CUctx_st constructor, followed by the two cuobjdump calls.
void** CUDARTAPI __cudaRegisterFatBinary( void *fatCubin )
{	...
	static CUctx_st* GPGPUSim_Context()
		class _cuda_device_id *GPGPUSim_Init()
		CUctx_st( _cuda_device_id *gpu ) { m_gpu = gpu; }//the_context = new CUctx_st(the_gpu);
	cuobjdumpInit();
	cuobjdumpRegisterFatBinary(fat_cubin_handle, filename);
	...
}

2. GPGPUSim_Context() 做了什么

3. use_cuobjdump() 表示什么含义

GPGPUSim_Context()->get_device()->get_gpgpu()->get_config().use_cuobjdump() 表示什么含义

4. cuobjdumpInit() 做了什么

5. cuobjdumpRegisterFatBinary() 做了什么

你可能感兴趣的:(前端)