六、D3D与CUDA的互操作
CUDA与D3D可以进行互操作,也就是说可以将D3D的资源映射到CUDA地址空间,使CUDA可以对D3D资源读取和写入数据。CUDA实现这个功能的API有两组(运行时API和驱动API),这里以运行时API为例。
实现D3D与CUDA的互操作,从总体上看,主要多了D3D设备指定和资源注册及相应释放操作,以及在创建D3D设备时使用D3DCREATE_HARDWARE_VERTEXPROCESSING标识,见下例。
1、 初始化D3D设备
这个过程与一般的D3D设备创建过程并没有太多变化,注意用颜色表示的部分。另外,用于指定D3D设备的函数cudaD3D9SetDirect3DDevice必须在调用其它CUDA运行时API之前调用。
// Creates the Direct3D9 device used for rendering and binds it to CUDA.
//
// Steps:
//   1. Create the Direct3D object and pick the first adapter for which
//      cudaD3D9GetDevice succeeds.
//   2. Create a windowed HAL device on THAT adapter with a fixed 512x512
//      back buffer, rendering into the dialog control identified by hWnd.
//   3. Register the device with the CUDA runtime via
//      cudaD3D9SetDirect3DDevice.
//
// Parameters:
//   hWnd - dialog control ID (passed to GetDlgItem) of the render window.
//
// Returns S_OK on success, E_FAIL if any step fails. On failure
// g_pd3dDevice is left NULL.
HRESULT Cdig_D3DDlg::InitD3D9( UINT hWnd )
{
    // Resolve the control that will display the rendered output.
    CWnd* pRenderWnd = GetDlgItem(hWnd);
    if( NULL == pRenderWnd )
        return E_FAIL;
    m_hwndRender = pRenderWnd->GetSafeHwnd();

    // Direct3D object pointer (not the device pointer).
    LPDIRECT3D9 g_pD3D = Direct3DCreate9( D3D_SDK_VERSION );
    if( NULL == g_pD3D )
        return E_FAIL;

    // Find the first CUDA capable adapter.
    const UINT adapterCount = g_pD3D->GetAdapterCount();
    for( g_iAdapter = 0; g_iAdapter < adapterCount; g_iAdapter++ )
    {
        D3DCAPS9 caps;
        if( FAILED( g_pD3D->GetDeviceCaps( g_iAdapter, D3DDEVTYPE_HAL, &caps ) ) )
            continue;   // Adapter doesn't support Direct3D

        D3DADAPTER_IDENTIFIER9 ident;
        if( FAILED( g_pD3D->GetAdapterIdentifier( g_iAdapter, 0, &ident ) ) )
            continue;   // ident would be uninitialised; skip this adapter

        int device;
        if( cudaSuccess == cudaD3D9GetDevice( &device, ident.DeviceName ) )
            break;

        // Clear the error recorded by the failed query so it does not show
        // up in a later cudaGetLastError() check.
        cudaGetLastError();
    }

    // Make sure we have found a CUDA-compatible D3D adapter to work on.
    if( g_iAdapter == adapterCount )
    {
        printf( "No CUDA-compatible Direct3D9 device available\n" );
        g_pD3D->Release();
        return E_FAIL;
    }

    if( FAILED( g_pD3D->GetAdapterDisplayMode( g_iAdapter, &g_d3ddm ) ) )
    {
        g_pD3D->Release();
        return E_FAIL;
    }

    // Set up the structure used to create the D3DDevice.
    D3DPRESENT_PARAMETERS g_d3dpp;
    ZeroMemory( &g_d3dpp, sizeof(g_d3dpp) );
    g_d3dpp.Windowed = true;
    g_d3dpp.BackBufferCount = 1;
    g_d3dpp.hDeviceWindow = m_hwndRender;
    g_d3dpp.SwapEffect = D3DSWAPEFFECT_DISCARD;
    g_d3dpp.BackBufferFormat = g_d3ddm.Format;
    // Set to 60 for fullscreen, and also don't forget to set Windowed to FALSE.
    g_d3dpp.FullScreen_RefreshRateInHz = 0;
    g_d3dpp.PresentationInterval = D3DPRESENT_INTERVAL_ONE; // D3DPRESENT_DONOTWAIT;
    // The back buffer size is fixed rather than taken from the control's
    // client rectangle.
    g_d3dpp.BackBufferWidth = 512;
    g_d3dpp.BackBufferHeight = 512;

    // Create the D3DDevice on the adapter that was verified to be CUDA
    // capable above (not blindly on D3DADAPTER_DEFAULT).
    const HRESULT hrCreate = g_pD3D->CreateDevice( g_iAdapter, D3DDEVTYPE_HAL, m_hwndRender,
                                                   D3DCREATE_HARDWARE_VERTEXPROCESSING,
                                                   &g_d3dpp, &g_pd3dDevice );

    // The factory object is only needed up to this point; drop our local
    // reference on both the success and the failure path.
    g_pD3D->Release();

    if( FAILED( hrCreate ) )
    {
        g_pd3dDevice = NULL;
        return E_FAIL;
    }

    // Turn off culling, so we see the front and back of the triangle.
    g_pd3dDevice->SetRenderState( D3DRS_CULLMODE, D3DCULL_NONE );
    // Turn off lighting, since we are providing our own vertex colors.
    g_pd3dDevice->SetRenderState( D3DRS_LIGHTING, FALSE );

    // Now we need to bind a CUDA context to the DX9 device.
    // This is the CUDA 2.0 DX9 interface (required for Windows XP and Vista).
    if( cudaSuccess != cudaD3D9SetDirect3DDevice( g_pd3dDevice ) )
    {
        printf( "cudaD3D9SetDirect3DDevice failed\n" );
        g_pd3dDevice->Release();
        g_pd3dDevice = NULL;
        return E_FAIL;
    }

    return S_OK;
}
2、 顶点创建与资源注册
这里首先创建一个顶点buffer,然后将其注册到CUDA上,其中可以通过设定函数cudaD3D9ResourceSetMapFlags的标识符来决定资源的读写属性。
// Creates the vertex buffer (65536 CUSTOMVERTEX entries, default pool) and
// registers it with CUDA so that it can later be mapped into the CUDA
// address space.
//
// Returns S_OK on success. Returns E_FAIL if the buffer cannot be created
// or if either CUDA interop call fails; in that case g_pVB is released and
// reset to NULL so no half-initialised buffer is left behind.
HRESULT Cdig_D3DDlg::InitVB()
{
    if( FAILED( g_pd3dDevice->CreateVertexBuffer( 65536 * sizeof(CUSTOMVERTEX), 0,
                                                  D3DFVF_CUSTOMVERTEX, D3DPOOL_DEFAULT,
                                                  &g_pVB, NULL ) ) )
    {
        return E_FAIL;
    }

    // Initialize interoperability between CUDA and Direct3D9:
    // register the vertex buffer with CUDA.
    if( cudaSuccess != cudaD3D9RegisterResource( g_pVB, cudaD3D9RegisterFlagsNone ) )
    {
        g_pVB->Release();
        g_pVB = NULL;
        return E_FAIL;
    }

    // cudaD3D9MapFlagsWriteDiscard: specifies that CUDA kernels which access
    // this resource will not read from it and will write over its entire
    // contents, so none of the data previously stored in the resource will
    // be preserved.
    if( cudaSuccess != cudaD3D9ResourceSetMapFlags( g_pVB, cudaD3D9MapFlagsWriteDiscard ) )
    {
        cudaD3D9UnregisterResource( g_pVB );
        g_pVB->Release();
        g_pVB = NULL;
        return E_FAIL;
    }

    return S_OK;
}
3、CUDA写入顶点数据
首先调用cudaD3D9MapResources函数进行资源映射(第一个参数指定D3D资源数目),再由函数cudaD3D9ResourceGetMappedPointer得到显存地址,这里也可以使用CUDA Array和2D pitch空间,即cudaD3D9ResourceGetMappedArray和cudaD3D9ResourceGetMappedPitch。
函数cudaD3D9UnmapResources用于解除资源映射。
void Cdig_D3DDlg::runCuda()
{
HRESULT hr = S_OK;
// Map vertex buffer to Cuda
float4* dptr;
// CUDA Map call to the Vertex Buffer and return a pointer
cudaD3D9MapResources(1, (IDirect3DResource9 **)&g_pVB);
cudaD3D9ResourceGetMappedPointer( (void **)&dptr, g_pVB, 0, 0);
const unsigned int g_MeshWidth = 256;
const unsigned int g_MeshHeight = 256;
float anim=0.1f;
// Execute kernel
simpleD3DKernel(dptr, g_MeshWidth, g_MeshHeight, anim);
// CUDA Map Unmap vertex buffer
cudaD3D9UnmapResources(1, (IDirect3DResource9 **)&g_pVB);
}
4、渲染
渲染的方式与一般的D3D渲染完全相同,这里给出一个参考示例。
// Renders one frame: clears the target, lets CUDA rewrite the vertex
// buffer, draws the buffer as a point list and presents the result.
// Does nothing when no device exists. Always returns S_OK.
HRESULT Cdig_D3DDlg::render()
{
    // No device, nothing to draw.
    if( !g_pd3dDevice )
        return S_OK;

    // Clear the render target to black.
    g_pd3dDevice->Clear( 0, NULL, D3DCLEAR_TARGET, D3DCOLOR_XRGB(0,0,0), 1.0f, 0 );

    // Run CUDA to update vertex positions before the scene is recorded.
    runCuda();

    g_pd3dDevice->BeginScene();

    // Set up the world, view, and projection matrices, then the lights.
    SetupMatrices( IDC_RENDERVIEW );
    SetupLights();

    // Render the vertex buffer contents as individual points.
    // (An alternative, disabled in the original, drew the same stream as a
    // triangle strip: DrawPrimitive(D3DPT_TRIANGLESTRIP, 0, 2*50-2).)
    const UINT vertexStride = sizeof(CUSTOMVERTEX);
    g_pd3dDevice->SetStreamSource( 0, g_pVB, 0, vertexStride );
    g_pd3dDevice->SetFVF( D3DFVF_CUSTOMVERTEX );
    g_pd3dDevice->DrawPrimitive( D3DPT_POINTLIST, 0, 65536 );

    g_pd3dDevice->EndScene();

    // Present the back buffer; NULL arguments mean the whole buffer is
    // presented to the device window.
    g_pd3dDevice->Present( NULL, NULL, NULL, NULL );

    return S_OK;
}
5、资源释放
Never forget to release these resources once you no longer need them.
// Releases everything created by InitD3D9/InitVB: unregisters the vertex
// buffer from CUDA and releases it, tears down the CUDA runtime state, then
// releases the D3D device.
//
// Each pointer is reset to NULL after release so that calling cleanup()
// again, or calling render() afterwards, does not touch a released COM
// object. Always returns S_OK.
HRESULT Cdig_D3DDlg::cleanup()
{
    if( g_pVB != NULL )
    {
        // Unregister the vertex buffer from CUDA before releasing it.
        cudaD3D9UnregisterResource( g_pVB );
        g_pVB->Release();
        g_pVB = NULL;
    }

    // Uninitialize CUDA data and context.
    cudaThreadExit();

    if( g_pd3dDevice != NULL )
    {
        g_pd3dDevice->Release();
        g_pd3dDevice = NULL;
    }

    return S_OK;
}
到此为止,D3D的入门学习告一段落了,更高级的三维渲染技术需要在工程实践中得到锻炼和提升,路漫漫其修远兮,唯有上下求索耳!