CUDA 与 Direct3D 互操作性

2023-10-24 10:42:35

CUDA 与 Direct3D 互操作性

Direct3D 9Ex、Direct3D 10 和 Direct3D 11 支持 Direct3D 互操作性。

CUDA 上下文只能与满足以下条件的 Direct3D 设备互操作： Direct3D 9Ex 设备必须使用设置为 D3DDEVTYPE_HAL 的 DeviceType 和使用 D3DCREATE_HARDWARE_VERTEXPROCESSING 标志的 BehaviorFlags 创建； Direct3D 10 和 Direct3D 11 设备必须在 DriverType 设置为 D3D_DRIVER_TYPE_HARDWARE 的情况下创建。

可以映射到 CUDA 地址空间的 Direct3D 资源是 Direct3D 缓冲区、纹理和表面。这些资源使用 cudaGraphicsD3D9RegisterResource()、cudaGraphicsD3D10RegisterResource() 和 cudaGraphicsD3D11RegisterResource() 注册。

以下代码示例使用内核动态修改存储在顶点缓冲区对象中的、大小为 width × height 的二维顶点网格。

Direct3D 9 Version:

// Direct3D 9Ex / CUDA interop sample: a CUDA kernel rewrites, every frame,
// a width x height grid of vertices stored in a Direct3D vertex buffer.
// NOTE: `...`, hWnd, params, width, height and time are placeholders that the
// sample leaves to the surrounding application.

// The Ex interfaces are required: Direct3DCreate9Ex() and CreateDeviceEx()
// hand back IDirect3D9Ex / IDirect3DDevice9Ex (both derive from the non-Ex
// interfaces, so existing uses of D3D and device keep working).
IDirect3D9Ex* D3D;
IDirect3DDevice9Ex* device;

// One vertex: position plus packed colour. The kernel writes each vertex as
// a single float4, with the colour bits stored in the fourth component.
struct CUSTOMVERTEX {
    FLOAT x, y, z;
    DWORD color;
};

IDirect3DVertexBuffer9* positionsVB;
struct cudaGraphicsResource* positionsVB_CUDA;

// Forward declarations: main() calls Render(), which launches the kernel.
void Render();
__global__ void createVertices(float4* positions, float time,
                               unsigned int width, unsigned int height);

int main()
{
    int dev;

    // Initialize Direct3D. Direct3DCreate9Ex() returns an HRESULT and
    // delivers the interface through its second argument.
    Direct3DCreate9Ex(D3D_SDK_VERSION, &D3D);

    // Get a CUDA-enabled adapter
    unsigned int adapter = 0;
    for (; adapter < D3D->GetAdapterCount(); adapter++) {
        D3DADAPTER_IDENTIFIER9 adapterId;
        D3D->GetAdapterIdentifier(adapter, 0, &adapterId);
        if (cudaD3D9GetDevice(&dev, adapterId.DeviceName) == cudaSuccess)
            break;
    }

    // Create device
    ...
    D3D->CreateDeviceEx(adapter, D3DDEVTYPE_HAL, hWnd,
                        D3DCREATE_HARDWARE_VERTEXPROCESSING,
                        &params, NULL, &device);

    // Use the same device
    cudaSetDevice(dev);

    // Create vertex buffer and register it with CUDA
    unsigned int size = width * height * sizeof(CUSTOMVERTEX);
    device->CreateVertexBuffer(size, 0, D3DFVF_CUSTOMVERTEX,
                               D3DPOOL_DEFAULT, &positionsVB, 0);
    cudaGraphicsD3D9RegisterResource(&positionsVB_CUDA,
                                     positionsVB,
                                     cudaGraphicsRegisterFlagsNone);
    // CUDA only writes the buffer and does not need its previous contents.
    cudaGraphicsResourceSetMapFlags(positionsVB_CUDA,
                                    cudaGraphicsMapFlagsWriteDiscard);

    // Launch rendering loop
    while (...) {
        ...
        Render();
        ...
    }
    ...
}

// Regenerates the vertex grid with CUDA for the current frame.
void Render()
{
    // Map vertex buffer for writing from CUDA
    float4* positions;
    cudaGraphicsMapResources(1, &positionsVB_CUDA, 0);
    size_t num_bytes;
    cudaGraphicsResourceGetMappedPointer((void**)&positions,
                                         &num_bytes,
                                         positionsVB_CUDA);

    // Execute kernel. The grid is rounded up so that sizes which are not a
    // multiple of the block size are still fully covered; the kernel guards
    // against the resulting out-of-range threads.
    dim3 dimBlock(16, 16, 1);
    dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x,
                 (height + dimBlock.y - 1) / dimBlock.y, 1);
    createVertices<<<dimGrid, dimBlock>>>(positions, time,
                                          width, height);

    // Unmap vertex buffer
    cudaGraphicsUnmapResources(1, &positionsVB_CUDA, 0);

    // Draw and present
    ...
}

// Unregisters the vertex buffer from CUDA, then releases it.
void releaseVB()
{
    cudaGraphicsUnregisterResource(positionsVB_CUDA);
    positionsVB->Release();
}

// Writes one vertex per thread of a 2D launch.
//   positions : mapped vertex buffer, indexed as y * width + x
//   time      : phase added to the sine/cosine pattern
//   width     : number of vertices along x
//   height    : number of vertices along y
__global__ void createVertices(float4* positions, float time,
                               unsigned int width, unsigned int height)
{
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads of the rounded-up grid that fall outside the mesh do nothing.
    if (x >= width || y >= height)
        return;

    // Calculate uv coordinates
    float u = x / (float)width;
    float v = y / (float)height;
    u = u * 2.0f - 1.0f;
    v = v * 2.0f - 1.0f;

    // Calculate simple sine wave pattern
    float freq = 4.0f;
    float w = sinf(u * freq + time)
            * cosf(v * freq + time) * 0.5f;

    // Write positions; the fourth component carries the colour bit pattern.
    positions[y * width + x] =
        make_float4(u, w, v, __int_as_float(0xff00ff00));
}

Direct3D 10 Version

// Direct3D 10 / CUDA interop sample: a CUDA kernel rewrites, every frame,
// a width x height grid of vertices stored in a Direct3D vertex buffer.
// NOTE: `...`, swapChainDesc, swapChain, width, height and time are
// placeholders that the sample leaves to the surrounding application.

ID3D10Device* device;

// One vertex: position plus packed colour. The kernel writes each vertex as
// a single float4, with the colour bits stored in the fourth component.
struct CUSTOMVERTEX {
    FLOAT x, y, z;
    DWORD color;
};

ID3D10Buffer* positionsVB;
struct cudaGraphicsResource* positionsVB_CUDA;

// Forward declarations: main() calls Render(), which launches the kernel.
void Render();
__global__ void createVertices(float4* positions, float time,
                               unsigned int width, unsigned int height);

int main()
{
    int dev;

    // Get a CUDA-enabled adapter
    IDXGIFactory* factory;
    CreateDXGIFactory(__uuidof(IDXGIFactory), (void**)&factory);
    IDXGIAdapter* adapter = 0;
    for (unsigned int i = 0; !adapter; ++i) {
        if (FAILED(factory->EnumAdapters(i, &adapter)))
            break;
        if (cudaD3D10GetDevice(&dev, adapter) == cudaSuccess)
            break;
        adapter->Release();
        // Reset the pointer so the loop moves on to the next adapter instead
        // of stopping with a dangling, already released interface.
        adapter = 0;
    }
    factory->Release();

    // No CUDA-capable adapter was found: dev is unset and adapter is null.
    if (!adapter)
        return 1;

    // Create swap chain and device
    ...
    D3D10CreateDeviceAndSwapChain(adapter,
                                  D3D10_DRIVER_TYPE_HARDWARE, 0,
                                  D3D10_CREATE_DEVICE_DEBUG,
                                  D3D10_SDK_VERSION,
                                  &swapChainDesc, &swapChain,
                                  &device);
    adapter->Release();

    // Use the same device
    cudaSetDevice(dev);

    // Create vertex buffer and register it with CUDA
    unsigned int size = width * height * sizeof(CUSTOMVERTEX);
    D3D10_BUFFER_DESC bufferDesc;
    bufferDesc.Usage          = D3D10_USAGE_DEFAULT;
    bufferDesc.ByteWidth      = size;
    bufferDesc.BindFlags      = D3D10_BIND_VERTEX_BUFFER;
    bufferDesc.CPUAccessFlags = 0;
    bufferDesc.MiscFlags      = 0;
    device->CreateBuffer(&bufferDesc, 0, &positionsVB);
    cudaGraphicsD3D10RegisterResource(&positionsVB_CUDA,
                                      positionsVB,
                                      cudaGraphicsRegisterFlagsNone);
    // CUDA only writes the buffer and does not need its previous contents.
    cudaGraphicsResourceSetMapFlags(positionsVB_CUDA,
                                    cudaGraphicsMapFlagsWriteDiscard);

    // Launch rendering loop
    while (...) {
        ...
        Render();
        ...
    }
    ...
}

// Regenerates the vertex grid with CUDA for the current frame.
void Render()
{
    // Map vertex buffer for writing from CUDA
    float4* positions;
    cudaGraphicsMapResources(1, &positionsVB_CUDA, 0);
    size_t num_bytes;
    cudaGraphicsResourceGetMappedPointer((void**)&positions,
                                         &num_bytes,
                                         positionsVB_CUDA);

    // Execute kernel. The grid is rounded up so that sizes which are not a
    // multiple of the block size are still fully covered; the kernel guards
    // against the resulting out-of-range threads.
    dim3 dimBlock(16, 16, 1);
    dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x,
                 (height + dimBlock.y - 1) / dimBlock.y, 1);
    createVertices<<<dimGrid, dimBlock>>>(positions, time,
                                          width, height);

    // Unmap vertex buffer
    cudaGraphicsUnmapResources(1, &positionsVB_CUDA, 0);

    // Draw and present
    ...
}

// Unregisters the vertex buffer from CUDA, then releases it.
void releaseVB()
{
    cudaGraphicsUnregisterResource(positionsVB_CUDA);
    positionsVB->Release();
}

// Writes one vertex per thread of a 2D launch.
//   positions : mapped vertex buffer, indexed as y * width + x
//   time      : phase added to the sine/cosine pattern
//   width     : number of vertices along x
//   height    : number of vertices along y
__global__ void createVertices(float4* positions, float time,
                               unsigned int width, unsigned int height)
{
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads of the rounded-up grid that fall outside the mesh do nothing.
    if (x >= width || y >= height)
        return;

    // Calculate uv coordinates
    float u = x / (float)width;
    float v = y / (float)height;
    u = u * 2.0f - 1.0f;
    v = v * 2.0f - 1.0f;

    // Calculate simple sine wave pattern
    float freq = 4.0f;
    float w = sinf(u * freq + time)
            * cosf(v * freq + time) * 0.5f;

    // Write positions; the fourth component carries the colour bit pattern.
    positions[y * width + x] =
        make_float4(u, w, v, __int_as_float(0xff00ff00));
}

Direct3D 11 Version

// Direct3D 11 / CUDA interop sample: a CUDA kernel rewrites, every frame,
// a width x height grid of vertices stored in a Direct3D vertex buffer.
// NOTE: `...`, featureLevels, featureLevel, deviceContext, swapChainDesc,
// swapChain, sFnPtr_D3D11CreateDeviceAndSwapChain, width, height and time are
// placeholders that the sample leaves to the surrounding application.

ID3D11Device* device;

// One vertex: position plus packed colour. The kernel writes each vertex as
// a single float4, with the colour bits stored in the fourth component.
struct CUSTOMVERTEX {
    FLOAT x, y, z;
    DWORD color;
};

ID3D11Buffer* positionsVB;
struct cudaGraphicsResource* positionsVB_CUDA;

// Forward declarations: main() calls Render(), which launches the kernel.
void Render();
__global__ void createVertices(float4* positions, float time,
                               unsigned int width, unsigned int height);

int main()
{
    int dev;

    // Get a CUDA-enabled adapter
    IDXGIFactory* factory;
    CreateDXGIFactory(__uuidof(IDXGIFactory), (void**)&factory);
    IDXGIAdapter* adapter = 0;
    for (unsigned int i = 0; !adapter; ++i) {
        if (FAILED(factory->EnumAdapters(i, &adapter)))
            break;
        if (cudaD3D11GetDevice(&dev, adapter) == cudaSuccess)
            break;
        adapter->Release();
        // Reset the pointer so the loop moves on to the next adapter instead
        // of stopping with a dangling, already released interface.
        adapter = 0;
    }
    factory->Release();

    // No CUDA-capable adapter was found: dev is unset and adapter is null.
    if (!adapter)
        return 1;

    // Create swap chain and device.
    // Direct3D 11 requires D3D_DRIVER_TYPE_UNKNOWN whenever an explicit
    // adapter is passed; the device is still a hardware device because the
    // adapter chosen above is a physical, CUDA-capable GPU.
    ...
    sFnPtr_D3D11CreateDeviceAndSwapChain(adapter,
                                         D3D_DRIVER_TYPE_UNKNOWN,
                                         0,
                                         D3D11_CREATE_DEVICE_DEBUG,
                                         featureLevels, 3,
                                         D3D11_SDK_VERSION,
                                         &swapChainDesc, &swapChain,
                                         &device,
                                         &featureLevel,
                                         &deviceContext);
    adapter->Release();

    // Use the same device
    cudaSetDevice(dev);

    // Create vertex buffer and register it with CUDA
    unsigned int size = width * height * sizeof(CUSTOMVERTEX);
    // Zero-initialise so that StructureByteStride, which is not set below,
    // does not carry an indeterminate value.
    D3D11_BUFFER_DESC bufferDesc = {};
    bufferDesc.Usage          = D3D11_USAGE_DEFAULT;
    bufferDesc.ByteWidth      = size;
    bufferDesc.BindFlags      = D3D11_BIND_VERTEX_BUFFER;
    bufferDesc.CPUAccessFlags = 0;
    bufferDesc.MiscFlags      = 0;
    device->CreateBuffer(&bufferDesc, 0, &positionsVB);
    cudaGraphicsD3D11RegisterResource(&positionsVB_CUDA,
                                      positionsVB,
                                      cudaGraphicsRegisterFlagsNone);
    // CUDA only writes the buffer and does not need its previous contents.
    cudaGraphicsResourceSetMapFlags(positionsVB_CUDA,
                                    cudaGraphicsMapFlagsWriteDiscard);

    // Launch rendering loop
    while (...) {
        ...
        Render();
        ...
    }
    ...
}

// Regenerates the vertex grid with CUDA for the current frame.
void Render()
{
    // Map vertex buffer for writing from CUDA
    float4* positions;
    cudaGraphicsMapResources(1, &positionsVB_CUDA, 0);
    size_t num_bytes;
    cudaGraphicsResourceGetMappedPointer((void**)&positions,
                                         &num_bytes,
                                         positionsVB_CUDA);

    // Execute kernel. The grid is rounded up so that sizes which are not a
    // multiple of the block size are still fully covered; the kernel guards
    // against the resulting out-of-range threads.
    dim3 dimBlock(16, 16, 1);
    dim3 dimGrid((width + dimBlock.x - 1) / dimBlock.x,
                 (height + dimBlock.y - 1) / dimBlock.y, 1);
    createVertices<<<dimGrid, dimBlock>>>(positions, time,
                                          width, height);

    // Unmap vertex buffer
    cudaGraphicsUnmapResources(1, &positionsVB_CUDA, 0);

    // Draw and present
    ...
}

// Unregisters the vertex buffer from CUDA, then releases it.
void releaseVB()
{
    cudaGraphicsUnregisterResource(positionsVB_CUDA);
    positionsVB->Release();
}

// Writes one vertex per thread of a 2D launch.
//   positions : mapped vertex buffer, indexed as y * width + x
//   time      : phase added to the sine/cosine pattern
//   width     : number of vertices along x
//   height    : number of vertices along y
__global__ void createVertices(float4* positions, float time,
                               unsigned int width, unsigned int height)
{
    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads of the rounded-up grid that fall outside the mesh do nothing.
    if (x >= width || y >= height)
        return;

    // Calculate uv coordinates
    float u = x / (float)width;
    float v = y / (float)height;
    u = u * 2.0f - 1.0f;
    v = v * 2.0f - 1.0f;

    // Calculate simple sine wave pattern
    float freq = 4.0f;
    float w = sinf(u * freq + time)
            * cosf(v * freq + time) * 0.5f;

    // Write positions; the fourth component carries the colour bit pattern.
    positions[y * width + x] =
        make_float4(u, w, v, __int_as_float(0xff00ff00));
}

SLI 互操作性

在具有多个 GPU 的系统中，所有支持 CUDA 的 GPU 都可以通过 CUDA 驱动程序和运行时作为单独的设备进行访问。然而，当系统处于 SLI 模式时，有如下所述的特殊注意事项。

首先，在一个 GPU 上的一个 CUDA 设备中的分配将消耗其他 GPU 上的内存，这些 GPU 是 Direct3D 或 OpenGL 设备的 SLI 配置的一部分。因此，分配可能会比预期的更早失败。

其次，应用程序应该创建多个 CUDA 上下文，SLI 配置中的每个 GPU 各一个。虽然这不是严格要求，但它可以避免设备之间不必要的数据传输。应用程序可以调用 cudaD3D[9|10|11]GetDevices()（用于 Direct3D）和 cudaGLGetDevices()（用于 OpenGL），来获取在当前帧和下一帧中执行渲染的设备所对应的 CUDA 设备句柄。有了这些信息，应用程序通常会选择合适的设备，并将 Direct3D 或 OpenGL 资源映射到当 deviceList 参数设置为 cudaD3D[9|10|11]DeviceListCurrentFrame 或 cudaGLDeviceListCurrentFrame 时由 cudaD3D[9|10|11]GetDevices() 或 cudaGLGetDevices() 返回的 CUDA 设备上。

请注意，从 cudaGraphicsD3D[9|10|11]RegisterResource 和 cudaGraphicsGLRegister[Buffer|Image] 返回的资源只能在发生注册的设备上使用。因此，在 SLI 配置中，当在不同的 CUDA 设备上计算不同帧的数据时，有必要分别为每个设备注册资源。

有关 CUDA 运行时如何分别与 Direct3D 和 OpenGL 互操作的详细信息，请参阅 Direct3D 互操作性和 OpenGL 互操作性。

本文来自互联网用户投稿，文章观点仅代表作者本人，不代表本站立场，不承担相关法律责任。如若转载，请注明出处。 如若内容造成侵权/违法违规/事实不符，请点击【内容举报】进行投诉反馈！

标签：技术

上一篇 > Direct3D11入门-环境配置
下一篇 > 在Vulkan、Metal和Direct3D上运行Qt Quick - 第2部分

Duilib中list控件支持ctrl和shif多行选中的实现

[ICML2015]Batch Normalization:Accelerating Deep Network Training by Reducing Internal Covariate Shif

win10系统微软输入法于eclipse ctrl+shif+f冲突间接处理办法

Codeforces Round #259 (Div. 2) B. Little Pony and Sort by Shif

读LDD3，内存映射与DMA--PAGE_SHIF…

VMware虚拟机安装XP【要先分区，再设置BOOT 启动CD，shif+上移】

更换iBus五笔的左与右Shif

sublime ctrl+shif+f 没用解决办法

idea 对 ctrl + z 的撤销是 ctrl + shif + z

计算机最早的设计师应用于,计算机应用基础选择题doc.doc

win10自带截图神器：Win+Shift+S

Python基础之文件目录操作

python简述目录_Python基础之文件目录操作(示例代码)

tp5 如何做数据采集

任务2-7(服务器字体+阿里巴巴矢量库)

html标签（1)：h1~h6,p,br,pre,hr

TI 电量计介绍与芯片选型指南

几款TI电源芯片简介

TI DSP芯片C2000系列读取FLASH数据

德州仪器(Ti)平台嵌入式开发基础

TI三相电机智能栅极驱动芯片特点分类

省选模拟（12.08） T3 圈圈圈圈圈圈圈圈

Hadoop生态圈技术栈（上）

大数据开发基础入门与项目实战（三）Hadoop核心及生态圈技术栈之6.Impala交互式查询

小猿圈之Linux下Mysql 操作命令

大数据Hadoop生态圈常用面试题

大数据开发基础入门与项目实战（三）Hadoop核心及生态圈技术栈之4.Hive DDL、DQL和数据操作

备战Noip2018模拟赛11（B组）T3 Monogatari 物语

【智能优化算法-圆圈搜索算法】基于圆圈搜索算法Circle Search Algorithm求解单目标优化问题附matlab代码

NYOJ 78 圈水池

递归问题跑道汽车绕圈问题 Python实现

Hadoop生态圈（三）：MapReduce

CUDA与Direct3D 一致性

CUDA与Direct3D 一致性

SLI一致性

相关文章