From c1c806490ff51b1e760eb38ce153936283377d04 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 10 Aug 2025 16:05:18 +0200 Subject: [PATCH] Add `nvapi` lib to D3D11 for efficient UAV writes overlaps on NVIDIA GPUs --- Source/Engine/Graphics/GPUPass.h | 6 ++++ .../DirectX/DX11/GPUContextDX11.cpp | 21 +++++++++++++ .../DirectX/DX11/GPUContextDX11.h | 1 + .../DirectX/DX11/GPUDeviceDX11.cpp | 30 ++++++++++++++++++- .../DirectX/DX11/GraphicsDeviceDX11.Build.cs | 2 ++ 5 files changed, 59 insertions(+), 1 deletion(-) diff --git a/Source/Engine/Graphics/GPUPass.h b/Source/Engine/Graphics/GPUPass.h index 59f8608e2..5a0520ec0 100644 --- a/Source/Engine/Graphics/GPUPass.h +++ b/Source/Engine/Graphics/GPUPass.h @@ -46,6 +46,12 @@ struct FLAXENGINE_API GPUMemoryPass : GPUPass { Context->MemoryBarrier(); } + + // Inserts a global memory barrier on data copies between resources. Use to ensure all writes are complete before submitting other commands. + void MemoryBarrier() + { + Context->MemoryBarrier(); + } }; /// diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp index 3d94cdd96..6d0957014 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp @@ -14,6 +14,10 @@ #include "Engine/Core/Math/Viewport.h" #include "Engine/Core/Math/Rectangle.h" #include "Engine/Profiler/RenderStats.h" +#if COMPILE_WITH_NVAPI +#include +extern bool EnableNvapi; +#endif #define DX11_CLEAR_SR_ON_STAGE_DISABLE 0 @@ -903,6 +907,23 @@ void GPUContextDX11::CopySubresource(GPUResource* dstResource, uint32 dstSubreso _context->CopySubresourceRegion(dstResourceDX11->GetResource(), dstSubresource, 0, 0, 0, srcResourceDX11->GetResource(), srcSubresource, nullptr); } +void GPUContextDX11::OverlapUA(bool end) +{ + // DirectX 11 doesn't support UAV barrier control, but custom GPU driver extensions allow manually specifying overlap sections. 
+#if COMPILE_WITH_NVAPI + if (EnableNvapi) + { + if (end) + NvAPI_D3D11_EndUAVOverlap(_context); + else + NvAPI_D3D11_BeginUAVOverlap(_context); + return; + } +#endif + // TODO: add support for AMD extensions to overlap UAV writes (agsDriverExtensionsDX11_BeginUAVOverlap/agsDriverExtensionsDX11_EndUAVOverlap) + // TODO: add support for Intel extensions to overlap UAV writes (INTC_D3D11_BeginUAVOverlap/INTC_D3D11_EndUAVOverlap) +} + void GPUContextDX11::flushSRVs() { #define FLUSH_STAGE(STAGE) if (Current##STAGE) _context->STAGE##SetShaderResources(0, ARRAY_COUNT(_srHandles), _srHandles) diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h index 6d1877534..ccdac0d70 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h @@ -167,6 +167,7 @@ public: void CopyCounter(GPUBuffer* dstBuffer, uint32 dstOffset, GPUBuffer* srcBuffer) override; void CopyResource(GPUResource* dstResource, GPUResource* srcResource) override; void CopySubresource(GPUResource* dstResource, uint32 dstSubresource, GPUResource* srcResource, uint32 srcSubresource) override; + void OverlapUA(bool end) override; }; #endif diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp index 411d9dd92..ca479a7c0 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUDeviceDX11.cpp @@ -20,7 +20,10 @@ #include "Engine/Graphics/PixelFormatExtensions.h" #include "Engine/Engine/CommandLine.h" #include "Engine/Profiler/ProfilerMemory.h" - +#if COMPILE_WITH_NVAPI +#include +bool EnableNvapi = false; +#endif #if !USE_EDITOR && PLATFORM_WINDOWS #include "Engine/Core/Config/PlatformSettings.h" #endif @@ -420,6 +423,31 @@ bool GPUDeviceDX11::Init() { HRESULT result; + // Driver extensions +#if COMPILE_WITH_NVAPI + if 
(_adapter->IsNVIDIA()) + { + NvAPI_Status status = NvAPI_Initialize(); + if (status == NVAPI_OK) + { + EnableNvapi = true; + + NvU32 driverVersion; + NvAPI_ShortString buildBranch(""); + if (NvAPI_SYS_GetDriverAndBranchVersion(&driverVersion, buildBranch) == NVAPI_OK) + { + LOG(Info, "NvApi driver version: {}, {}", driverVersion, StringAsUTF16<>(buildBranch).Get()); + } + } + else + { + NvAPI_ShortString desc; + NvAPI_GetErrorMessage(status, desc); + LOG(Warning, "NvAPI_Initialize failed with result {} ({})", (int32)status, String(desc)); + } + } +#endif + // Get DXGI adapter ComPtr adapter; if (_factoryDXGI->EnumAdapters(_adapter->Index, &adapter) == DXGI_ERROR_NOT_FOUND || adapter == nullptr) diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GraphicsDeviceDX11.Build.cs b/Source/Engine/GraphicsDevice/DirectX/DX11/GraphicsDeviceDX11.Build.cs index a5f288594..b8ffd2629 100644 --- a/Source/Engine/GraphicsDevice/DirectX/DX11/GraphicsDeviceDX11.Build.cs +++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GraphicsDeviceDX11.Build.cs @@ -14,5 +14,7 @@ public class GraphicsDeviceDX11 : GraphicsDeviceBaseModule options.PublicDefinitions.Add("GRAPHICS_API_DIRECTX11"); options.OutputFiles.Add("d3d11.lib"); + if (nvapi.Use(options)) + options.PrivateDependencies.Add("nvapi"); } }