Add NVAPI library to D3D11 backend for efficient overlapping of UAV writes on NVIDIA GPUs

This commit is contained in:
Wojtek Figat
2025-08-10 16:05:18 +02:00
parent 4fd6343fb9
commit c1c806490f
5 changed files with 59 additions and 1 deletions

View File

@@ -46,6 +46,12 @@ struct FLAXENGINE_API GPUMemoryPass : GPUPass
{
Context->MemoryBarrier();
}
// Inserts a global memory barrier on data copies between resources. Use it to ensure all prior writes are finished before submitting further commands.
// Forwards directly to Context->MemoryBarrier().
void MemoryBarrier()
{
Context->MemoryBarrier();
}
};
/// <summary>

View File

@@ -14,6 +14,10 @@
#include "Engine/Core/Math/Viewport.h"
#include "Engine/Core/Math/Rectangle.h"
#include "Engine/Profiler/RenderStats.h"
#if COMPILE_WITH_NVAPI
#include <ThirdParty/nvapi/nvapi.h>
extern bool EnableNvapi;
#endif
#define DX11_CLEAR_SR_ON_STAGE_DISABLE 0
@@ -903,6 +907,23 @@ void GPUContextDX11::CopySubresource(GPUResource* dstResource, uint32 dstSubreso
_context->CopySubresourceRegion(dstResourceDX11->GetResource(), dstSubresource, 0, 0, 0, srcResourceDX11->GetResource(), srcSubresource, nullptr);
}
// Opens (end == false) or closes (end == true) a section in which UAV writes are allowed to overlap.
// Does nothing when no supported vendor extension is compiled in and enabled.
void GPUContextDX11::OverlapUA(bool end)
{
    // D3D11 itself exposes no control over UAV barriers; vendor driver extensions are used to mark overlap sections manually.
#if COMPILE_WITH_NVAPI
    if (EnableNvapi)
    {
        // NVIDIA path: mark the start or the end of the overlap section on the device context
        if (!end)
            NvAPI_D3D11_BeginUAVOverlap(_context);
        else
            NvAPI_D3D11_EndUAVOverlap(_context);
        return;
    }
#endif

    // TODO: add support for AMD extensions to overlap UAV writes (agsDriverExtensionsDX11_BeginUAVOverlap/agsDriverExtensionsDX11_EndUAVOverlap)
    // TODO: add support for Intel extensions to overlap UAV writes (INTC_D3D11_BeginUAVOverlap/INTC_D3D11_EndUAVOverlap)
}
void GPUContextDX11::flushSRVs()
{
#define FLUSH_STAGE(STAGE) if (Current##STAGE) _context->STAGE##SetShaderResources(0, ARRAY_COUNT(_srHandles), _srHandles)

View File

@@ -167,6 +167,7 @@ public:
void CopyCounter(GPUBuffer* dstBuffer, uint32 dstOffset, GPUBuffer* srcBuffer) override;
void CopyResource(GPUResource* dstResource, GPUResource* srcResource) override;
void CopySubresource(GPUResource* dstResource, uint32 dstSubresource, GPUResource* srcResource, uint32 srcSubresource) override;
void OverlapUA(bool end) override;
};
#endif

View File

@@ -20,7 +20,10 @@
#include "Engine/Graphics/PixelFormatExtensions.h"
#include "Engine/Engine/CommandLine.h"
#include "Engine/Profiler/ProfilerMemory.h"
#if COMPILE_WITH_NVAPI
#include <ThirdParty/nvapi/nvapi.h>
bool EnableNvapi = false;
#endif
#if !USE_EDITOR && PLATFORM_WINDOWS
#include "Engine/Core/Config/PlatformSettings.h"
#endif
@@ -420,6 +423,31 @@ bool GPUDeviceDX11::Init()
{
HRESULT result;
// Driver extensions
#if COMPILE_WITH_NVAPI
if (_adapter->IsNVIDIA())
{
NvAPI_Status status = NvAPI_Initialize();
if (status == NVAPI_OK)
{
EnableNvapi = true;
NvU32 driverVersion;
NvAPI_ShortString buildBranch("");
if (NvAPI_SYS_GetDriverAndBranchVersion(&driverVersion, buildBranch) == NVAPI_OK)
{
LOG(Info, "NvApi driver version: {}, {}", driverVersion, StringAsUTF16<>(buildBranch).Get());
}
}
else
{
NvAPI_ShortString desc;
NvAPI_GetErrorMessage(status, desc);
LOG(Warning, "NvAPI_Initialize failed with result {} ({})", (int32)status, String(desc));
}
}
#endif
// Get DXGI adapter
ComPtr<IDXGIAdapter> adapter;
if (_factoryDXGI->EnumAdapters(_adapter->Index, &adapter) == DXGI_ERROR_NOT_FOUND || adapter == nullptr)

View File

@@ -14,5 +14,7 @@ public class GraphicsDeviceDX11 : GraphicsDeviceBaseModule
options.PublicDefinitions.Add("GRAPHICS_API_DIRECTX11");
options.OutputFiles.Add("d3d11.lib");
if (nvapi.Use(options))
options.PrivateDependencies.Add("nvapi");
}
}