Add support for Depth Bounds test in all graphics APIs

This commit is contained in:
Wojtek Figat
2026-01-19 17:44:45 +01:00
parent ee75cab73e
commit d049a16882
21 changed files with 332 additions and 98 deletions

View File

@@ -617,6 +617,13 @@ public:
/// <param name="scissorRect">The scissor rectangle (in pixels).</param>
API_FUNCTION() virtual void SetScissor(API_PARAM(Ref) const Rectangle& scissorRect) = 0;
/// <summary>
/// Sets the minimum and maximum depth values for the depth bounds test. Used together with GPUPipelineState::Description::DepthBoundsEnable; device support is reported by GPULimits::HasDepthBounds.
/// </summary>
/// <param name="minDepth">The minimum value for the depth bounds test.</param>
/// <param name="maxDepth">The maximum value for the depth bounds test.</param>
API_FUNCTION() virtual void SetDepthBounds(float minDepth, float maxDepth) = 0;
public:
/// <summary>
/// Sets the graphics pipeline state.

View File

@@ -160,6 +160,7 @@ GPUPipelineState::Description GPUPipelineState::Description::Default =
true, // DepthEnable
true, // DepthWriteEnable
true, // DepthClipEnable
false, // DepthBoundsEnable
ComparisonFunc::Less, // DepthFunc
false, // StencilEnable
0xff, // StencilReadMask
@@ -184,6 +185,7 @@ GPUPipelineState::Description GPUPipelineState::Description::DefaultNoDepth =
false, // DepthEnable
false, // DepthWriteEnable
false, // DepthClipEnable
false, // DepthBoundsEnable
ComparisonFunc::Less, // DepthFunc
false, // StencilEnable
0xff, // StencilReadMask
@@ -208,6 +210,7 @@ GPUPipelineState::Description GPUPipelineState::Description::DefaultFullscreenTr
false, // DepthEnable
false, // DepthWriteEnable
false, // DepthClipEnable
false, // DepthBoundsEnable
ComparisonFunc::Less, // DepthFunc
false, // StencilEnable
0xff, // StencilReadMask

View File

@@ -264,6 +264,11 @@ API_STRUCT() struct GPULimits
/// </summary>
API_FIELD() bool HasDepthClip;
/// <summary>
/// True if device supports depth buffer bounds testing (see GPUPipelineState::Description::DepthBoundsEnable and GPUContext::SetDepthBounds).
/// </summary>
API_FIELD() bool HasDepthBounds = false;
/// <summary>
/// True if device supports depth buffer texture as a readonly depth buffer (can be sampled in the shader while performing depth-test).
/// </summary>

View File

@@ -64,6 +64,11 @@ public:
/// </summary>
API_FIELD() bool DepthClipEnable;
/// <summary>
/// Enable/disable depth bounds testing (min/max values provided via GPUContext::SetDepthBounds). Device support is reported by GPULimits::HasDepthBounds.
/// </summary>
API_FIELD() bool DepthBoundsEnable;
/// <summary>
/// A function that compares depth data against existing depth data
/// </summary>

View File

@@ -97,6 +97,7 @@ void GPUContextDX11::FrameBegin()
// Setup
_flushOnDispatch = false;
_depthBounds = false;
_omDirtyFlag = false;
_uaDirtyFlag = false;
_cbDirtyFlag = false;
@@ -658,6 +659,28 @@ void GPUContextDX11::SetScissor(const Rectangle& scissorRect)
_context->RSSetScissorRects(1, &rect);
}
// Public GPUContext entry point: forwards the requested range to the internal overload with the test switched on.
void GPUContextDX11::SetDepthBounds(float minDepth, float maxDepth)
{
    // An explicit range request always enables the depth bounds test
    const bool enableTest = true;
    SetDepthBounds(enableTest, minDepth, maxDepth);
}
// Internal helper that enables or disables the depth bounds test and sets its range.
// The request is routed through the vendor driver extensions (NVAPI and/or AMD AGS) when they are compiled in and active.
// enable   - true to turn the test on, false to turn it off.
// minDepth - lower bound of the depth range passed to the driver extension.
// maxDepth - upper bound of the depth range passed to the driver extension.
void GPUContextDX11::SetDepthBounds(bool enable, float minDepth, float maxDepth)
{
    // Track the real state: onDrawCall uses this flag to auto-disable the test for pipelines that don't use it.
    // Storing 'enable' (rather than always 'true') ensures that the auto-disable runs once instead of on every draw call.
    _depthBounds = enable;
#if COMPILE_WITH_NVAPI
    if (EnableNvapi)
    {
        NvAPI_D3D11_SetDepthBoundsTest(_context, enable, minDepth, maxDepth);
    }
#endif
#if COMPILE_WITH_AGS
    if (AgsContext)
    {
        agsDriverExtensionsDX11_SetDepthBounds(AgsContext, _context, enable, minDepth, maxDepth);
    }
#endif
}
GPUPipelineState* GPUContextDX11::GetState() const
{
return _currentState;
@@ -1099,6 +1122,12 @@ void GPUContextDX11::flushIA()
void GPUContextDX11::onDrawCall()
{
if (_depthBounds && (!_currentState || !_currentState->DepthBounds))
{
// Auto-disable depth bounds
SetDepthBounds(false, 0.0f, 1.0f);
}
_flushOnDispatch = false;
flushCBs();
flushSRVs();

View File

@@ -31,6 +31,7 @@ private:
#endif
int32 _maxUASlots;
bool _flushOnDispatch;
bool _depthBounds;
// Output Merger
bool _omDirtyFlag;
@@ -113,6 +114,7 @@ private:
void flushIA();
void onDrawCall();
void onDispatch(GPUShaderProgramCS* shader);
void SetDepthBounds(bool enable, float minDepth, float maxDepth);
public:
@@ -158,6 +160,7 @@ public:
void EndQuery(uint64 queryID) override;
void SetViewport(const Viewport& viewport) override;
void SetScissor(const Rectangle& scissorRect) override;
void SetDepthBounds(float minDepth, float maxDepth) override;
GPUPipelineState* GetState() const override;
void SetState(GPUPipelineState* state) override;
void ResetState() override;

View File

@@ -26,6 +26,8 @@ bool EnableNvapi = false;
#endif
#if COMPILE_WITH_AGS
#include <ThirdParty/AGS/amd_ags.h>
#include "Engine/Engine/Globals.h"
#include "FlaxEngine.Gen.h"
AGSContext* AgsContext = nullptr;
#endif
#if !USE_EDITOR && PLATFORM_WINDOWS
@@ -469,23 +471,23 @@ bool GPUDeviceDX11::Init()
if (returnCode == AGS_SUCCESS)
{
LOG(Info, "AMD driver version: {}, Radeon Software Version {}", TO_UTF16(gpuInfo.driverVersion), TO_UTF16(gpuInfo.radeonSoftwareVersion));
const Char* asicFamily[] =
{
TEXT("Unknown"),
TEXT("Pre GCN"),
TEXT("GCN Gen1"),
TEXT("GCN Gen2"),
TEXT("GCN Gen3"),
TEXT("GCN Gen4"),
TEXT("Vega"),
TEXT("RDNA"),
TEXT("RDNA2"),
TEXT("RDNA3"),
TEXT("RDNA4"),
};
for (int32 i = 0; i < gpuInfo.numDevices; i++)
{
AGSDeviceInfo& deviceInfo = gpuInfo.devices[i];
const Char* asicFamily[] =
{
TEXT("Unknown"),
TEXT("Pre GCN"),
TEXT("GCN Gen1"),
TEXT("GCN Gen2"),
TEXT("GCN Gen3"),
TEXT("GCN Gen4"),
TEXT("Vega"),
TEXT("RDNA"),
TEXT("RDNA2"),
TEXT("RDNA3"),
TEXT("RDNA4"),
};
LOG(Info, " > GPU {}: {} ({})", i, TO_UTF16(deviceInfo.adapterString), asicFamily[deviceInfo.asicFamily <= AGSAsicFamily_RDNA4 ? deviceInfo.asicFamily : 0]);
LOG(Info, " CUs: {}, WGPs: {}, ROPs: {}", deviceInfo.numCUs, deviceInfo.numWGPs, deviceInfo.numROPs);
LOG(Info, " Core clock: {} MHz, Memory clock: {} MHz, {:.2f} Tflops", deviceInfo.coreClock, deviceInfo.memoryClock, deviceInfo.teraFlops);
@@ -494,7 +496,8 @@ bool GPUDeviceDX11::Init()
}
else
{
LOG(Warning, "agsInitialize failed with result {} ({})", (int32)returnCode);
LOG(Warning, "agsInitialize failed with result {}", (int32)returnCode);
AgsContext = nullptr;
}
}
#endif
@@ -518,9 +521,38 @@ bool GPUDeviceDX11::Init()
// Create DirectX device
D3D_FEATURE_LEVEL createdFeatureLevel = static_cast<D3D_FEATURE_LEVEL>(0);
D3D_FEATURE_LEVEL targetFeatureLevel = _adapter->MaxFeatureLevel;
VALIDATE_DIRECTX_CALL(D3D11CreateDevice(adapter, D3D_DRIVER_TYPE_UNKNOWN, NULL, flags, &targetFeatureLevel, 1, D3D11_SDK_VERSION, &_device, &createdFeatureLevel, &_imContext));
ASSERT(_device);
ASSERT(_imContext);
#if COMPILE_WITH_AGS
AGSDX11ReturnedParams AgsReturnedParams;
if (AgsContext)
{
AGSDX11DeviceCreationParams creationParams = { adapter, D3D_DRIVER_TYPE_UNKNOWN, NULL, flags, &targetFeatureLevel, 1, D3D11_SDK_VERSION, nullptr };
AGSDX11ExtensionParams extensionParams = {
*Globals::ProductName,
TEXT("Flax"),
AGS_UNSPECIFIED_VERSION,
AGS_MAKE_VERSION(FLAXENGINE_VERSION_MAJOR, FLAXENGINE_VERSION_MINOR, FLAXENGINE_VERSION_BUILD),
0,
7,
AGS_CROSSFIRE_MODE_DISABLE
};
Platform::MemoryClear(&AgsReturnedParams, sizeof(AgsReturnedParams));
AGSReturnCode returnCode = agsDriverExtensionsDX11_CreateDevice(AgsContext, &creationParams, &extensionParams, &AgsReturnedParams);
if (returnCode != AGS_SUCCESS)
{
LOG(Error, "agsDriverExtensionsDX11_CreateDevice failed with result {}", (int32)returnCode);
return true;
}
_device = AgsReturnedParams.pDevice;
_imContext = AgsReturnedParams.pImmediateContext;
createdFeatureLevel = AgsReturnedParams.featureLevel;
}
else
#endif
{
VALIDATE_DIRECTX_CALL(D3D11CreateDevice(adapter, D3D_DRIVER_TYPE_UNKNOWN, NULL, flags, &targetFeatureLevel, 1, D3D11_SDK_VERSION, &_device, &createdFeatureLevel, &_imContext));
}
if (!_device || !_imContext)
return true;
ASSERT(createdFeatureLevel == targetFeatureLevel);
_state = DeviceState::Created;
@@ -628,6 +660,20 @@ bool GPUDeviceDX11::Init()
_device->CheckFormatSupport(dxgiFormat, &formatSupport);
FeaturesPerFormat[i] = FormatFeatures(format, static_cast<MSAALevel>(maxCount), (FormatSupport)formatSupport);
}
// Driver extensions support
#if COMPILE_WITH_NVAPI
if (EnableNvapi)
{
limits.HasDepthBounds = true;
}
#endif
#if COMPILE_WITH_AGS
if (AgsContext && AgsReturnedParams.extensionsSupported.depthBoundsTest != 0)
{
limits.HasDepthBounds = true;
}
#endif
}
// Init debug layer

View File

@@ -54,6 +54,7 @@ bool GPUPipelineStateDX11::Init(const Description& desc)
D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST,
};
PrimitiveTopology = D3D11_primTypes[static_cast<int32>(desc.PrimitiveTopology)];
DepthBounds = desc.DepthBoundsEnable;
#if GPU_ALLOW_TESSELLATION_SHADERS
if (HS)
PrimitiveTopology = (D3D11_PRIMITIVE_TOPOLOGY)((int32)D3D11_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + (HS->GetControlPointsCount() - 1));

View File

@@ -16,6 +16,8 @@ class GPUPipelineStateDX11 : public GPUResourceDX11<GPUPipelineState>
public:
int32 RasterizerStateIndex;
D3D11_PRIMITIVE_TOPOLOGY PrimitiveTopology;
bool DepthBounds;
ID3D11DepthStencilState* DepthStencilState = nullptr;
ID3D11BlendState* BlendState = nullptr;
GPUShaderProgramVSDX11* VS = nullptr;
@@ -27,7 +29,6 @@ public:
GPUShaderProgramGSDX11* GS = nullptr;
#endif
GPUShaderProgramPSDX11* PS = nullptr;
D3D11_PRIMITIVE_TOPOLOGY PrimitiveTopology;
public:

View File

@@ -140,6 +140,9 @@ GPUContextDX12::GPUContextDX12(GPUDeviceDX12* device, D3D12_COMMAND_LIST_TYPE ty
FrameFenceValues[1] = 0;
_currentAllocator = _device->GetCommandQueue()->RequestAllocator();
VALIDATE_DIRECTX_CALL(device->GetDevice()->CreateCommandList(0, type, _currentAllocator, nullptr, IID_PPV_ARGS(&_commandList)));
#ifdef __ID3D12GraphicsCommandList1_FWD_DEFINED__
_commandList->QueryInterface(IID_PPV_ARGS(&_commandList1));
#endif
#if GPU_ENABLE_RESOURCE_NAMING
_commandList->SetName(TEXT("GPUContextDX12::CommandList"));
#endif
@@ -1315,6 +1318,14 @@ void GPUContextDX12::SetScissor(const Rectangle& scissorRect)
_commandList->RSSetScissorRects(1, &rect);
}
// Sets the depth bounds range on the command list.
// Does nothing when the SDK headers lack ID3D12GraphicsCommandList1 or when that interface pointer is null.
void GPUContextDX12::SetDepthBounds(float minDepth, float maxDepth)
{
#ifdef __ID3D12GraphicsCommandList1_FWD_DEFINED__
    // The newer command list interface is optional - bail out if it is missing
    if (_commandList1 == nullptr)
        return;
    _commandList1->OMSetDepthBounds(minDepth, maxDepth);
#endif
}
GPUPipelineState* GPUContextDX12::GetState() const
{
return _currentState;

View File

@@ -38,6 +38,9 @@ private:
GPUDeviceDX12* _device;
ID3D12GraphicsCommandList* _commandList;
#ifdef __ID3D12GraphicsCommandList1_FWD_DEFINED__
ID3D12GraphicsCommandList1* _commandList1;
#endif
ID3D12CommandAllocator* _currentAllocator;
GPUPipelineStateDX12* _currentState;
GPUShaderProgramCS* _currentCompute;
@@ -201,6 +204,7 @@ public:
void EndQuery(uint64 queryID) override;
void SetViewport(const Viewport& viewport) override;
void SetScissor(const Rectangle& scissorRect) override;
void SetDepthBounds(float minDepth, float maxDepth) override;
GPUPipelineState* GetState() const override;
void SetState(GPUPipelineState* state) override;
void ResetState() override;

View File

@@ -548,7 +548,6 @@ static MSAALevel GetMaximumMultisampleCount(ID3D12Device* device, DXGI_FORMAT dx
GPUDeviceDX12::GPUDeviceDX12(IDXGIFactory4* dxgiFactory, GPUAdapterDX* adapter)
: GPUDeviceDX(RendererType::DirectX12, ShaderProfile::DirectX_SM6, adapter)
, _device(nullptr)
, _factoryDXGI(dxgiFactory)
, _res2Dispose(256)
, _rootSignature(nullptr)
@@ -735,6 +734,14 @@ bool GPUDeviceDX12::Init()
#endif
#endif
// Get newer device interfaces
#ifdef __ID3D12Device1_FWD_DEFINED__
_device->QueryInterface(IID_PPV_ARGS(&_device1));
#endif
#ifdef __ID3D12Device2_FWD_DEFINED__
_device->QueryInterface(IID_PPV_ARGS(&_device2));
#endif
// Change state
_state = DeviceState::Created;
@@ -781,6 +788,11 @@ bool GPUDeviceDX12::Init()
const MSAALevel maximumMultisampleCount = GetMaximumMultisampleCount(_device, dxgiFormat);
FeaturesPerFormat[i] = FormatFeatures(format, maximumMultisampleCount, (FormatSupport)formatInfo.Support1);
}
D3D12_FEATURE_DATA_D3D12_OPTIONS2 options2 = {};
if (SUCCEEDED(_device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS2, &options2, sizeof(options2))))
limits.HasDepthBounds = !!options2.DepthBoundsTestSupported;
}
#if !BUILD_RELEASE

View File

@@ -44,7 +44,13 @@ private:
private:
// Private Stuff
ID3D12Device* _device;
ID3D12Device* _device = nullptr;
#ifdef __ID3D12Device1_FWD_DEFINED__
ID3D12Device1* _device1 = nullptr;
#endif
#ifdef __ID3D12Device2_FWD_DEFINED__
ID3D12Device2* _device2 = nullptr;
#endif
IDXGIFactory4* _factoryDXGI;
CriticalSection _res2DisposeLock;
Array<DisposeResourceEntry> _res2Dispose;
@@ -85,6 +91,12 @@ public:
{
return _device;
}
#ifdef __ID3D12Device1_FWD_DEFINED__
/// <summary>
/// Gets the ID3D12Device1 interface pointer (defaults to null; queried from the base device during Init).
/// </summary>
FORCE_INLINE ID3D12Device1* GetDevice1() const
{
    return _device1;
}
#endif
#ifdef __ID3D12Device2_FWD_DEFINED__
/// <summary>
/// Gets the ID3D12Device2 interface pointer (defaults to null; queried from the base device during Init).
/// </summary>
FORCE_INLINE ID3D12Device2* GetDevice2() const
{
    return _device2;
}
#endif
/// <summary>
/// Gets DXGI factory.

View File

@@ -9,6 +9,37 @@
#include "Engine/Profiler/ProfilerCPU.h"
#include "Engine/GraphicsDevice/DirectX/RenderToolsDX.h"
#include "Engine/Graphics/PixelFormatExtensions.h"
#if GPU_D3D12_PSO_STREAM
// TODO: migrate to Agility SDK and remove that custom header
#include <ThirdParty/DirectX12Agility/d3dx12/d3dx12_pipeline_state_stream_custom.h>
#endif
#if GPU_D3D12_PSO_STREAM
// Graphics pipeline description expressed as a pipeline state stream.
// An instance is filled in GPUPipelineStateDX12::GetState and handed to ID3D12Device2::CreatePipelineState via D3D12_PIPELINE_STATE_STREAM_DESC.
// Each member is a d3dx12 stream subobject wrapper; optional shader stages are compiled in only when the matching engine feature is enabled.
struct alignas(void*) GraphicsPipelineStateStreamDX12
{
CD3DX12_PIPELINE_STATE_STREAM_ROOT_SIGNATURE pRootSignature;
CD3DX12_PIPELINE_STATE_STREAM_VS VS;
CD3DX12_PIPELINE_STATE_STREAM_PS PS;
#if GPU_ALLOW_GEOMETRY_SHADERS
CD3DX12_PIPELINE_STATE_STREAM_GS GS;
#endif
#if GPU_ALLOW_TESSELLATION_SHADERS
CD3DX12_PIPELINE_STATE_STREAM_HS HS;
CD3DX12_PIPELINE_STATE_STREAM_DS DS;
#endif
CD3DX12_PIPELINE_STATE_STREAM_BLEND_DESC BlendState;
CD3DX12_PIPELINE_STATE_STREAM_SAMPLE_MASK SampleMask;
CD3DX12_PIPELINE_STATE_STREAM_RASTERIZER RasterizerState;
// Uses the DEPTH_STENCIL1 variant, which carries the DepthBoundsTestEnable flag set in GPUPipelineStateDX12::Init
CD3DX12_PIPELINE_STATE_STREAM_DEPTH_STENCIL1 DepthStencilState;
CD3DX12_PIPELINE_STATE_STREAM_INPUT_LAYOUT InputLayout;
CD3DX12_PIPELINE_STATE_STREAM_PRIMITIVE_TOPOLOGY PrimitiveTopologyType;
CD3DX12_PIPELINE_STATE_STREAM_DEPTH_STENCIL_FORMAT DSVFormat;
CD3DX12_PIPELINE_STATE_STREAM_RENDER_TARGET_FORMATS RTFormats;
CD3DX12_PIPELINE_STATE_STREAM_SAMPLE_DESC SampleDesc;
};
#else
// Fallback when pipeline state streams are unavailable: use the classic graphics pipeline description (passed to CreateGraphicsPipelineState)
typedef D3D12_GRAPHICS_PIPELINE_STATE_DESC GraphicsPipelineStateStreamDX12;
#endif
static D3D12_STENCIL_OP ToStencilOp(StencilOperation value)
{
@@ -80,14 +111,27 @@ ID3D12PipelineState* GPUPipelineStateDX12::GetState(GPUTextureViewDX12* depth, i
ZoneText(name.Get(), name.Count() - 1);
#endif
// Update description to match the pipeline
_desc.NumRenderTargets = key.RTsCount;
// Setup description to match the pipeline
GraphicsPipelineStateStreamDX12 desc = {};
desc.pRootSignature = _device->GetRootSignature();
desc.PrimitiveTopologyType = _primitiveTopology;
desc.DepthStencilState = _depthStencil;
desc.RasterizerState = _rasterizer;
desc.BlendState = _blend;
#if GPU_D3D12_PSO_STREAM
D3D12_RT_FORMAT_ARRAY rtFormats = {};
rtFormats.NumRenderTargets = key.RTsCount;
for (int32 i = 0; i < GPU_MAX_RT_BINDED; i++)
_desc.RTVFormats[i] = RenderToolsDX::ToDxgiFormat(key.RTVsFormats[i]);
_desc.SampleDesc.Count = static_cast<UINT>(key.MSAA);
_desc.SampleDesc.Quality = key.MSAA == MSAALevel::None ? 0 : GPUDeviceDX12::GetMaxMSAAQuality((int32)key.MSAA);
_desc.SampleMask = D3D12_DEFAULT_SAMPLE_MASK;
_desc.DSVFormat = RenderToolsDX::ToDxgiFormat(PixelFormatExtensions::FindDepthStencilFormat(key.DepthFormat));
rtFormats.RTFormats[i] = RenderToolsDX::ToDxgiFormat(key.RTVsFormats[i]);
desc.RTFormats = rtFormats;
#else
desc.NumRenderTargets = key.RTsCount;
for (int32 i = 0; i < GPU_MAX_RT_BINDED; i++)
desc.RTVFormats[i] = RenderToolsDX::ToDxgiFormat(key.RTVsFormats[i]);
#endif
desc.SampleDesc = { (UINT)key.MSAA, key.MSAA == MSAALevel::None ? 0 : GPUDeviceDX12::GetMaxMSAAQuality((int32)key.MSAA) };
desc.SampleMask = D3D12_DEFAULT_SAMPLE_MASK;
desc.DSVFormat = RenderToolsDX::ToDxgiFormat(PixelFormatExtensions::FindDepthStencilFormat(key.DepthFormat));
if (!vertexLayout)
vertexLayout = VertexBufferLayout; // Fallback to shader-specified layout (if using old APIs)
if (vertexLayout)
@@ -95,17 +139,29 @@ ID3D12PipelineState* GPUPipelineStateDX12::GetState(GPUTextureViewDX12* depth, i
int32 missingSlotOverride = GPU_MAX_VB_BINDED; // Use additional slot with empty VB
if (VertexInputLayout)
vertexLayout = (GPUVertexLayoutDX12*)GPUVertexLayout::Merge(vertexLayout, VertexInputLayout, false, true, missingSlotOverride);
_desc.InputLayout.pInputElementDescs = vertexLayout->InputElements;
_desc.InputLayout.NumElements = vertexLayout->InputElementsCount;
desc.InputLayout = { vertexLayout->InputElements, vertexLayout->InputElementsCount };
}
else
{
_desc.InputLayout.pInputElementDescs = nullptr;
_desc.InputLayout.NumElements = 0;
desc.InputLayout = { nullptr, 0 };
}
#if GPU_ALLOW_TESSELLATION_SHADERS
desc.HS = _shaderHS;
desc.DS = _shaderDS;
#endif
#if GPU_ALLOW_GEOMETRY_SHADERS
desc.GS = _shaderGS;
#endif
desc.VS = _shaderVS;
desc.PS = _shaderPS;
// Create object
const HRESULT result = _device->GetDevice()->CreateGraphicsPipelineState(&_desc, IID_PPV_ARGS(&state));
#if GPU_D3D12_PSO_STREAM
D3D12_PIPELINE_STATE_STREAM_DESC streamDesc = { sizeof(desc), &desc };
const HRESULT result = _device->GetDevice2()->CreatePipelineState(&streamDesc, IID_PPV_ARGS(&state));
#else
const HRESULT result = _device->GetDevice()->CreateGraphicsPipelineState(&desc, IID_PPV_ARGS(&state));
#endif
LOG_DIRECTX_RESULT(result);
if (FAILED(result))
{
@@ -138,17 +194,12 @@ bool GPUPipelineStateDX12::Init(const Description& desc)
if (IsValid())
OnReleaseGPU();
// Create description
D3D12_GRAPHICS_PIPELINE_STATE_DESC psDesc;
Platform::MemoryClear(&psDesc, sizeof(D3D12_GRAPHICS_PIPELINE_STATE_DESC));
psDesc.pRootSignature = _device->GetRootSignature();
// Shaders
Platform::MemoryClear(&Header, sizeof(Header));
#define INIT_SHADER_STAGE(stage, type) \
if (desc.stage) \
{ \
psDesc.stage = { desc.stage->GetBufferHandle(), desc.stage->GetBufferSize() }; \
_shader##stage = { desc.stage->GetBufferHandle(), desc.stage->GetBufferSize() }; \
auto shader = (type*)desc.stage; \
auto srCount = Math::FloorLog2(shader->GetBindings().UsedSRsMask) + 1; \
for (uint32 i = 0; i < srCount; i++) \
@@ -158,7 +209,8 @@ bool GPUPipelineStateDX12::Init(const Description& desc)
for (uint32 i = 0; i < uaCount; i++) \
if (shader->Header.UaDimensions[i]) \
Header.UaDimensions[i] = shader->Header.UaDimensions[i]; \
}
} \
else _shader##stage = {};
#if GPU_ALLOW_TESSELLATION_SHADERS
INIT_SHADER_STAGE(HS, GPUShaderProgramHSDX12);
INIT_SHADER_STAGE(DS, GPUShaderProgramDSDX12);
@@ -189,73 +241,65 @@ bool GPUPipelineStateDX12::Init(const Description& desc)
D3D_PRIMITIVE_TOPOLOGY_LINELIST,
D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST,
};
psDesc.PrimitiveTopologyType = primTypes1[(int32)desc.PrimitiveTopology];
_primitiveTopology = primTypes1[(int32)desc.PrimitiveTopology];
PrimitiveTopology = primTypes2[(int32)desc.PrimitiveTopology];
#if GPU_ALLOW_TESSELLATION_SHADERS
if (desc.HS)
{
psDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH;
_primitiveTopology = D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH;
PrimitiveTopology = (D3D_PRIMITIVE_TOPOLOGY)((int32)D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + (desc.HS->GetControlPointsCount() - 1));
}
#endif
// Depth State
psDesc.DepthStencilState.DepthEnable = !!desc.DepthEnable;
psDesc.DepthStencilState.DepthWriteMask = desc.DepthWriteEnable ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO;
psDesc.DepthStencilState.DepthFunc = static_cast<D3D12_COMPARISON_FUNC>(desc.DepthFunc);
psDesc.DepthStencilState.StencilEnable = !!desc.StencilEnable;
psDesc.DepthStencilState.StencilReadMask = desc.StencilReadMask;
psDesc.DepthStencilState.StencilWriteMask = desc.StencilWriteMask;
psDesc.DepthStencilState.FrontFace.StencilFailOp = ToStencilOp(desc.StencilFailOp);
psDesc.DepthStencilState.FrontFace.StencilDepthFailOp = ToStencilOp(desc.StencilDepthFailOp);
psDesc.DepthStencilState.FrontFace.StencilPassOp = ToStencilOp(desc.StencilPassOp);
psDesc.DepthStencilState.FrontFace.StencilFunc = static_cast<D3D12_COMPARISON_FUNC>(desc.StencilFunc);
psDesc.DepthStencilState.BackFace = psDesc.DepthStencilState.FrontFace;
Platform::MemoryClear(&_depthStencil, sizeof(_depthStencil));
_depthStencil.DepthEnable = !!desc.DepthEnable;
_depthStencil.DepthWriteMask = desc.DepthWriteEnable ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO;
#if GPU_D3D12_PSO_STREAM
_depthStencil.DepthBoundsTestEnable = !!desc.DepthBoundsEnable;
#endif
_depthStencil.DepthFunc = static_cast<D3D12_COMPARISON_FUNC>(desc.DepthFunc);
_depthStencil.StencilEnable = !!desc.StencilEnable;
_depthStencil.StencilReadMask = desc.StencilReadMask;
_depthStencil.StencilWriteMask = desc.StencilWriteMask;
_depthStencil.FrontFace.StencilFailOp = ToStencilOp(desc.StencilFailOp);
_depthStencil.FrontFace.StencilDepthFailOp = ToStencilOp(desc.StencilDepthFailOp);
_depthStencil.FrontFace.StencilPassOp = ToStencilOp(desc.StencilPassOp);
_depthStencil.FrontFace.StencilFunc = static_cast<D3D12_COMPARISON_FUNC>(desc.StencilFunc);
_depthStencil.BackFace = _depthStencil.FrontFace;
// Rasterizer State
psDesc.RasterizerState.FillMode = desc.Wireframe ? D3D12_FILL_MODE_WIREFRAME : D3D12_FILL_MODE_SOLID;
D3D12_CULL_MODE dxCullMode;
switch (desc.CullMode)
Platform::MemoryClear(&_rasterizer, sizeof(_rasterizer));
_rasterizer.FillMode = desc.Wireframe ? D3D12_FILL_MODE_WIREFRAME : D3D12_FILL_MODE_SOLID;
const D3D12_CULL_MODE cullModes[] =
{
case CullMode::Normal:
dxCullMode = D3D12_CULL_MODE_BACK;
break;
case CullMode::Inverted:
dxCullMode = D3D12_CULL_MODE_FRONT;
break;
case CullMode::TwoSided:
dxCullMode = D3D12_CULL_MODE_NONE;
break;
}
psDesc.RasterizerState.CullMode = dxCullMode;
psDesc.RasterizerState.FrontCounterClockwise = FALSE;
psDesc.RasterizerState.DepthBias = D3D12_DEFAULT_DEPTH_BIAS;
psDesc.RasterizerState.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP;
psDesc.RasterizerState.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS;
psDesc.RasterizerState.DepthClipEnable = !!desc.DepthClipEnable;
psDesc.RasterizerState.MultisampleEnable = TRUE;
psDesc.RasterizerState.AntialiasedLineEnable = !!desc.Wireframe;
psDesc.RasterizerState.ForcedSampleCount = 0;
psDesc.RasterizerState.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF;
D3D12_CULL_MODE_BACK,
D3D12_CULL_MODE_FRONT,
D3D12_CULL_MODE_NONE,
};
_rasterizer.CullMode = cullModes[(int32)desc.CullMode];
_rasterizer.FrontCounterClockwise = FALSE;
_rasterizer.DepthBias = D3D12_DEFAULT_DEPTH_BIAS;
_rasterizer.DepthBiasClamp = D3D12_DEFAULT_DEPTH_BIAS_CLAMP;
_rasterizer.SlopeScaledDepthBias = D3D12_DEFAULT_SLOPE_SCALED_DEPTH_BIAS;
_rasterizer.DepthClipEnable = !!desc.DepthClipEnable;
_rasterizer.MultisampleEnable = TRUE;
_rasterizer.AntialiasedLineEnable = !!desc.Wireframe;
// Blend State
psDesc.BlendState.AlphaToCoverageEnable = desc.BlendMode.AlphaToCoverageEnable ? TRUE : FALSE;
psDesc.BlendState.IndependentBlendEnable = FALSE;
psDesc.BlendState.RenderTarget[0].BlendEnable = desc.BlendMode.BlendEnable ? TRUE : FALSE;
psDesc.BlendState.RenderTarget[0].SrcBlend = (D3D12_BLEND)desc.BlendMode.SrcBlend;
psDesc.BlendState.RenderTarget[0].DestBlend = (D3D12_BLEND)desc.BlendMode.DestBlend;
psDesc.BlendState.RenderTarget[0].BlendOp = (D3D12_BLEND_OP)desc.BlendMode.BlendOp;
psDesc.BlendState.RenderTarget[0].SrcBlendAlpha = (D3D12_BLEND)desc.BlendMode.SrcBlendAlpha;
psDesc.BlendState.RenderTarget[0].DestBlendAlpha = (D3D12_BLEND)desc.BlendMode.DestBlendAlpha;
psDesc.BlendState.RenderTarget[0].BlendOpAlpha = (D3D12_BLEND_OP)desc.BlendMode.BlendOpAlpha;
psDesc.BlendState.RenderTarget[0].RenderTargetWriteMask = (UINT8)desc.BlendMode.RenderTargetWriteMask;
#if BUILD_DEBUG
for (byte i = 1; i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; i++)
psDesc.BlendState.RenderTarget[i] = psDesc.BlendState.RenderTarget[0];
#endif
// Cache description
_desc = psDesc;
Platform::MemoryClear(&_blend, sizeof(_blend));
_blend.AlphaToCoverageEnable = desc.BlendMode.AlphaToCoverageEnable ? TRUE : FALSE;
_blend.IndependentBlendEnable = FALSE;
_blend.RenderTarget[0].BlendEnable = desc.BlendMode.BlendEnable ? TRUE : FALSE;
_blend.RenderTarget[0].SrcBlend = (D3D12_BLEND)desc.BlendMode.SrcBlend;
_blend.RenderTarget[0].DestBlend = (D3D12_BLEND)desc.BlendMode.DestBlend;
_blend.RenderTarget[0].BlendOp = (D3D12_BLEND_OP)desc.BlendMode.BlendOp;
_blend.RenderTarget[0].SrcBlendAlpha = (D3D12_BLEND)desc.BlendMode.SrcBlendAlpha;
_blend.RenderTarget[0].DestBlendAlpha = (D3D12_BLEND)desc.BlendMode.DestBlendAlpha;
_blend.RenderTarget[0].BlendOpAlpha = (D3D12_BLEND_OP)desc.BlendMode.BlendOpAlpha;
_blend.RenderTarget[0].RenderTargetWriteMask = (UINT8)desc.BlendMode.RenderTargetWriteMask;
for (uint32 i = 1; i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; i++)
_blend.RenderTarget[i] = _blend.RenderTarget[0];
// Set non-zero memory usage
_memoryUsage = sizeof(D3D12_GRAPHICS_PIPELINE_STATE_DESC);

View File

@@ -9,6 +9,11 @@
#include "Types.h"
#include "Engine/Core/Collections/Dictionary.h"
#include "../IncludeDirectXHeaders.h"
// GPU_D3D12_PSO_STREAM selects how graphics pipelines are created:
// 1 - pipeline state stream path (ID3D12Device2::CreatePipelineState), chosen when the SDK headers declare ID3D12Device2
// 0 - classic D3D12_GRAPHICS_PIPELINE_STATE_DESC path (CreateGraphicsPipelineState)
#ifdef __ID3D12Device2_FWD_DEFINED__
#define GPU_D3D12_PSO_STREAM 1
#else
#define GPU_D3D12_PSO_STREAM 0
#endif
class GPUTextureViewDX12;
class GPUVertexLayoutDX12;
@@ -50,7 +55,22 @@ class GPUPipelineStateDX12 : public GPUResourceDX12<GPUPipelineState>
{
private:
Dictionary<GPUPipelineStateKeyDX12, ID3D12PipelineState*> _states;
D3D12_GRAPHICS_PIPELINE_STATE_DESC _desc;
//GraphicsPipelineStateStreamDX12 _desc;
#if GPU_D3D12_PSO_STREAM
D3D12_DEPTH_STENCIL_DESC1 _depthStencil;
#else
D3D12_DEPTH_STENCIL_DESC _depthStencil;
#endif
D3D12_RASTERIZER_DESC _rasterizer;
D3D12_BLEND_DESC _blend;
D3D12_PRIMITIVE_TOPOLOGY_TYPE _primitiveTopology;
#if GPU_ALLOW_TESSELLATION_SHADERS
D3D12_SHADER_BYTECODE _shaderHS, _shaderDS;
#endif
#if GPU_ALLOW_GEOMETRY_SHADERS
D3D12_SHADER_BYTECODE _shaderGS;
#endif
D3D12_SHADER_BYTECODE _shaderVS, _shaderPS;
public:
GPUPipelineStateDX12(GPUDeviceDX12* device);

View File

@@ -177,6 +177,10 @@ public:
{
}
// No-op: this context implementation ignores the requested depth bounds range.
void SetDepthBounds(float minDepth, float maxDepth) override
{
}
GPUPipelineState* GetState() const override
{
return nullptr;

View File

@@ -713,10 +713,16 @@ void GPUContextVulkan::OnDrawCall()
if (_psDirtyFlag && pipelineState && (_rtDepth || _rtCount))
{
_psDirtyFlag = false;
const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer();
const auto cmdBuffer = _cmdBufferManager->GetCmdBuffer()->GetHandle();
const auto pipeline = pipelineState->GetState(_renderPass, _vertexLayout);
vkCmdBindPipeline(cmdBuffer->GetHandle(), VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
vkCmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
RENDER_STAT_PS_STATE_CHANGE();
if (_depthBoundsEnable && (!_currentState || !_currentState->DepthBoundsEnable))
{
// Auto-disable depth bounds
_depthBoundsEnable = false;
//vkCmdSetDepthBoundsTestEnable(cmdBuffer, false);
}
}
// Bind descriptors sets to the graphics pipeline
@@ -1381,6 +1387,17 @@ void GPUContextVulkan::SetScissor(const Rectangle& scissorRect)
vkCmdSetScissor(_cmdBufferManager->GetCmdBuffer()->GetHandle(), 0, 1, &rect);
}
// Records the dynamic depth bounds range into the current command buffer.
// Also raises the _depthBoundsEnable flag, which the draw path clears again when a pipeline without depth bounds gets bound.
void GPUContextVulkan::SetDepthBounds(float minDepth, float maxDepth)
{
    const auto cmd = _cmdBufferManager->GetCmdBuffer()->GetHandle();

    // Flip the tracking flag only on the first request
    if (_depthBoundsEnable == false)
    {
        _depthBoundsEnable = true;
        // Dynamic enable toggle is currently unused (kept as a hint for future use):
        //vkCmdSetDepthBoundsTestEnable(cmd, true);
    }

    vkCmdSetDepthBounds(cmd, minDepth, maxDepth);
}
GPUPipelineState* GPUContextVulkan::GetState() const
{
return _currentState;

View File

@@ -78,6 +78,7 @@ private:
int32 _psDirtyFlag : 1;
int32 _rtDirtyFlag : 1;
int32 _cbDirtyFlag : 1;
int32 _depthBoundsEnable : 1;
int32 _rtCount;
int32 _vbCount;
@@ -193,6 +194,7 @@ public:
void EndQuery(uint64 queryID) override;
void SetViewport(const Viewport& viewport) override;
void SetScissor(const Rectangle& scissorRect) override;
void SetDepthBounds(float minDepth, float maxDepth) override;
GPUPipelineState* GetState() const override;
void SetState(GPUPipelineState* state) override;
void ResetState() override;

View File

@@ -1748,6 +1748,7 @@ bool GPUDeviceVulkan::Init()
limits.HasAppendConsumeBuffers = false; // TODO: add Append Consume buffers support for Vulkan
limits.HasSeparateRenderTargetBlendState = true;
limits.HasDepthClip = PhysicalDeviceFeatures.depthClamp;
limits.HasDepthBounds = PhysicalDeviceFeatures.depthBounds;
limits.HasDepthAsSRV = true;
limits.HasReadOnlyDepth = true;
limits.HasMultisampleDepthAsSRV = !!PhysicalDeviceFeatures.sampleRateShading;

View File

@@ -443,13 +443,15 @@ bool GPUPipelineStateVulkan::Init(const Description& desc)
_dynamicStates[_descDynamic.dynamicStateCount++] = VK_DYNAMIC_STATE_VIEWPORT;
_dynamicStates[_descDynamic.dynamicStateCount++] = VK_DYNAMIC_STATE_SCISSOR;
_dynamicStates[_descDynamic.dynamicStateCount++] = VK_DYNAMIC_STATE_STENCIL_REFERENCE;
if (desc.DepthBoundsEnable)
_dynamicStates[_descDynamic.dynamicStateCount++] = VK_DYNAMIC_STATE_DEPTH_BOUNDS;
#define IsBlendUsingBlendFactor(blend) blend == BlendingMode::Blend::BlendFactor || blend == BlendingMode::Blend::BlendInvFactor
if (desc.BlendMode.BlendEnable && (
IsBlendUsingBlendFactor(desc.BlendMode.SrcBlend) || IsBlendUsingBlendFactor(desc.BlendMode.SrcBlendAlpha) ||
IsBlendUsingBlendFactor(desc.BlendMode.DestBlend) || IsBlendUsingBlendFactor(desc.BlendMode.DestBlendAlpha)))
_dynamicStates[_descDynamic.dynamicStateCount++] = VK_DYNAMIC_STATE_BLEND_CONSTANTS;
#undef IsBlendUsingBlendFactor
static_assert(ARRAY_COUNT(_dynamicStates) <= 4, "Invalid dynamic states array.");
static_assert(ARRAY_COUNT(_dynamicStates) >= 5, "Invalid dynamic states array.");
_desc.pDynamicState = &_descDynamic;
// Multisample
@@ -462,6 +464,9 @@ bool GPUPipelineStateVulkan::Init(const Description& desc)
RenderToolsVulkan::ZeroStruct(_descDepthStencil, VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO);
_descDepthStencil.depthTestEnable = desc.DepthEnable;
_descDepthStencil.depthWriteEnable = desc.DepthWriteEnable;
_descDepthStencil.depthBoundsTestEnable = desc.DepthBoundsEnable;
_descDepthStencil.minDepthBounds = 0.0f;
_descDepthStencil.maxDepthBounds = 1.0f; // TODO: inverse depth buffer rendering
_descDepthStencil.depthCompareOp = RenderToolsVulkan::ToVulkanCompareOp(desc.DepthFunc);
_descDepthStencil.stencilTestEnable = desc.StencilEnable;
_descDepthStencil.front.compareMask = desc.StencilReadMask;
@@ -474,6 +479,7 @@ bool GPUPipelineStateVulkan::Init(const Description& desc)
_desc.pDepthStencilState = &_descDepthStencil;
DepthReadEnable = desc.DepthEnable && desc.DepthFunc != ComparisonFunc::Always;
DepthWriteEnable = _descDepthStencil.depthWriteEnable;
DepthBoundsEnable = _descDepthStencil.depthBoundsTestEnable;
StencilReadEnable = desc.StencilEnable && desc.StencilReadMask != 0 && desc.StencilFunc != ComparisonFunc::Always;
StencilWriteEnable = desc.StencilEnable && desc.StencilWriteMask != 0;

View File

@@ -99,7 +99,7 @@ private:
#endif
VkPipelineViewportStateCreateInfo _descViewport;
VkPipelineDynamicStateCreateInfo _descDynamic;
VkDynamicState _dynamicStates[4];
VkDynamicState _dynamicStates[5];
VkPipelineMultisampleStateCreateInfo _descMultisample;
VkPipelineDepthStencilStateCreateInfo _descDepthStencil;
VkPipelineRasterizationStateCreateInfo _descRasterization;
@@ -123,6 +123,7 @@ public:
uint32 BlendEnable : 1;
uint32 DepthReadEnable : 1;
uint32 DepthWriteEnable : 1;
uint32 DepthBoundsEnable : 1;
uint32 StencilReadEnable : 1;
uint32 StencilWriteEnable : 1;