diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUBufferDX12.h b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUBufferDX12.h
index 48811019a..4d9c367f8 100644
--- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUBufferDX12.h
+++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUBufferDX12.h
@@ -28,6 +28,7 @@ public:
     GPUBufferViewDX12()
     {
         SrvDimension = D3D12_SRV_DIMENSION_BUFFER;
+        UavDimension = D3D12_UAV_DIMENSION_BUFFER; // buffer views always expose a buffer-typed UAV
     }
 
     ///
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp
index 3fd85fad8..8b085d374 100644
--- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp
+++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.cpp
@@ -70,8 +70,6 @@ GPUContextDX12::GPUContextDX12(GPUDeviceDX12* device, D3D12_COMMAND_LIST_TYPE ty
     , _rbBufferSize(0)
     , _srMaskDirtyGraphics(0)
     , _srMaskDirtyCompute(0)
-    , _uaMaskDirtyGraphics(0)
-    , _uaMaskDirtyCompute(0)
     , _isCompute(0)
     , _rtDirtyFlag(0)
     , _psDirtyFlag(0)
@@ -208,8 +206,6 @@ void GPUContextDX12::Reset()
     _rtDepth = nullptr;
     _srMaskDirtyGraphics = 0;
     _srMaskDirtyCompute = 0;
-    _uaMaskDirtyGraphics = 0;
-    _uaMaskDirtyCompute = 0;
     _psDirtyFlag = false;
     _isCompute = false;
     _currentCompute = nullptr;
@@ -309,7 +305,7 @@ void GPUContextDX12::flushSRVs()
     {
         const auto handle = _srHandles[i];
         const auto dimensions = (D3D12_SRV_DIMENSION)header.SrDimensions[i];
-        if (srMask & (1 << i) && handle != nullptr && dimensions)
+        if (srMask & (1 << i) && handle != nullptr)
         {
             ASSERT(handle->SrvDimension == dimensions);
             srcDescriptorRangeStarts[i] = handle->SRV();
@@ -379,35 +375,30 @@ void GPUContextDX12::flushUAVs()
     if (_isCompute)
     {
         // Skip if no compute shader binded or it doesn't use shader resources
-        if (_uaMaskDirtyCompute == 0 || _currentCompute == nullptr || (uaMask = _currentCompute->GetBindings().UsedUAsMask) == 0)
+        if (_currentCompute == nullptr || (uaMask = _currentCompute->GetBindings().UsedUAsMask) == 0)
             return;
-
-        // Bind all dirty slots and all used slots
-        uaMask |= _uaMaskDirtyCompute;
-        _uaMaskDirtyCompute = 0;
     }
     else
     {
         // Skip if no state binded or it doesn't use shader resources
-        if (_uaMaskDirtyGraphics == 0 || _currentState == nullptr || (uaMask = _currentState->GetUsedUAsMask()) == 0)
+        if (_currentState == nullptr || (uaMask = _currentState->GetUsedUAsMask()) == 0)
             return;
-
-        // Bind all dirty slots and all used slots
-        uaMask |= _uaMaskDirtyGraphics;
-        _uaMaskDirtyGraphics = 0;
     }
 
     // Count UAVs required to be bind to the pipeline (the index of the most significant bit that's set)
     const uint32 uaCount = Math::FloorLog2(uaMask) + 1;
     ASSERT(uaCount <= GPU_MAX_UA_BINDED);
 
     // Fill table with source descriptors
+    DxShaderHeader& header = _currentCompute ? ((GPUShaderProgramCSDX12*)_currentCompute)->Header : _currentState->Header;
     D3D12_CPU_DESCRIPTOR_HANDLE srcDescriptorRangeStarts[GPU_MAX_UA_BINDED];
     for (uint32 i = 0; i < uaCount; i++)
     {
         const auto handle = _uaHandles[i];
-        if (handle != nullptr)
+        const auto dimensions = (D3D12_UAV_DIMENSION)header.UaDimensions[i]; // UAV kind the shader expects at register i
+        if (uaMask & (1 << i) && handle != nullptr)
         {
+            ASSERT(handle->UavDimension == dimensions); // the bound view must match the dimension stored in the shader header
             srcDescriptorRangeStarts[i] = handle->UAV();
             SetResourceState(handle->GetResourceOwner(), D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
         }
@@ -752,15 +743,8 @@ void GPUContextDX12::SetRenderTarget(GPUTextureView* rt, GPUBuffer* uaOutput)
     // Set render target normally
     SetRenderTarget(nullptr, rt);
 
-    // Bind UAV output to the last slot
-    const int32 slot = ARRAY_COUNT(_uaHandles) - 1;
-    IShaderResourceDX12** lastSlot = &_uaHandles[slot];
-    if (*lastSlot != uaOutputDX12)
-    {
-        *lastSlot = uaOutputDX12;
-        _srMaskDirtyGraphics |= 1 << slot;
-        _srMaskDirtyCompute |= 1 << slot;
-    }
+    // Bind UAV output to the 2nd slot (after render target to match DX11 binding model)
+    _uaHandles[1] = uaOutputDX12;
 }
 
 void GPUContextDX12::ResetSR()
@@ -824,13 +808,7 @@ void GPUContextDX12::BindSR(int32 slot, GPUResourceView* view)
 void GPUContextDX12::BindUA(int32 slot, GPUResourceView* view)
 {
     ASSERT(slot >= 0 && slot < GPU_MAX_UA_BINDED);
-    auto handle = view ? (IShaderResourceDX12*)view->GetNativePtr() : nullptr;
-    if (_uaHandles[slot] != handle || !handle)
-    {
-        _uaMaskDirtyGraphics |= 1 << slot;
-        _uaMaskDirtyCompute |= 1 << slot;
-        _uaHandles[slot] = handle;
-    }
+    _uaHandles[slot] = view ? (IShaderResourceDX12*)view->GetNativePtr() : nullptr;
 }
 
 void GPUContextDX12::BindVB(const Span& vertexBuffers, const uint32* vertexBuffersOffsets)
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h
index 7e9994add..1551edc76 100644
--- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h
+++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUContextDX12.h
@@ -47,9 +47,6 @@ private:
     uint32 _srMaskDirtyGraphics;
     uint32 _srMaskDirtyCompute;
 
-    uint32 _uaMaskDirtyGraphics;
-    uint32 _uaMaskDirtyCompute;
-
     int32 _isCompute : 1;
     int32 _rtDirtyFlag : 1;
     int32 _psDirtyFlag : 1;
@@ -58,7 +55,7 @@ private:
     GPUTextureViewDX12* _rtDepth;
     GPUTextureViewDX12* _rtHandles[GPU_MAX_RT_BINDED];
     IShaderResourceDX12* _srHandles[GPU_MAX_SR_BINDED];
-    IShaderResourceDX12* _uaHandles[GPU_MAX_UA_BINDED + 1];
+    IShaderResourceDX12* _uaHandles[GPU_MAX_UA_BINDED];
     GPUBufferDX12* _ibHandle;
     GPUBufferDX12* _vbHandles[GPU_MAX_VB_BINDED];
     D3D12_INDEX_BUFFER_VIEW _ibView;
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp
index edaceba26..40f860a87 100644
--- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp
+++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUDeviceDX12.cpp
@@ -467,12 +467,11 @@ bool GPUDeviceDX12::Init()
         {
             D3D12_DESCRIPTOR_RANGE& range = r[1];
             range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
-            range.NumDescriptors = GPU_MAX_UA_BINDED + 1; // the last (additional) UAV register is used as a UAV output (hidden internally)
+            range.NumDescriptors = GPU_MAX_UA_BINDED;
             range.BaseShaderRegister = 0;
             range.RegisterSpace = 0;
             range.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
         }
-        static_assert(GPU_MAX_UA_BINDED == 2, "DX12 backend uses hardcoded single UAV register slot. Update code to support more.");
 
         // Root parameters
         D3D12_ROOT_PARAMETER rootParameters[4];
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUPipelineStateDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUPipelineStateDX12.cpp
index 08928e94b..0491f8ca2 100644
--- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUPipelineStateDX12.cpp
+++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUPipelineStateDX12.cpp
@@ -139,6 +139,10 @@ bool GPUPipelineStateDX12::Init(const Description& desc)
         for (uint32 i = 0; i < srCount; i++) \
             if (shader->Header.SrDimensions[i]) \
                 Header.SrDimensions[i] = shader->Header.SrDimensions[i]; \
+        auto uaCount = Math::FloorLog2(shader->GetBindings().UsedUAsMask) + 1; \
+        for (uint32 i = 0; i < uaCount; i++) \
+            if (shader->Header.UaDimensions[i]) \
+                Header.UaDimensions[i] = shader->Header.UaDimensions[i]; \
     }
     INIT_SHADER_STAGE(HS, GPUShaderProgramHSDX12);
     INIT_SHADER_STAGE(DS, GPUShaderProgramDSDX12);
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUTextureDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUTextureDX12.cpp
index 37058884a..1044ff41b 100644
--- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUTextureDX12.cpp
+++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUTextureDX12.cpp
@@ -280,6 +280,7 @@ void GPUTextureViewDX12::SetDSV(D3D12_DEPTH_STENCIL_VIEW_DESC& dsvDesc)
 
 void GPUTextureViewDX12::SetUAV(D3D12_UNORDERED_ACCESS_VIEW_DESC& uavDesc, ID3D12Resource* counterResource)
 {
+    UavDimension = uavDesc.ViewDimension; // remember the view kind so it can be checked against the shader when the UAV is flushed
     _uav.CreateUAV(_device, _owner->GetResource(), &uavDesc, counterResource);
 }
 
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/IShaderResourceDX12.h b/Source/Engine/GraphicsDevice/DirectX/DX12/IShaderResourceDX12.h
index 445d83f2a..64c6cf580 100644
--- a/Source/Engine/GraphicsDevice/DirectX/DX12/IShaderResourceDX12.h
+++ b/Source/Engine/GraphicsDevice/DirectX/DX12/IShaderResourceDX12.h
@@ -33,6 +33,7 @@ public:
     int32 SubresourceIndex;
 
     D3D12_SRV_DIMENSION SrvDimension = D3D12_SRV_DIMENSION_UNKNOWN;
+    D3D12_UAV_DIMENSION UavDimension = D3D12_UAV_DIMENSION_UNKNOWN;
 
 public:
 
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/Types.h b/Source/Engine/GraphicsDevice/DirectX/DX12/Types.h
index a4f49d793..2a39c400d 100644
--- a/Source/Engine/GraphicsDevice/DirectX/DX12/Types.h
+++ b/Source/Engine/GraphicsDevice/DirectX/DX12/Types.h
@@ -13,6 +13,11 @@ struct DxShaderHeader
     ///
     byte SrDimensions[32];
 
+    ///
+    /// The UAV dimensions per-slot (D3D12_UAV_DIMENSION values stored as bytes, indexed by UAV register).
+    ///
+    byte UaDimensions[4];
+
     // .. rest is just a actual data array
 };
 
diff --git a/Source/Engine/ShadersCompilation/DirectX/ShaderCompilerDX.cpp b/Source/Engine/ShadersCompilation/DirectX/ShaderCompilerDX.cpp
index e557a1b98..c03c10ae7 100644
--- a/Source/Engine/ShadersCompilation/DirectX/ShaderCompilerDX.cpp
+++ b/Source/Engine/ShadersCompilation/DirectX/ShaderCompilerDX.cpp
@@ -379,6 +379,36 @@ bool ShaderCompilerDX::CompileShader(ShaderFunctionMeta& meta, WritePermutationD
             case D3D_SIT_UAV_CONSUME_STRUCTURED:
             case D3D_SIT_UAV_RWSTRUCTURED_WITH_COUNTER:
                 bindings.UsedUAsMask |= 1 << resDesc.BindPoint;
+                // The header stores one entry per UAV slot, so reject registers that would write past the end of that array
+                if (resDesc.BindPoint >= sizeof(header.UaDimensions) / sizeof(header.UaDimensions[0]))
+                {
+                    LOG(Error, "UAV resource {1} is bound to slot {0} which is out of the supported range", resDesc.BindPoint, String(resDesc.Name));
+                    return true;
+                }
+                switch (resDesc.Dimension)
+                {
+                case D3D_SRV_DIMENSION_BUFFER:
+                    header.UaDimensions[resDesc.BindPoint] = 1; // D3D12_UAV_DIMENSION_BUFFER;
+                    break;
+                case D3D_SRV_DIMENSION_TEXTURE1D:
+                    header.UaDimensions[resDesc.BindPoint] = 2; // D3D12_UAV_DIMENSION_TEXTURE1D;
+                    break;
+                case D3D_SRV_DIMENSION_TEXTURE1DARRAY:
+                    header.UaDimensions[resDesc.BindPoint] = 3; // D3D12_UAV_DIMENSION_TEXTURE1DARRAY;
+                    break;
+                case D3D_SRV_DIMENSION_TEXTURE2D:
+                    header.UaDimensions[resDesc.BindPoint] = 4; // D3D12_UAV_DIMENSION_TEXTURE2D;
+                    break;
+                case D3D_SRV_DIMENSION_TEXTURE2DARRAY:
+                    header.UaDimensions[resDesc.BindPoint] = 5; // D3D12_UAV_DIMENSION_TEXTURE2DARRAY;
+                    break;
+                case D3D_SRV_DIMENSION_TEXTURE3D:
+                    header.UaDimensions[resDesc.BindPoint] = 8; // D3D12_UAV_DIMENSION_TEXTURE3D;
+                    break;
+                default:
+                    LOG(Error, "Unknown UAV resource {2} of type {0} at slot {1}", resDesc.Dimension, resDesc.BindPoint, String(resDesc.Name));
+                    return true;
+                }
                 break;
             }
         }