Add support for programmable samplers in shaders

This commit is contained in:
Wojtek Figat
2021-06-28 15:56:32 +02:00
parent 971449bef1
commit 3c1fd427eb
31 changed files with 866 additions and 121 deletions

View File

@@ -11,6 +11,7 @@
#include "UploadBufferDX12.h"
#include "GPUTextureDX12.h"
#include "GPUBufferDX12.h"
#include "GPUSamplerDX12.h"
#include "CommandQueueDX12.h"
#include "DescriptorHeapDX12.h"
#include "Engine/Graphics/RenderTask.h"
@@ -74,6 +75,7 @@ GPUContextDX12::GPUContextDX12(GPUDeviceDX12* device, D3D12_COMMAND_LIST_TYPE ty
, _rtDirtyFlag(0)
, _psDirtyFlag(0)
, _cbDirtyFlag(0)
, _samplersDirtyFlag(0)
, _rtDepth(nullptr)
, _ibHandle(nullptr)
{
@@ -213,6 +215,7 @@ void GPUContextDX12::Reset()
_currentState = nullptr;
_rtDirtyFlag = false;
_cbDirtyFlag = false;
_samplersDirtyFlag = false;
_rtCount = 0;
_rtDepth = nullptr;
_srMaskDirtyGraphics = 0;
@@ -226,8 +229,9 @@ void GPUContextDX12::Reset()
Platform::MemoryClear(_srHandles, sizeof(_srHandles));
Platform::MemoryClear(_uaHandles, sizeof(_uaHandles));
Platform::MemoryClear(_vbHandles, sizeof(_vbHandles));
Platform::MemoryClear(&_ibHandle, sizeof(_ibHandle));
_ibHandle = nullptr;
Platform::MemoryClear(&_cbHandles, sizeof(_cbHandles));
Platform::MemoryClear(&_samplers, sizeof(_samplers));
_swapChainsUsed = 0;
// Bind Root Signature
@@ -235,7 +239,7 @@ void GPUContextDX12::Reset()
_commandList->SetComputeRootSignature(_device->GetRootSignature());
// Bind heaps
ID3D12DescriptorHeap* ppHeaps[] = { _device->RingHeap_CBV_SRV_UAV.GetHeap() };
ID3D12DescriptorHeap* ppHeaps[] = { _device->RingHeap_CBV_SRV_UAV.GetHeap(), _device->RingHeap_Sampler.GetHeap() };
_commandList->SetDescriptorHeaps(ARRAY_COUNT(ppHeaps), ppHeaps);
}
@@ -449,28 +453,58 @@ void GPUContextDX12::flushUAVs()
void GPUContextDX12::flushCBs()
{
// Check if need to flush constant buffers
if (_cbDirtyFlag)
if (!_cbDirtyFlag)
return;
_cbDirtyFlag = false;
for (uint32 slotIndex = 0; slotIndex < ARRAY_COUNT(_cbHandles); slotIndex++)
{
// Clear flag
_cbDirtyFlag = false;
// Flush with the driver
for (uint32 slotIndex = 0; slotIndex < ARRAY_COUNT(_cbHandles); slotIndex++)
auto cb = _cbHandles[slotIndex];
if (cb)
{
auto cb = _cbHandles[slotIndex];
if (cb)
{
ASSERT(cb->GPUAddress != 0);
if (_isCompute)
_commandList->SetComputeRootConstantBufferView(slotIndex, cb->GPUAddress);
else
_commandList->SetGraphicsRootConstantBufferView(slotIndex, cb->GPUAddress);
}
ASSERT(cb->GPUAddress != 0);
if (_isCompute)
_commandList->SetComputeRootConstantBufferView(slotIndex, cb->GPUAddress);
else
_commandList->SetGraphicsRootConstantBufferView(slotIndex, cb->GPUAddress);
}
}
}
void GPUContextDX12::flushSamplers()
{
if (!_samplersDirtyFlag)
return;
_samplersDirtyFlag = false;
int32 lastSlot = -1;
for (int32 slotIndex = ARRAY_COUNT(_samplers) - 1; slotIndex >= 0; slotIndex--)
{
auto sampler = _samplers[slotIndex];
if (sampler)
{
lastSlot = slotIndex;
break;
}
}
if (lastSlot < 0)
return;
const uint32 samplersCount = lastSlot + 1;
D3D12_CPU_DESCRIPTOR_HANDLE srcDescriptorRangeStarts[ARRAY_COUNT(_samplers)];
for (uint32 i = 0; i < samplersCount; i++)
{
const auto handle = _samplers[i];
if (handle != nullptr)
{
srcDescriptorRangeStarts[i] = handle->HandleCPU;
}
}
auto allocation = _device->RingHeap_Sampler.AllocateTable(samplersCount);
_device->GetDevice()->CopyDescriptors(1, &allocation.CPU, &samplersCount, samplersCount, srcDescriptorRangeStarts, nullptr, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
if (_isCompute)
_commandList->SetComputeRootDescriptorTable(4, allocation.GPU);
else
_commandList->SetGraphicsRootDescriptorTable(4, allocation.GPU);
}
void GPUContextDX12::flushRBs()
{
#if DX12_ENABLE_RESOURCE_BARRIERS_BATCHING
@@ -563,6 +597,7 @@ void GPUContextDX12::OnDrawCall()
flushRBs();
flushPS();
flushCBs();
flushSamplers();
#if BUILD_DEBUG
// Additional verification of the state
@@ -835,8 +870,8 @@ void GPUContextDX12::BindUA(int32 slot, GPUResourceView* view)
{
ASSERT(slot >= 0 && slot < GPU_MAX_UA_BINDED);
_uaHandles[slot] = view ? (IShaderResourceDX12*)view->GetNativePtr() : nullptr;
if (view)
*view->LastRenderTime = _lastRenderTime;
if (view)
*view->LastRenderTime = _lastRenderTime;
}
void GPUContextDX12::BindVB(const Span<GPUBuffer*>& vertexBuffers, const uint32* vertexBuffersOffsets)
@@ -890,6 +925,17 @@ void GPUContextDX12::BindIB(GPUBuffer* indexBuffer)
}
}
/// <summary>
/// Binds a sampler (or unbinds with null) to the given shader sampler slot.
/// The slot must lie in the dynamic range [GPU_STATIC_SAMPLERS_COUNT, GPU_MAX_SAMPLER_BINDED).
/// Marks the samplers state dirty only when the binding actually changes.
/// </summary>
void GPUContextDX12::BindSampler(int32 slot, GPUSampler* sampler)
{
    ASSERT(slot >= GPU_STATIC_SAMPLERS_COUNT && slot < GPU_MAX_SAMPLER_BINDED);

    // Dynamic samplers are stored relative to the first slot after the static samplers
    const int32 index = slot - GPU_STATIC_SAMPLERS_COUNT;

    // Casting a null pointer yields a null pointer, so no explicit null check is required
    GPUSamplerDX12* const samplerDX12 = static_cast<GPUSamplerDX12*>(sampler);

    // Early out if nothing changes
    if (_samplers[index] == samplerDX12)
        return;

    _samplers[index] = samplerDX12;
    _samplersDirtyFlag = true;
}
void GPUContextDX12::UpdateCB(GPUConstantBuffer* cb, const void* data)
{
ASSERT(data && cb);
@@ -927,6 +973,7 @@ void GPUContextDX12::Dispatch(GPUShaderProgramCS* shader, uint32 threadGroupCoun
flushUAVs();
flushRBs();
flushCBs();
flushSamplers();
auto shaderDX12 = (GPUShaderProgramCSDX12*)shader;
auto computeState = shaderDX12->GetOrCreateState();
@@ -959,6 +1006,7 @@ void GPUContextDX12::DispatchIndirect(GPUShaderProgramCS* shader, GPUBuffer* buf
flushUAVs();
flushRBs();
flushCBs();
flushSamplers();
auto shaderDX12 = (GPUShaderProgramCSDX12*)shader;
auto computeState = shaderDX12->GetOrCreateState();

View File

@@ -12,6 +12,7 @@
class GPUDeviceDX12;
class GPUPipelineStateDX12;
class GPUBufferDX12;
class GPUSamplerDX12;
class GPUConstantBufferDX12;
class GPUTextureViewDX12;
@@ -51,6 +52,7 @@ private:
int32 _rtDirtyFlag : 1;
int32 _psDirtyFlag : 1;
int32 _cbDirtyFlag : 1;
int32 _samplersDirtyFlag : 1;
GPUTextureViewDX12* _rtDepth;
GPUTextureViewDX12* _rtHandles[GPU_MAX_RT_BINDED];
@@ -62,6 +64,7 @@ private:
D3D12_VERTEX_BUFFER_VIEW _vbViews[GPU_MAX_VB_BINDED];
D3D12_RESOURCE_BARRIER _rbBuffer[DX12_RB_BUFFER_SIZE];
GPUConstantBufferDX12* _cbHandles[GPU_MAX_CB_BINDED];
GPUSamplerDX12* _samplers[GPU_MAX_SAMPLER_BINDED - GPU_STATIC_SAMPLERS_COUNT];
public:
@@ -136,6 +139,7 @@ private:
void flushRTVs();
void flushUAVs();
void flushCBs();
void flushSamplers();
void flushRBs();
void flushPS();
void OnDrawCall();
@@ -167,6 +171,7 @@ public:
void BindUA(int32 slot, GPUResourceView* view) override;
void BindVB(const Span<GPUBuffer*>& vertexBuffers, const uint32* vertexBuffersOffsets = nullptr) override;
void BindIB(GPUBuffer* indexBuffer) override;
void BindSampler(int32 slot, GPUSampler* sampler) override;
void UpdateCB(GPUConstantBuffer* cb, const void* data) override;
void Dispatch(GPUShaderProgramCS* shader, uint32 threadGroupCountX, uint32 threadGroupCountY, uint32 threadGroupCountZ) override;
void DispatchIndirect(GPUShaderProgramCS* shader, GPUBuffer* bufferForArgs, uint32 offsetForArgs) override;

View File

@@ -9,6 +9,7 @@
#include "GPUTextureDX12.h"
#include "GPUTimerQueryDX12.h"
#include "GPUBufferDX12.h"
#include "GPUSamplerDX12.h"
#include "GPUSwapChainDX12.h"
#include "Engine/Engine/Engine.h"
#include "Engine/Engine/CommandLine.h"
@@ -191,7 +192,9 @@ GPUDeviceDX12::GPUDeviceDX12(IDXGIFactory4* dxgiFactory, GPUAdapterDX* adapter)
, Heap_CBV_SRV_UAV(this, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 4 * 1024, false)
, Heap_RTV(this, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, 1 * 1024, false)
, Heap_DSV(this, D3D12_DESCRIPTOR_HEAP_TYPE_DSV, 64, false)
, Heap_Sampler(this, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, 128, false)
, RingHeap_CBV_SRV_UAV(this, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 512 * 1024, true)
, RingHeap_Sampler(this, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, 1 * 1024, true)
{
}
@@ -350,6 +353,7 @@ bool GPUDeviceDX12::Init()
limits.MaximumTexture2DArraySize = D3D12_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION;
limits.MaximumTexture3DSize = D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION;
limits.MaximumTextureCubeSize = D3D12_REQ_TEXTURECUBE_DIMENSION;
limits.MaximumSamplerAnisotropy = D3D12_DEFAULT_MAX_ANISOTROPY;
for (int32 i = 0; i < static_cast<int32>(PixelFormat::MAX); i++)
{
@@ -379,6 +383,8 @@ bool GPUDeviceDX12::Init()
_mainContext = New<GPUContextDX12>(this, D3D12_COMMAND_LIST_TYPE_DIRECT);
if (RingHeap_CBV_SRV_UAV.Init())
return true;
if (RingHeap_Sampler.Init())
return true;
// Create empty views
D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc;
@@ -458,7 +464,7 @@ bool GPUDeviceDX12::Init()
// TODO: maybe create set of different root signatures? for UAVs, for compute, for simple drawing, for post fx?
{
// Descriptor tables
D3D12_DESCRIPTOR_RANGE r[2];
D3D12_DESCRIPTOR_RANGE r[3];
// TODO: separate ranges for pixel/vertex visibility and one shared for all?
{
D3D12_DESCRIPTOR_RANGE& range = r[0];
@@ -476,9 +482,17 @@ bool GPUDeviceDX12::Init()
range.RegisterSpace = 0;
range.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
}
{
D3D12_DESCRIPTOR_RANGE& range = r[2];
range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER;
range.NumDescriptors = GPU_MAX_SAMPLER_BINDED - GPU_STATIC_SAMPLERS_COUNT;
range.BaseShaderRegister = GPU_STATIC_SAMPLERS_COUNT;
range.RegisterSpace = 0;
range.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
}
// Root parameters
D3D12_ROOT_PARAMETER rootParameters[4];
D3D12_ROOT_PARAMETER rootParameters[5];
{
D3D12_ROOT_PARAMETER& rootParam = rootParameters[0];
rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
@@ -507,11 +521,18 @@ bool GPUDeviceDX12::Init()
rootParam.DescriptorTable.NumDescriptorRanges = 1;
rootParam.DescriptorTable.pDescriptorRanges = &r[1];
}
// TODO: describe visibilities for the static samplers, maybe use all pixel? or again pixel + all combo?
{
D3D12_ROOT_PARAMETER& rootParam = rootParameters[4];
rootParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
rootParam.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
rootParam.DescriptorTable.NumDescriptorRanges = 1;
rootParam.DescriptorTable.pDescriptorRanges = &r[2];
}
// Static samplers
D3D12_STATIC_SAMPLER_DESC staticSamplers[6];
static_assert(GPU_STATIC_SAMPLERS_COUNT == ARRAY_COUNT(staticSamplers), "Update static samplers setup.");
// TODO: describe visibilities for the static samplers, maybe use all pixel? or again pixel + all combo?
// Linear Clamp
staticSamplers[0].Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR;
staticSamplers[0].AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
@@ -718,7 +739,9 @@ void GPUDeviceDX12::Dispose()
Heap_CBV_SRV_UAV.ReleaseGPU();
Heap_RTV.ReleaseGPU();
Heap_DSV.ReleaseGPU();
Heap_Sampler.ReleaseGPU();
RingHeap_CBV_SRV_UAV.ReleaseGPU();
RingHeap_Sampler.ReleaseGPU();
SAFE_DELETE(UploadBuffer);
SAFE_DELETE(DrawIndirectCommandSignature);
SAFE_DELETE(_mainContext);
@@ -766,6 +789,11 @@ GPUBuffer* GPUDeviceDX12::CreateBuffer(const StringView& name)
return New<GPUBufferDX12>(this, name);
}
// Creates a new DirectX 12 sampler object that belongs to this device.
GPUSampler* GPUDeviceDX12::CreateSampler()
{
    GPUSamplerDX12* const sampler = New<GPUSamplerDX12>(this);
    return sampler;
}
GPUSwapChain* GPUDeviceDX12::CreateSwapChain(Window* window)
{
return New<GPUSwapChainDX12>(this, window);

View File

@@ -141,7 +141,9 @@ public:
DescriptorHeapPoolDX12 Heap_CBV_SRV_UAV;
DescriptorHeapPoolDX12 Heap_RTV;
DescriptorHeapPoolDX12 Heap_DSV;
DescriptorHeapPoolDX12 Heap_Sampler;
DescriptorHeapRingBufferDX12 RingHeap_CBV_SRV_UAV;
DescriptorHeapRingBufferDX12 RingHeap_Sampler;
public:
@@ -188,6 +190,7 @@ public:
GPUPipelineState* CreatePipelineState() override;
GPUTimerQuery* CreateTimerQuery() override;
GPUBuffer* CreateBuffer(const StringView& name) override;
GPUSampler* CreateSampler() override;
GPUSwapChain* CreateSwapChain(Window* window) override;
};

View File

@@ -0,0 +1,124 @@
// Copyright (c) 2012-2021 Wojciech Figat. All rights reserved.
#if GRAPHICS_API_DIRECTX12
#include "GPUSamplerDX12.h"
// Converts the engine sampler addressing mode into the matching D3D12 texture address mode.
// Values without a mapping produce an invalid mode (-1).
D3D12_TEXTURE_ADDRESS_MODE ToDX12(GPUSamplerAddressMode value)
{
    if (value == GPUSamplerAddressMode::Wrap)
        return D3D12_TEXTURE_ADDRESS_MODE_WRAP;
    if (value == GPUSamplerAddressMode::Clamp)
        return D3D12_TEXTURE_ADDRESS_MODE_CLAMP;
    if (value == GPUSamplerAddressMode::Mirror)
        return D3D12_TEXTURE_ADDRESS_MODE_MIRROR;
    if (value == GPUSamplerAddressMode::Border)
        return D3D12_TEXTURE_ADDRESS_MODE_BORDER;

    // Unknown addressing mode
    return static_cast<D3D12_TEXTURE_ADDRESS_MODE>(-1);
}
bool GPUSamplerDX12::OnInit()
{
D3D12_SAMPLER_DESC samplerDesc;
if (_desc.ComparisonFunction == GPUSamplerCompareFunction::Never)
{
switch (_desc.Filter)
{
case GPUSamplerFilter::Point:
samplerDesc.Filter = D3D12_FILTER_MIN_MAG_MIP_POINT;
break;
case GPUSamplerFilter::Bilinear:
samplerDesc.Filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT;
break;
case GPUSamplerFilter::Trilinear:
samplerDesc.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR;
break;
case GPUSamplerFilter::Anisotropic:
samplerDesc.Filter = D3D12_FILTER_ANISOTROPIC;
break;
default:
return true;
}
}
else
{
switch (_desc.Filter)
{
case GPUSamplerFilter::Point:
samplerDesc.Filter = D3D12_FILTER_COMPARISON_MIN_MAG_MIP_POINT;
break;
case GPUSamplerFilter::Bilinear:
samplerDesc.Filter = D3D12_FILTER_COMPARISON_MIN_LINEAR_MAG_MIP_POINT;
break;
case GPUSamplerFilter::Trilinear:
samplerDesc.Filter = D3D12_FILTER_COMPARISON_MIN_MAG_MIP_LINEAR;
break;
case GPUSamplerFilter::Anisotropic:
samplerDesc.Filter = D3D12_FILTER_COMPARISON_ANISOTROPIC;
break;
default:
return true;
}
}
samplerDesc.AddressU = ToDX12(_desc.AddressU);
samplerDesc.AddressV = ToDX12(_desc.AddressV);
samplerDesc.AddressW = ToDX12(_desc.AddressW);
samplerDesc.MipLODBias = _desc.MipBias;
samplerDesc.MaxAnisotropy = _desc.MaxAnisotropy;
switch (_desc.ComparisonFunction)
{
case GPUSamplerCompareFunction::Never:
samplerDesc.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER;
break;
case GPUSamplerCompareFunction::Less:
samplerDesc.ComparisonFunc = D3D12_COMPARISON_FUNC_LESS;
break;
default:
return true;
}
switch (_desc.BorderColor)
{
case GPUSamplerBorderColor::TransparentBlack:
samplerDesc.BorderColor[0] = 0;
samplerDesc.BorderColor[1] = 0;
samplerDesc.BorderColor[2] = 0;
samplerDesc.BorderColor[3] = 0;
break;
case GPUSamplerBorderColor::OpaqueBlack:
samplerDesc.BorderColor[0] = 0;
samplerDesc.BorderColor[1] = 0;
samplerDesc.BorderColor[2] = 0;
samplerDesc.BorderColor[3] = 1.0f;
break;
case GPUSamplerBorderColor::OpaqueWhite:
samplerDesc.BorderColor[0] = 1.0f;
samplerDesc.BorderColor[1] = 1.0f;
samplerDesc.BorderColor[2] = 1.0f;
samplerDesc.BorderColor[3] = 1.0f;
break;
default:
return true;
}
samplerDesc.MinLOD = _desc.MinMipLevel;
samplerDesc.MaxLOD = _desc.MaxMipLevel;
_device->Heap_Sampler.AllocateSlot(Slot.Heap, Slot.Index);
HandleCPU = Slot.CPU();
_device->GetDevice()->CreateSampler(&samplerDesc, HandleCPU);
_memoryUsage = sizeof(D3D12_SAMPLER_DESC);
return false;
}
void GPUSamplerDX12::OnReleaseGPU()
{
Slot.Release();
// Base
GPUSampler::OnReleaseGPU();
}
#endif

View File

@@ -0,0 +1,32 @@
// Copyright (c) 2012-2021 Wojciech Figat. All rights reserved.
#pragma once
#include "Engine/Graphics/Textures/GPUSampler.h"
#include "GPUDeviceDX12.h"
#if GRAPHICS_API_DIRECTX12
/// <summary>
/// Sampler object for DirectX 12 backend.
/// </summary>
class GPUSamplerDX12 : public GPUResourceDX12<GPUSampler>
{
public:

    /// <summary>
    /// Initializes a new instance of the <see cref="GPUSamplerDX12"/> class.
    /// </summary>
    /// <param name="device">The graphics device that owns this sampler.</param>
    GPUSamplerDX12(GPUDeviceDX12* device)
        : GPUResourceDX12<GPUSampler>(device, StringView::Empty)
    {
    }

    /// <summary>
    /// The descriptor heap slot used by this sampler.
    /// </summary>
    DescriptorHeapWithSlotsDX12::Slot Slot;

    /// <summary>
    /// The CPU descriptor handle of the sampler. Zero-initialized so that it never holds an indeterminate value before the sampler gets initialized.
    /// </summary>
    D3D12_CPU_DESCRIPTOR_HANDLE HandleCPU = {};

protected:

    // [GPUSampler]
    bool OnInit() override;
    void OnReleaseGPU() override;
};
#endif