Add SRV slots isage info for D3D12 shaders cache

This commit is contained in:
Wojtek Figat
2021-06-07 12:19:49 +02:00
parent fa1cd56a0d
commit f4c8808d19
8 changed files with 193 additions and 186 deletions

View File

@@ -12,7 +12,7 @@ class GPUShaderProgram;
/// <summary>
/// The runtime version of the shaders cache supported by the all graphics back-ends. The same for all the shader cache formats (easier to sync and validate).
/// </summary>
#define GPU_SHADER_CACHE_VERSION 7
#define GPU_SHADER_CACHE_VERSION 8
/// <summary>
/// Represents collection of shader programs with permutations and custom names.

View File

@@ -3,6 +3,7 @@
#if GRAPHICS_API_DIRECTX12
#include "GPUPipelineStateDX12.h"
#include "GPUShaderProgramDX12.h"
#include "GPUTextureDX12.h"
#include "Engine/Profiler/ProfilerCPU.h"
#include "Engine/GraphicsDevice/DirectX/RenderToolsDX.h"
@@ -63,6 +64,44 @@ ID3D12PipelineState* GPUPipelineStateDX12::GetState(GPUTextureViewDX12* depth, i
LOG_DIRECTX_RESULT(result);
if (FAILED(result))
return nullptr;
#if GPU_ENABLE_RESOURCE_NAMING
char name[200];
int32 nameLen = 0;
if (DebugDesc.VS)
{
Platform::MemoryCopy(name + nameLen, *DebugDesc.VS->GetName(), DebugDesc.VS->GetName().Length());
nameLen += DebugDesc.VS->GetName().Length();
name[nameLen++] = '+';
}
if (DebugDesc.HS)
{
Platform::MemoryCopy(name + nameLen, *DebugDesc.HS->GetName(), DebugDesc.HS->GetName().Length());
nameLen += DebugDesc.HS->GetName().Length();
name[nameLen++] = '+';
}
if (DebugDesc.DS)
{
Platform::MemoryCopy(name + nameLen, *DebugDesc.DS->GetName(), DebugDesc.DS->GetName().Length());
nameLen += DebugDesc.DS->GetName().Length();
name[nameLen++] = '+';
}
if (DebugDesc.GS)
{
Platform::MemoryCopy(name + nameLen, *DebugDesc.GS->GetName(), DebugDesc.GS->GetName().Length());
nameLen += DebugDesc.GS->GetName().Length();
name[nameLen++] = '+';
}
if (DebugDesc.PS)
{
Platform::MemoryCopy(name + nameLen, *DebugDesc.PS->GetName(), DebugDesc.PS->GetName().Length());
nameLen += DebugDesc.PS->GetName().Length();
name[nameLen++] = '+';
}
if (nameLen && name[nameLen - 1] == '+')
nameLen--;
name[nameLen] = '\0';
SetDebugObjectName(state, name);
#endif
// Cache it
_states.Add(key, state);
@@ -89,16 +128,23 @@ bool GPUPipelineStateDX12::Init(const Description& desc)
psDesc.pRootSignature = _device->GetRootSignature();
// Shaders
Platform::MemoryClear(&Header, sizeof(Header));
psDesc.InputLayout = { static_cast<D3D12_INPUT_ELEMENT_DESC*>(desc.VS->GetInputLayout()), desc.VS->GetInputLayoutSize() };
psDesc.VS = { desc.VS->GetBufferHandle(), desc.VS->GetBufferSize() };
if (desc.HS)
psDesc.HS = { desc.HS->GetBufferHandle(), desc.HS->GetBufferSize() };
if (desc.DS)
psDesc.DS = { desc.DS->GetBufferHandle(), desc.DS->GetBufferSize() };
if (desc.GS)
psDesc.GS = { desc.GS->GetBufferHandle(), desc.GS->GetBufferSize() };
if (desc.PS)
psDesc.PS = { desc.PS->GetBufferHandle(), desc.PS->GetBufferSize() };
#define INIT_SHADER_STAGE(stage, type) \
if (desc.stage) \
{ \
psDesc.stage = { desc.stage->GetBufferHandle(), desc.stage->GetBufferSize() }; \
auto shader = (type*)desc.stage; \
auto srCount = Math::FloorLog2(shader->GetBindings().UsedSRsMask) + 1; \
for (uint32 i = 0; i < srCount; i++) \
if (shader->Header.SrDimensions[i]) \
Header.SrDimensions[i] = shader->Header.SrDimensions[i]; \
}
INIT_SHADER_STAGE(HS, GPUShaderProgramHSDX12);
INIT_SHADER_STAGE(DS, GPUShaderProgramDSDX12);
INIT_SHADER_STAGE(GS, GPUShaderProgramGSDX12);
INIT_SHADER_STAGE(VS, GPUShaderProgramVSDX12);
INIT_SHADER_STAGE(PS, GPUShaderProgramPSDX12);
const static D3D12_PRIMITIVE_TOPOLOGY_TYPE primTypes1[] =
{
D3D12_PRIMITIVE_TOPOLOGY_TYPE_UNDEFINED,

View File

@@ -6,6 +6,7 @@
#include "Engine/Graphics/GPUPipelineState.h"
#include "GPUDeviceDX12.h"
#include "Types.h"
#include "../IncludeDirectXHeaders.h"
class GPUTextureViewDX12;
@@ -50,18 +51,12 @@ private:
public:
/// <summary>
/// Init
/// </summary>
/// <param name="device">Graphics Device</param>
GPUPipelineStateDX12(GPUDeviceDX12* device);
public:
/// <summary>
/// Direct3D primitive topology
/// </summary>
D3D_PRIMITIVE_TOPOLOGY PrimitiveTopologyType = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED;
DxShaderHeader Header;
/// <summary>
/// Gets DirectX 12 graphics pipeline state object for the given rendering state. Uses depth buffer and render targets formats and multi-sample levels to setup a proper PSO. Uses caching.

View File

@@ -5,10 +5,16 @@
#include "GPUShaderDX12.h"
#include "Engine/Serialization/MemoryReadStream.h"
#include "GPUShaderProgramDX12.h"
#include "Types.h"
#include "../RenderToolsDX.h"
GPUShaderProgram* GPUShaderDX12::CreateGPUShaderProgram(ShaderStage type, const GPUShaderProgramInitializer& initializer, byte* cacheBytes, uint32 cacheSize, MemoryReadStream& stream)
{
// Extract the DX shader header from the cache
DxShaderHeader* header = (DxShaderHeader*)cacheBytes;
cacheBytes += sizeof(DxShaderHeader);
cacheSize -= sizeof(DxShaderHeader);
GPUShaderProgram* shader = nullptr;
switch (type)
{
@@ -87,34 +93,34 @@ GPUShaderProgram* GPUShaderDX12::CreateGPUShaderProgram(ShaderStage type, const
}
// Create object
shader = New<GPUShaderProgramVSDX12>(initializer, cacheBytes, cacheSize, inputLayout, inputLayoutSize);
shader = New<GPUShaderProgramVSDX12>(initializer, header, cacheBytes, cacheSize, inputLayout, inputLayoutSize);
break;
}
case ShaderStage::Hull:
{
int32 controlPointsCount;
stream.ReadInt32(&controlPointsCount);
shader = New<GPUShaderProgramHSDX12>(initializer, cacheBytes, cacheSize, controlPointsCount);
shader = New<GPUShaderProgramHSDX12>(initializer, header, cacheBytes, cacheSize, controlPointsCount);
break;
}
case ShaderStage::Domain:
{
shader = New<GPUShaderProgramDSDX12>(initializer, cacheBytes, cacheSize);
shader = New<GPUShaderProgramDSDX12>(initializer, header, cacheBytes, cacheSize);
break;
}
case ShaderStage::Geometry:
{
shader = New<GPUShaderProgramGSDX12>(initializer, cacheBytes, cacheSize);
shader = New<GPUShaderProgramGSDX12>(initializer, header, cacheBytes, cacheSize);
break;
}
case ShaderStage::Pixel:
{
shader = New<GPUShaderProgramPSDX12>(initializer, cacheBytes, cacheSize);
shader = New<GPUShaderProgramPSDX12>(initializer, header, cacheBytes, cacheSize);
break;
}
case ShaderStage::Compute:
{
shader = New<GPUShaderProgramCSDX12>(_device, initializer, cacheBytes, cacheSize);
shader = New<GPUShaderProgramCSDX12>(_device, initializer, header, cacheBytes, cacheSize);
break;
}
}

View File

@@ -6,6 +6,7 @@
#include "GPUDeviceDX12.h"
#include "Engine/Graphics/Shaders/GPUShaderProgram.h"
#include "Types.h"
#include "../IncludeDirectXHeaders.h"
/// <summary>
@@ -20,18 +21,18 @@ protected:
public:
/// <summary>
/// Initializes a new instance of the <see cref="GPUShaderProgramDX11"/> class.
/// </summary>
/// <param name="initializer">The program initialization data.</param>
/// <param name="cacheBytes">The shader data.</param>
/// <param name="cacheSize">The shader data size.</param>
GPUShaderProgramDX12(const GPUShaderProgramInitializer& initializer, byte* cacheBytes, uint32 cacheSize)
GPUShaderProgramDX12(const GPUShaderProgramInitializer& initializer, DxShaderHeader* header, byte* cacheBytes, uint32 cacheSize)
: Header(*header)
{
BaseType::Init(initializer);
_data.Set(cacheBytes, cacheSize);
}
public:
DxShaderHeader Header;
public:
// [BaseType]
@@ -39,7 +40,6 @@ public:
{
return (void*)_data.Get();
}
uint32 GetBufferSize() const override
{
return _data.Count();
@@ -58,16 +58,8 @@ private:
public:
/// <summary>
/// Initializes a new instance of the <see cref="GPUShaderProgramVSDX12"/> class.
/// </summary>
/// <param name="initializer">The program initialization data.</param>
/// <param name="cacheBytes">The shader data.</param>
/// <param name="cacheSize">The shader data size.</param>
/// <param name="inputLayout">The input layout description.</param>
/// <param name="inputLayoutSize">The input layout description size.</param>
GPUShaderProgramVSDX12(const GPUShaderProgramInitializer& initializer, byte* cacheBytes, uint32 cacheSize, D3D12_INPUT_ELEMENT_DESC* inputLayout, byte inputLayoutSize)
: GPUShaderProgramDX12(initializer, cacheBytes, cacheSize)
GPUShaderProgramVSDX12(const GPUShaderProgramInitializer& initializer, DxShaderHeader* header, byte* cacheBytes, uint32 cacheSize, D3D12_INPUT_ELEMENT_DESC* inputLayout, byte inputLayoutSize)
: GPUShaderProgramDX12(initializer, header, cacheBytes, cacheSize)
, _inputLayoutSize(inputLayoutSize)
{
for (byte i = 0; i < inputLayoutSize; i++)
@@ -81,7 +73,6 @@ public:
{
return (void*)_inputLayout;
}
byte GetInputLayoutSize() const override
{
return _inputLayoutSize;
@@ -95,15 +86,8 @@ class GPUShaderProgramHSDX12 : public GPUShaderProgramDX12<GPUShaderProgramHS>
{
public:
/// <summary>
/// Initializes a new instance of the <see cref="GPUShaderProgramHSDX12"/> class.
/// </summary>
/// <param name="initializer">The program initialization data.</param>
/// <param name="cacheBytes">The shader data.</param>
/// <param name="cacheSize">The shader data size.</param>
/// <param name="controlPointsCount">The control points used by the hull shader for processing.</param>
GPUShaderProgramHSDX12(const GPUShaderProgramInitializer& initializer, byte* cacheBytes, uint32 cacheSize, int32 controlPointsCount)
: GPUShaderProgramDX12(initializer, cacheBytes, cacheSize)
GPUShaderProgramHSDX12(const GPUShaderProgramInitializer& initializer, DxShaderHeader* header, byte* cacheBytes, uint32 cacheSize, int32 controlPointsCount)
: GPUShaderProgramDX12(initializer, header, cacheBytes, cacheSize)
{
_controlPointsCount = controlPointsCount;
}
@@ -116,13 +100,8 @@ class GPUShaderProgramDSDX12 : public GPUShaderProgramDX12<GPUShaderProgramDS>
{
public:
/// <summary>
/// Initializes a new instance of the <see cref="GPUShaderProgramDSDX12"/> class.
/// </summary>
/// <param name="initializer">The program initialization data.</param>
/// <param name="cacheSize">The shader data size.</param>
GPUShaderProgramDSDX12(const GPUShaderProgramInitializer& initializer, byte* cacheBytes, uint32 cacheSize)
: GPUShaderProgramDX12(initializer, cacheBytes, cacheSize)
GPUShaderProgramDSDX12(const GPUShaderProgramInitializer& initializer, DxShaderHeader* header, byte* cacheBytes, uint32 cacheSize)
: GPUShaderProgramDX12(initializer, header, cacheBytes, cacheSize)
{
}
};
@@ -134,14 +113,8 @@ class GPUShaderProgramGSDX12 : public GPUShaderProgramDX12<GPUShaderProgramGS>
{
public:
/// <summary>
/// Initializes a new instance of the <see cref="GPUShaderProgramGSDX12"/> class.
/// </summary>
/// <param name="initializer">The program initialization data.</param>
/// <param name="cacheBytes">The shader data.</param>
/// <param name="cacheSize">The shader data size.</param>
GPUShaderProgramGSDX12(const GPUShaderProgramInitializer& initializer, byte* cacheBytes, uint32 cacheSize)
: GPUShaderProgramDX12(initializer, cacheBytes, cacheSize)
GPUShaderProgramGSDX12(const GPUShaderProgramInitializer& initializer, DxShaderHeader* header, byte* cacheBytes, uint32 cacheSize)
: GPUShaderProgramDX12(initializer, header, cacheBytes, cacheSize)
{
}
};
@@ -153,14 +126,8 @@ class GPUShaderProgramPSDX12 : public GPUShaderProgramDX12<GPUShaderProgramPS>
{
public:
/// <summary>
/// Initializes a new instance of the <see cref="GPUShaderProgramPSDX12"/> class.
/// </summary>
/// <param name="initializer">The program initialization data.</param>
/// <param name="cacheBytes">The shader data.</param>
/// <param name="cacheSize">The shader data size.</param>
GPUShaderProgramPSDX12(const GPUShaderProgramInitializer& initializer, byte* cacheBytes, uint32 cacheSize)
: GPUShaderProgramDX12(initializer, cacheBytes, cacheSize)
GPUShaderProgramPSDX12(const GPUShaderProgramInitializer& initializer, DxShaderHeader* header, byte* cacheBytes, uint32 cacheSize)
: GPUShaderProgramDX12(initializer, header, cacheBytes, cacheSize)
{
}
};
@@ -178,23 +145,13 @@ private:
public:
/// <summary>
/// Initializes a new instance of the <see cref="GPUShaderProgramCSDX12"/> class.
/// </summary>
/// <param name="device">The graphics device.</param>
/// <param name="initializer">The program initialization data.</param>
/// <param name="cacheBytes">The shader data.</param>
/// <param name="cacheSize">The shader data size.</param>
GPUShaderProgramCSDX12(GPUDeviceDX12* device, const GPUShaderProgramInitializer& initializer, byte* cacheBytes, uint32 cacheSize)
: GPUShaderProgramDX12(initializer, cacheBytes, cacheSize)
GPUShaderProgramCSDX12(GPUDeviceDX12* device, const GPUShaderProgramInitializer& initializer, DxShaderHeader* header, byte* cacheBytes, uint32 cacheSize)
: GPUShaderProgramDX12(initializer, header, cacheBytes, cacheSize)
, _device(device)
, _state(nullptr)
{
}
/// <summary>
/// Destructor
/// </summary>
~GPUShaderProgramCSDX12()
{
_device->AddResourceToLateRelease(_state);
@@ -205,7 +162,6 @@ public:
/// <summary>
/// Gets DirectX 12 compute pipeline state object
/// </summary>
/// <returns>DirectX 12 compute pipeline state object</returns>
FORCE_INLINE ID3D12PipelineState* GetState() const
{
return _state;
@@ -214,7 +170,6 @@ public:
/// <summary>
/// Gets or creates compute pipeline state for that compute shader.
/// </summary>
/// <returns>DirectX 12 compute pipeline state object</returns>
ID3D12PipelineState* GetOrCreateState();
};

View File

@@ -0,0 +1,19 @@
// Copyright (c) 2012-2021 Wojciech Figat. All rights reserved.
#pragma once
#if COMPILE_WITH_DX_SHADER_COMPILER || GRAPHICS_API_DIRECTX12
#include "../IncludeDirectXHeaders.h"
struct DxShaderHeader
{
/// <summary>
/// The SRV dimensions per-slot.
/// </summary>
byte SrDimensions[32];
// .. rest is just a actual data array
};
#endif

View File

@@ -6,6 +6,7 @@
#include "Engine/Core/Log.h"
#include "Engine/Threading/Threading.h"
#include "Engine/Graphics/Config.h"
#include "Engine/GraphicsDevice/DirectX/DX12/Types.h"
#include "Engine/Utilities/StringConverter.h"
#include "Engine/Platform/Win32/IncludeWindowsHeaders.h"
#include "Engine/Platform/Windows/ComPtr.h"
@@ -112,99 +113,6 @@ ShaderCompilerDX::~ShaderCompilerDX()
containerReflection->Release();
}
namespace
{
bool ProcessShader(ShaderCompilationContext* context, Array<ShaderCompiler::ShaderResourceBuffer>& constantBuffers, ID3D12ShaderReflection* shaderReflection, D3D12_SHADER_DESC& desc, ShaderBindings& bindings)
{
// Extract constant buffers usage information
for (uint32 a = 0; a < desc.ConstantBuffers; a++)
{
// Get CB
auto cb = shaderReflection->GetConstantBufferByIndex(a);
// Get CB description
D3D12_SHADER_BUFFER_DESC cbDesc;
cb->GetDesc(&cbDesc);
// Check buffer type
if (cbDesc.Type == D3D_CT_CBUFFER)
{
// Find CB slot index
int32 slot = INVALID_INDEX;
for (uint32 b = 0; b < desc.BoundResources; b++)
{
D3D12_SHADER_INPUT_BIND_DESC bDesc;
shaderReflection->GetResourceBindingDesc(b, &bDesc);
if (StringUtils::Compare(bDesc.Name, cbDesc.Name) == 0)
{
slot = bDesc.BindPoint;
break;
}
}
if (slot == INVALID_INDEX)
{
context->OnError("Missing bound resource.");
return true;
}
// Set flag
bindings.UsedCBsMask |= 1 << slot;
// Try to add CB to the list
for (int32 b = 0; b < constantBuffers.Count(); b++)
{
auto& cc = constantBuffers[b];
if (cc.Slot == slot)
{
cc.IsUsed = true;
cc.Size = cbDesc.Size;
break;
}
}
}
}
// Extract resources usage
for (uint32 i = 0; i < desc.BoundResources; i++)
{
// Get resource description
D3D12_SHADER_INPUT_BIND_DESC resDesc;
shaderReflection->GetResourceBindingDesc(i, &resDesc);
switch (resDesc.Type)
{
// Sampler
case D3D_SIT_SAMPLER:
break;
// Constant Buffer
case D3D_SIT_CBUFFER:
case D3D_SIT_TBUFFER:
break;
// Shader Resource
case D3D_SIT_TEXTURE:
case D3D_SIT_STRUCTURED:
case D3D_SIT_BYTEADDRESS:
bindings.UsedSRsMask |= 1 << resDesc.BindPoint;
break;
// Unordered Access
case D3D_SIT_UAV_RWTYPED:
case D3D_SIT_UAV_RWSTRUCTURED:
case D3D_SIT_UAV_RWBYTEADDRESS:
case D3D_SIT_UAV_APPEND_STRUCTURED:
case D3D_SIT_UAV_CONSUME_STRUCTURED:
case D3D_SIT_UAV_RWSTRUCTURED_WITH_COUNTER:
bindings.UsedUAsMask |= 1 << resDesc.BindPoint;
break;
}
}
return false;
}
}
bool ShaderCompilerDX::CompileShader(ShaderFunctionMeta& meta, WritePermutationData customDataWrite)
{
if (WriteShaderFunctionBegin(_context, meta))
@@ -393,11 +301,89 @@ bool ShaderCompilerDX::CompileShader(ShaderFunctionMeta& meta, WritePermutationD
shaderReflection->GetDesc(&desc);
// Process shader reflection data
DxShaderHeader header;
Platform::MemoryClear(&header, sizeof(header));
ShaderBindings bindings = { desc.InstructionCount, 0, 0, 0 };
if (ProcessShader(_context, _constantBuffers, shaderReflection.Get(), desc, bindings))
return true;
for (uint32 a = 0; a < desc.ConstantBuffers; a++)
{
auto cb = shaderReflection->GetConstantBufferByIndex(a);
D3D12_SHADER_BUFFER_DESC cbDesc;
cb->GetDesc(&cbDesc);
if (cbDesc.Type == D3D_CT_CBUFFER)
{
// Find CB slot index
int32 slot = INVALID_INDEX;
for (uint32 b = 0; b < desc.BoundResources; b++)
{
D3D12_SHADER_INPUT_BIND_DESC bDesc;
shaderReflection->GetResourceBindingDesc(b, &bDesc);
if (StringUtils::Compare(bDesc.Name, cbDesc.Name) == 0)
{
slot = bDesc.BindPoint;
break;
}
}
if (slot == INVALID_INDEX)
{
_context->OnError("Missing bound resource.");
return true;
}
if (WriteShaderFunctionPermutation(_context, meta, permutationIndex, bindings, shaderBuffer->GetBufferPointer(), (int32)shaderBuffer->GetBufferSize()))
// Set flag
bindings.UsedCBsMask |= 1 << slot;
// Try to add CB to the list
for (int32 b = 0; b < _constantBuffers.Count(); b++)
{
auto& cc = _constantBuffers[b];
if (cc.Slot == slot)
{
cc.IsUsed = true;
cc.Size = cbDesc.Size;
break;
}
}
}
}
for (uint32 i = 0; i < desc.BoundResources; i++)
{
D3D12_SHADER_INPUT_BIND_DESC resDesc;
shaderReflection->GetResourceBindingDesc(i, &resDesc);
switch (resDesc.Type)
{
// Sampler
case D3D_SIT_SAMPLER:
break;
// Constant Buffer
case D3D_SIT_CBUFFER:
case D3D_SIT_TBUFFER:
break;
// Shader Resource
case D3D_SIT_TEXTURE:
bindings.UsedSRsMask |= 1 << resDesc.BindPoint;
header.SrDimensions[resDesc.BindPoint] = resDesc.Dimension;
break;
case D3D_SIT_STRUCTURED:
case D3D_SIT_BYTEADDRESS:
bindings.UsedSRsMask |= 1 << resDesc.BindPoint;
header.SrDimensions[resDesc.BindPoint] = D3D_SRV_DIMENSION_BUFFER;
break;
// Unordered Access
case D3D_SIT_UAV_RWTYPED:
case D3D_SIT_UAV_RWSTRUCTURED:
case D3D_SIT_UAV_RWBYTEADDRESS:
case D3D_SIT_UAV_APPEND_STRUCTURED:
case D3D_SIT_UAV_CONSUME_STRUCTURED:
case D3D_SIT_UAV_RWSTRUCTURED_WITH_COUNTER:
bindings.UsedUAsMask |= 1 << resDesc.BindPoint;
break;
}
}
if (WriteShaderFunctionPermutation(_context, meta, permutationIndex, bindings, &header, sizeof(header), shaderBuffer->GetBufferPointer(), (int32)shaderBuffer->GetBufferSize()))
return true;
if (customDataWrite && customDataWrite(_context, meta, permutationIndex, _macros))

View File

@@ -80,7 +80,7 @@ bool ShaderCompiler::Compile(ShaderCompilationContext* context)
_constantBuffers.Add({ meta->CB[i].Slot, false, 0 });
// [Output] Version number
output->WriteInt32(7);
output->WriteInt32(8);
// [Output] Additional data start
const int32 additionalDataStartPos = output->GetPosition();