Additional improvements to scene rendering

This commit is contained in:
Wojtek Figat
2022-11-15 22:39:03 +01:00
parent a2670dc3b5
commit eb281a7574
11 changed files with 68 additions and 79 deletions

View File

@@ -30,14 +30,12 @@ PACK_STRUCT(struct DeferredMaterialShaderData {
float TimeParam;
Float4 ViewInfo;
Float4 ScreenSize;
Float3 WorldInvScale;
float WorldDeterminantSign;
Float2 Dummy0;
float LODDitherFactor;
float PerInstanceRandom;
Float4 TemporalAAJitter;
Float3 GeometrySize;
float Dummy1;
float WorldDeterminantSign;
});
DrawPass DeferredMaterialShader::GetDrawModes() const
@@ -96,13 +94,6 @@ void DeferredMaterialShader::Bind(BindParameters& params)
materialData->TimeParam = params.TimeParam;
materialData->ViewInfo = view.ViewInfo;
materialData->ScreenSize = view.ScreenSize;
const float scaleX = Float3(drawCall.World.M11, drawCall.World.M12, drawCall.World.M13).Length();
const float scaleY = Float3(drawCall.World.M21, drawCall.World.M22, drawCall.World.M23).Length();
const float scaleZ = Float3(drawCall.World.M31, drawCall.World.M32, drawCall.World.M33).Length();
materialData->WorldInvScale = Float3(
scaleX > 0.00001f ? 1.0f / scaleX : 0.0f,
scaleY > 0.00001f ? 1.0f / scaleY : 0.0f,
scaleZ > 0.00001f ? 1.0f / scaleZ : 0.0f);
materialData->WorldDeterminantSign = drawCall.WorldDeterminantSign;
materialData->LODDitherFactor = drawCall.Surface.LODDitherFactor;
materialData->PerInstanceRandom = drawCall.PerInstanceRandom;

View File

@@ -32,14 +32,12 @@ PACK_STRUCT(struct ForwardMaterialShaderData {
float TimeParam;
Float4 ViewInfo;
Float4 ScreenSize;
Float3 WorldInvScale;
float WorldDeterminantSign;
Float2 Dummy0;
float LODDitherFactor;
float PerInstanceRandom;
Float4 TemporalAAJitter;
Float3 GeometrySize;
float Dummy1;
float WorldDeterminantSign;
});
DrawPass ForwardMaterialShader::GetDrawModes() const
@@ -104,13 +102,6 @@ void ForwardMaterialShader::Bind(BindParameters& params)
materialData->TimeParam = params.TimeParam;
materialData->ViewInfo = view.ViewInfo;
materialData->ScreenSize = view.ScreenSize;
const float scaleX = Float3(drawCall.World.M11, drawCall.World.M12, drawCall.World.M13).Length();
const float scaleY = Float3(drawCall.World.M21, drawCall.World.M22, drawCall.World.M23).Length();
const float scaleZ = Float3(drawCall.World.M31, drawCall.World.M32, drawCall.World.M33).Length();
materialData->WorldInvScale = Float3(
scaleX > 0.00001f ? 1.0f / scaleX : 0.0f,
scaleY > 0.00001f ? 1.0f / scaleY : 0.0f,
scaleZ > 0.00001f ? 1.0f / scaleZ : 0.0f);
materialData->WorldDeterminantSign = drawCall.WorldDeterminantSign;
materialData->LODDitherFactor = drawCall.Surface.LODDitherFactor;
materialData->PerInstanceRandom = drawCall.PerInstanceRandom;

View File

@@ -117,7 +117,7 @@ public:
/// </summary>
struct InstancingHandler
{
void (*GetHash)(const DrawCall& drawCall, int32& batchKey);
void (*GetHash)(const DrawCall& drawCall, uint32& batchKey);
bool (*CanBatch)(const DrawCall& a, const DrawCall& b);
void (*WriteDrawCall)(struct InstanceData* instanceData, const DrawCall& drawCall);
};

View File

@@ -10,7 +10,7 @@
/// <summary>
/// Current materials shader version.
/// </summary>
#define MATERIAL_GRAPH_VERSION 156
#define MATERIAL_GRAPH_VERSION 157
class Material;
class GPUShader;

View File

@@ -16,11 +16,6 @@
#include "Engine/Level/Scene/Lightmap.h"
#include "Engine/Level/Actors/PostFxVolume.h"
// Amount of bits to use for draw calls batches hash key
#define USE_BATCH_KEY_MASK 0
#define BATCH_KEY_BITS 32
#define BATCH_KEY_MASK ((1 << BATCH_KEY_BITS) - 1)
static_assert(sizeof(DrawCall) <= 280, "Too big draw call data size.");
static_assert(sizeof(DrawCall::Surface) >= sizeof(DrawCall::Terrain), "Wrong draw call data size.");
static_assert(sizeof(DrawCall::Surface) >= sizeof(DrawCall::Particle), "Wrong draw call data size.");
@@ -524,9 +519,10 @@ namespace
}
}
void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseDistance, DrawCallsList& list, const DrawCall* drawCalls)
void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseDistance, DrawCallsList& list, const RenderListBuffer<DrawCall>& drawCalls)
{
PROFILE_CPU();
const auto* drawCallsData = drawCalls.Get();
const auto* listData = list.Indices.Get();
const int32 listSize = list.Indices.Count();
const Float3 planeNormal = renderContext.View.Direction;
@@ -544,10 +540,10 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD
const uint32 sortKeyXor = reverseDistance ? MAX_uint32 : 0;
for (int32 i = 0; i < listSize; i++)
{
const DrawCall& drawCall = drawCalls[listData[i]];
const DrawCall& drawCall = drawCallsData[listData[i]];
const float distance = Float3::Dot(planeNormal, drawCall.ObjectPosition) - planePoint;
const uint32 sortKey = RenderTools::ComputeDistanceSortKey(distance) ^ sortKeyXor;
int32 batchKey = GetHash(drawCall.Geometry.IndexBuffer);
uint32 batchKey = GetHash(drawCall.Geometry.IndexBuffer);
batchKey = (batchKey * 397) ^ GetHash(drawCall.Geometry.VertexBuffers[0]);
batchKey = (batchKey * 397) ^ GetHash(drawCall.Geometry.VertexBuffers[1]);
batchKey = (batchKey * 397) ^ GetHash(drawCall.Geometry.VertexBuffers[2]);
@@ -556,12 +552,7 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD
if (drawCall.Material->CanUseInstancing(handler))
handler.GetHash(drawCall, batchKey);
batchKey += (int32)(471 * drawCall.WorldDeterminantSign);
#if USE_BATCH_KEY_MASK
const uint32 batchHashKey = (uint32)batchKey & BATCH_KEY_MASK;
#else
const uint32 batchHashKey = (uint32)batchKey;
#endif
sortedKeys[i] = (uint64)batchHashKey << 32 | (uint64)sortKey;
sortedKeys[i] = (uint64)batchKey << 32 | (uint64)sortKey;
}
// Sort draw calls indices
@@ -574,14 +565,14 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD
list.Batches.Clear();
for (int32 i = 0; i < listSize;)
{
const DrawCall& drawCall = drawCalls[listData[i]];
const DrawCall& drawCall = drawCallsData[listData[i]];
int32 batchSize = 1;
int32 instanceCount = drawCall.InstanceCount;
// Check the following draw calls to merge them (using instancing)
for (int32 j = i + 1; j < listSize; j++)
{
const DrawCall& other = drawCalls[listData[j]];
const DrawCall& other = drawCallsData[listData[j]];
if (!CanBatchWith(drawCall, other))
break;
@@ -608,11 +599,12 @@ bool CanUseInstancing(DrawPass pass)
return pass == DrawPass::GBuffer || pass == DrawPass::Depth;
}
void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsList& list, const DrawCall* drawCalls, GPUTextureView* input)
void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsList& list, const RenderListBuffer<DrawCall>& drawCalls, GPUTextureView* input)
{
if (list.IsEmpty())
return;
PROFILE_GPU_CPU("Drawing");
const auto* drawCallsData = drawCalls.Get();
const auto* listData = list.Indices.Get();
const auto* batchesData = list.Batches.Get();
const auto context = GPUDevice::Instance->GetMainContext();
@@ -655,10 +647,10 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL
if (batch.BatchSize > 1)
{
IMaterial::InstancingHandler handler;
drawCalls[listData[batch.StartIndex]].Material->CanUseInstancing(handler);
drawCallsData[listData[batch.StartIndex]].Material->CanUseInstancing(handler);
for (int32 j = 0; j < batch.BatchSize; j++)
{
auto& drawCall = drawCalls[listData[batch.StartIndex + j]];
auto& drawCall = drawCallsData[listData[batch.StartIndex + j]];
handler.WriteDrawCall(instanceData, drawCall);
instanceData++;
}
@@ -691,7 +683,7 @@ DRAW:
for (int32 i = 0; i < list.Batches.Count(); i++)
{
auto& batch = batchesData[i];
const DrawCall& drawCall = drawCalls[listData[batch.StartIndex]];
const DrawCall& drawCall = drawCallsData[listData[batch.StartIndex]];
int32 vbCount = 0;
while (vbCount < ARRAY_COUNT(drawCall.Geometry.VertexBuffers) && drawCall.Geometry.VertexBuffers[vbCount])
@@ -866,14 +858,16 @@ DRAW:
}
}
void SurfaceDrawCallHandler::GetHash(const DrawCall& drawCall, int32& batchKey)
void SurfaceDrawCallHandler::GetHash(const DrawCall& drawCall, uint32& batchKey)
{
batchKey = (batchKey * 397) ^ ::GetHash(drawCall.Surface.Lightmap);
}
bool SurfaceDrawCallHandler::CanBatch(const DrawCall& a, const DrawCall& b)
{
return a.Surface.Lightmap == b.Surface.Lightmap &&
// TODO: find reason why batching static meshes with lightmap causes problems with sampling in shader (flickering when meshes in batch order gets changes due to async draw calls collection)
return a.Surface.Lightmap == nullptr && b.Surface.Lightmap == nullptr &&
//return a.Surface.Lightmap == b.Surface.Lightmap &&
a.Surface.Skinning == nullptr &&
b.Surface.Skinning == nullptr;
}

View File

@@ -504,7 +504,7 @@ public:
/// <param name="listType">The collected draw calls list type.</param>
API_FUNCTION() FORCE_INLINE void SortDrawCalls(API_PARAM(Ref) const RenderContext& renderContext, bool reverseDistance, DrawCallsListType listType)
{
SortDrawCalls(renderContext, reverseDistance, DrawCallsLists[(int32)listType], DrawCalls.Get());
SortDrawCalls(renderContext, reverseDistance, DrawCallsLists[(int32)listType], DrawCalls);
}
/// <summary>
@@ -514,7 +514,7 @@ public:
/// <param name="reverseDistance">If set to <c>true</c> reverse draw call distance to the view. Results in back to front sorting.</param>
/// <param name="list">The collected draw calls indices list.</param>
/// <param name="drawCalls">The collected draw calls list.</param>
void SortDrawCalls(const RenderContext& renderContext, bool reverseDistance, DrawCallsList& list, const DrawCall* drawCalls);
void SortDrawCalls(const RenderContext& renderContext, bool reverseDistance, DrawCallsList& list, const RenderListBuffer<DrawCall>& drawCalls);
/// <summary>
/// Executes the collected draw calls.
@@ -524,7 +524,7 @@ public:
/// <param name="input">The input scene color. It's optional and used in forward/postFx rendering.</param>
API_FUNCTION() FORCE_INLINE void ExecuteDrawCalls(API_PARAM(Ref) const RenderContext& renderContext, DrawCallsListType listType, GPUTextureView* input = nullptr)
{
ExecuteDrawCalls(renderContext, DrawCallsLists[(int32)listType], DrawCalls.Get(), input);
ExecuteDrawCalls(renderContext, DrawCallsLists[(int32)listType], DrawCalls, input);
}
/// <summary>
@@ -535,7 +535,7 @@ public:
/// <param name="input">The input scene color. It's optional and used in forward/postFx rendering.</param>
FORCE_INLINE void ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsList& list, GPUTextureView* input = nullptr)
{
ExecuteDrawCalls(renderContext, list, DrawCalls.Get(), input);
ExecuteDrawCalls(renderContext, list, DrawCalls, input);
}
/// <summary>
@@ -545,14 +545,14 @@ public:
/// <param name="list">The collected draw calls indices list.</param>
/// <param name="drawCalls">The collected draw calls list.</param>
/// <param name="input">The input scene color. It's optional and used in forward/postFx rendering.</param>
void ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsList& list, const DrawCall* drawCalls, GPUTextureView* input);
void ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsList& list, const RenderListBuffer<DrawCall>& drawCalls, GPUTextureView* input);
};
/// <summary>
/// Represents data per instance element used for instanced rendering.
/// </summary>
struct FLAXENGINE_API InstanceData
{
PACK_STRUCT(struct FLAXENGINE_API InstanceData
{
Float3 InstanceOrigin;
float PerInstanceRandom;
Float3 InstanceTransform1;
@@ -560,11 +560,11 @@ struct FLAXENGINE_API InstanceData
Float3 InstanceTransform2;
Float3 InstanceTransform3;
Half4 InstanceLightmapArea;
};
});
struct SurfaceDrawCallHandler
{
static void GetHash(const DrawCall& drawCall, int32& batchKey);
static void GetHash(const DrawCall& drawCall, uint32& batchKey);
static bool CanBatch(const DrawCall& a, const DrawCall& b);
static void WriteDrawCall(InstanceData* instanceData, const DrawCall& drawCall);
};

View File

@@ -373,7 +373,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont
{
auto& shadowContext = renderContextBatch.Contexts[i];
shadowContext.List->SortDrawCalls(shadowContext, false, DrawCallsListType::Depth);
shadowContext.List->SortDrawCalls(shadowContext, false, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls.Get());
shadowContext.List->SortDrawCalls(shadowContext, false, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls);
}
}

View File

@@ -631,7 +631,7 @@ void ShadowsPass::RenderShadow(RenderContextBatch& renderContextBatch, RendererP
context->ClearDepth(rt);
auto& shadowContext = renderContextBatch.Contexts[shadowData.ContextIndex + faceIndex];
shadowContext.List->ExecuteDrawCalls(shadowContext, DrawCallsListType::Depth);
shadowContext.List->ExecuteDrawCalls(shadowContext, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls.Get(), nullptr);
shadowContext.List->ExecuteDrawCalls(shadowContext, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, nullptr);
}
// Restore GPU context
@@ -709,7 +709,7 @@ void ShadowsPass::RenderShadow(RenderContextBatch& renderContextBatch, RendererS
context->ClearDepth(rt);
auto& shadowContext = renderContextBatch.Contexts[shadowData.ContextIndex + faceIndex];
shadowContext.List->ExecuteDrawCalls(shadowContext, DrawCallsListType::Depth);
shadowContext.List->ExecuteDrawCalls(shadowContext, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls.Get(), nullptr);
shadowContext.List->ExecuteDrawCalls(shadowContext, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, nullptr);
}
// Restore GPU context
@@ -779,7 +779,7 @@ void ShadowsPass::RenderShadow(RenderContextBatch& renderContextBatch, RendererD
context->ClearDepth(rt);
auto& shadowContext = renderContextBatch.Contexts[shadowData.ContextIndex + cascadeIndex];
shadowContext.List->ExecuteDrawCalls(shadowContext, DrawCallsListType::Depth);
shadowContext.List->ExecuteDrawCalls(shadowContext, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls.Get(), nullptr);
shadowContext.List->ExecuteDrawCalls(shadowContext, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, nullptr);
}
// Restore GPU context

View File

@@ -35,11 +35,9 @@ template<typename T>
class ConcurrentQueue : public moodycamel::ConcurrentQueue<T, ConcurrentQueueSettings>
{
public:
typedef moodycamel::ConcurrentQueue<T, ConcurrentQueueSettings> Base;
public:
/// <summary>
/// Gets an estimate of the total number of elements currently in the queue.
/// </summary>
@@ -52,7 +50,16 @@ public:
/// Adds item to the collection.
/// </summary>
/// <param name="item">The item to add.</param>
FORCE_INLINE void Add(T& item)
FORCE_INLINE void Add(const T& item)
{
enqueue(item);
}
/// <summary>
/// Adds item to the collection.
/// </summary>
/// <param name="item">The item to add.</param>
FORCE_INLINE void Add(T&& item)
{
enqueue(item);
}

View File

@@ -339,6 +339,9 @@ void JobSystem::Wait(int64 label)
WaitMutex.Lock();
WaitSignal.Wait(WaitMutex, 1);
WaitMutex.Unlock();
// Wake up any thread to prevent stalling in highly multi-threaded environment
JobsSignal.NotifyOne();
}
#if JOB_SYSTEM_USE_STATS