Optimize Animated Model bones buffer flushing with delayed draw action to reduce lock contention
#3917 #3827
This commit is contained in:
@@ -20,6 +20,7 @@
|
||||
#include "Engine/Level/Scene/Scene.h"
|
||||
#include "Engine/Level/SceneObjectsFactory.h"
|
||||
#include "Engine/Profiler/ProfilerMemory.h"
|
||||
#include "Engine/Renderer/RenderList.h"
|
||||
#include "Engine/Serialization/Serialization.h"
|
||||
|
||||
AnimatedModel::AnimatedModel(const SpawnParams& params)
|
||||
@@ -1012,9 +1013,10 @@ void AnimatedModel::Draw(RenderContext& renderContext)
|
||||
// Flush skinning data to the GPU
|
||||
if (_skinningData.IsDirty())
|
||||
{
|
||||
RenderContext::GPULocker.Lock();
|
||||
GPUDevice::Instance->GetMainContext()->UpdateBuffer(_skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count());
|
||||
RenderContext::GPULocker.Unlock();
|
||||
renderContext.List->AddDelayedDraw([this](GPUContext* context, RenderContextBatch& renderContextBatch, int32 renderContextIndex)
|
||||
{
|
||||
context->UpdateBuffer(_skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count());
|
||||
});
|
||||
_skinningData.OnFlush();
|
||||
}
|
||||
|
||||
@@ -1057,9 +1059,10 @@ void AnimatedModel::Draw(RenderContextBatch& renderContextBatch)
|
||||
// Flush skinning data to the GPU
|
||||
if (_skinningData.IsDirty())
|
||||
{
|
||||
RenderContext::GPULocker.Lock();
|
||||
GPUDevice::Instance->GetMainContext()->UpdateBuffer(_skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count());
|
||||
RenderContext::GPULocker.Unlock();
|
||||
renderContext.List->AddDelayedDraw([this](GPUContext* context, RenderContextBatch& renderContextBatch, int32 renderContextIndex)
|
||||
{
|
||||
context->UpdateBuffer(_skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count());
|
||||
});
|
||||
_skinningData.OnFlush();
|
||||
}
|
||||
|
||||
|
||||
@@ -677,11 +677,10 @@ void CleanupGPUParticlesSorting()
|
||||
SAFE_DELETE_GPU_RESOURCE(GPUIndirectArgsBuffer);
|
||||
}
|
||||
|
||||
void DrawEmittersGPU(RenderContextBatch& renderContextBatch)
|
||||
void DrawEmittersGPU(GPUContext* context, RenderContextBatch& renderContextBatch)
|
||||
{
|
||||
PROFILE_GPU_CPU_NAMED("DrawEmittersGPU");
|
||||
ScopeReadLock systemScope(Particles::SystemLocker);
|
||||
GPUContext* context = GPUDevice::Instance->GetMainContext();
|
||||
|
||||
// Count draws and sorting passes needed for resource allocation
|
||||
uint32 indirectArgsSize = 0;
|
||||
@@ -1124,9 +1123,9 @@ void DrawEmitterGPU(RenderContextBatch& renderContextBatch, ParticleBuffer* buff
|
||||
if (GPUEmitterDraws.Count() == 0)
|
||||
{
|
||||
// The first emitter schedules the drawing of all batched draws
|
||||
renderContextBatch.GetMainContext().List->AddDelayedDraw([](RenderContextBatch& renderContextBatch, int32 contextIndex)
|
||||
renderContextBatch.GetMainContext().List->AddDelayedDraw([](GPUContext* context, RenderContextBatch& renderContextBatch, int32 renderContextIndex)
|
||||
{
|
||||
DrawEmittersGPU(renderContextBatch);
|
||||
DrawEmittersGPU(context, renderContextBatch);
|
||||
});
|
||||
}
|
||||
GPUEmitterDraws.Add({ buffer, drawCall, drawModes, staticFlags, bounds, renderModulesIndices, indirectArgsSize, sortOrder, sorting });
|
||||
|
||||
@@ -258,18 +258,17 @@ void RenderList::AddSettingsBlend(IPostFxSettingsProvider* provider, float weigh
|
||||
|
||||
void RenderList::AddDelayedDraw(DelayedDraw&& func)
|
||||
{
|
||||
MemPoolLocker.Lock(); // TODO: convert _delayedDraws into RenderListBuffer with usage of arena Memory for fast alloc
|
||||
_delayedDraws.Add(MoveTemp(func));
|
||||
MemPoolLocker.Unlock();
|
||||
}
|
||||
|
||||
void RenderList::DrainDelayedDraws(RenderContextBatch& renderContextBatch, int32 contextIndex)
|
||||
void RenderList::DrainDelayedDraws(GPUContext* context, RenderContextBatch& renderContextBatch, int32 renderContextIndex)
|
||||
{
|
||||
if (_delayedDraws.IsEmpty())
|
||||
if (_delayedDraws.Count() == 0)
|
||||
return;
|
||||
PROFILE_CPU();
|
||||
for (DelayedDraw& e : _delayedDraws)
|
||||
e(renderContextBatch, contextIndex);
|
||||
_delayedDraws.SetCapacity(0);
|
||||
e(context, renderContextBatch, renderContextIndex);
|
||||
_delayedDraws.Clear();
|
||||
}
|
||||
|
||||
void RenderList::BlendSettings()
|
||||
@@ -495,7 +494,6 @@ RenderList::RenderList(const SpawnParams& params)
|
||||
, ObjectBuffer(0, PixelFormat::R32G32B32A32_Float, false, TEXT("Object Buffer"))
|
||||
, TempObjectBuffer(0, PixelFormat::R32G32B32A32_Float, false, TEXT("Object Buffer"))
|
||||
, _instanceBuffer(0, sizeof(ShaderObjectDrawInstanceData), TEXT("Instance Buffer"), GPUVertexLayout::Get({ { VertexElement::Types::Attribute0, 3, 0, 1, PixelFormat::R32_UInt } }))
|
||||
, _delayedDraws(&Memory)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
@@ -459,13 +459,14 @@ public:
|
||||
/// </summary>
|
||||
DynamicTypedBuffer TempObjectBuffer;
|
||||
|
||||
typedef Function<void(RenderContextBatch& renderContextBatch, int32 contextIndex)> DelayedDraw;
|
||||
typedef Function<void(GPUContext* context, RenderContextBatch& renderContextBatch, int32 renderContextIndex)> DelayedDraw;
|
||||
void AddDelayedDraw(DelayedDraw&& func);
|
||||
void DrainDelayedDraws(RenderContextBatch& renderContextBatch, int32 contextIndex);
|
||||
void DrainDelayedDraws(GPUContext* context, RenderContextBatch& renderContextBatch, int32 renderContextIndex);
|
||||
|
||||
/// <summary>
|
||||
/// Adds a custom callback (e.g. a lambda) to invoke after scene draw calls are collected on the main thread (some async draw tasks might still be active). Allows for safe usage of GPUContext for draw preparations or to perform GPU-driven drawing.
|
||||
/// </summary>
|
||||
/// <remarks>Can be called asynchronously during scene rendering (thread-safe internally). The lambda is allocated by the concurrent arena allocator owned by the RenderList.</remarks>
|
||||
template<typename T>
|
||||
FORCE_INLINE void AddDelayedDraw(const T& lambda)
|
||||
{
|
||||
@@ -476,7 +477,7 @@ public:
|
||||
|
||||
private:
|
||||
DynamicVertexBuffer _instanceBuffer;
|
||||
Array<DelayedDraw, ConcurrentArenaAllocation> _delayedDraws;
|
||||
RenderListBuffer<DelayedDraw> _delayedDraws;
|
||||
|
||||
public:
|
||||
/// <summary>
|
||||
|
||||
@@ -461,7 +461,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont
|
||||
|
||||
// Perform custom post-scene drawing (e.g. GPU dispatches used by VFX)
|
||||
for (int32 i = 0; i < renderContextBatch.Contexts.Count(); i++)
|
||||
renderContextBatch.Contexts[i].List->DrainDelayedDraws(renderContextBatch, i);
|
||||
renderContextBatch.Contexts[i].List->DrainDelayedDraws(context, renderContextBatch, i);
|
||||
|
||||
#if USE_EDITOR
|
||||
GBufferPass::Instance()->OverrideDrawCalls(renderContext);
|
||||
|
||||
Reference in New Issue
Block a user