Optimize Animated Models bones updating with a batched memory pass and manual resource transitions batch

#3917 #3827
This commit is contained in:
Wojtek Figat
2026-02-09 23:03:25 +01:00
parent 0f6c1aea62
commit 55f73b6cf7
4 changed files with 125 additions and 10 deletions

View File

@@ -14,15 +14,84 @@
#include "Engine/Content/Deprecated.h"
#include "Engine/Graphics/GPUContext.h"
#include "Engine/Graphics/GPUDevice.h"
#include "Engine/Graphics/GPUPass.h"
#include "Engine/Graphics/RenderTask.h"
#include "Engine/Graphics/Models/MeshAccessor.h"
#include "Engine/Graphics/Models/MeshDeformation.h"
#include "Engine/Renderer/RenderList.h"
#include "Engine/Level/Scene/Scene.h"
#include "Engine/Level/SceneObjectsFactory.h"
#include "Engine/Profiler/ProfilerMemory.h"
#include "Engine/Renderer/RenderList.h"
#include "Engine/Profiler/Profiler.h"
#include "Engine/Serialization/Serialization.h"
// Render list extension that gathers bone matrices uploads queued by animated models and flushes them all together in PostDraw.
// The uploads run inside one shared GPUMemoryPass and (on backends other than D3D11) the resource transitions are issued in two batches around them.
class AnimatedModelRenderListExtension : public RenderList::IExtension
{
public:
    // Single queued upload: the destination buffer and the source memory block to copy into it.
    struct Item
    {
        // Destination buffer passed to GPUContext::UpdateBuffer.
        GPUBuffer* BoneMatrices;
        // Source data pointer passed to GPUContext::UpdateBuffer (not owned by the item).
        void* Data;
        // Source data size passed to GPUContext::UpdateBuffer (the profiler statistic below treats it as bytes).
        int32 Size;
    };

    // Uploads queued since the last PreDraw/PostDraw.
    RenderListBuffer<Item> Items;

    // Drops anything that is still queued so the upcoming draw starts with an empty list.
    void PreDraw(GPUContext* context, RenderContextBatch& renderContextBatch) override
    {
        Items.Clear();
    }

    // Uploads all queued items to the GPU and empties the queue. Does nothing when the queue is empty.
    void PostDraw(GPUContext* context, RenderContextBatch& renderContextBatch) override
    {
        const int32 queuedCount = Items.Count();
        if (queuedCount == 0)
            return;
        PROFILE_GPU_CPU_NAMED("Update Bones");
        GPUMemoryPass pass(context);
        Item* const first = Items.Get();
        Item* const last = first + queuedCount;

        // D3D11 (and anything ordered before it in RendererType) doesn't need the manual transitions
        const bool skipTransitions = context->GetDevice()->GetRendererType() <= RendererType::DirectX11;

        // Batch resource barriers for buffer update
        if (!skipTransitions)
        {
            for (Item* it = first; it != last; ++it)
                pass.Transition(it->BoneMatrices, GPUResourceAccess::CopyWrite);
        }

        // Update all buffers within Memory Pass (no barriers between)
        for (Item* it = first; it != last; ++it)
            context->UpdateBuffer(it->BoneMatrices, it->Data, it->Size);

        // Batch resource barriers for reading in Vertex Shader
        if (!skipTransitions)
        {
            for (Item* it = first; it != last; ++it)
                pass.Transition(it->BoneMatrices, GPUResourceAccess::ShaderReadGraphics);
        }

#if COMPILE_WITH_PROFILER
        // Report the total amount of uploaded data (in kilobytes) to the profiler zone
        uint32 uploadedSize = 0;
        for (Item* it = first; it != last; ++it)
            uploadedSize += it->Size;
        ZoneValue(uploadedSize / 1024);
#endif

        Items.Clear();
    }
};
// Single file-level instance of the extension. The RenderList::IExtension base constructor adds it to the list of extensions
// that RenderList::PreDraw/PostDraw iterate over; AnimatedModel::Draw pushes pending bone uploads into its Items.
AnimatedModelRenderListExtension RenderListExtension;
AnimatedModel::AnimatedModel(const SpawnParams& params)
: ModelInstanceActor(params)
, _actualMode(AnimationUpdateMode::Never)
@@ -1013,10 +1082,7 @@ void AnimatedModel::Draw(RenderContext& renderContext)
// Flush skinning data with GPU
if (_skinningData.IsDirty())
{
renderContext.List->AddDelayedDraw([this](GPUContext* context, RenderContextBatch& renderContextBatch, int32 renderContextIndex)
{
context->UpdateBuffer(_skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count());
});
RenderListExtension.Items.Add({ _skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count() });
_skinningData.OnFlush();
}
@@ -1059,10 +1125,7 @@ void AnimatedModel::Draw(RenderContextBatch& renderContextBatch)
// Flush skinning data with GPU
if (_skinningData.IsDirty())
{
renderContext.List->AddDelayedDraw([this](GPUContext* context, RenderContextBatch& renderContextBatch, int32 renderContextIndex)
{
context->UpdateBuffer(_skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count());
});
RenderListExtension.Items.Add({ _skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count() });
_skinningData.OnFlush();
}

View File

@@ -31,6 +31,13 @@ namespace
Array<RenderList*> FreeRenderList;
Array<Pair<void*, uintptr>> MemPool;
CriticalSection MemPoolLocker;
// List of registered render list extensions (non-owning pointers). Presumably capped at 8 entries by FixedAllocation<8> - TODO confirm.
using ExtensionsList = Array<RenderList::IExtension*, FixedAllocation<8>>;

// Returns the shared list of registered extensions.
// The list is a function-local static, so it gets constructed on the first call - this keeps it usable from IExtension constructors of global objects.
ExtensionsList& GetExtensions()
{
    static ExtensionsList registry;
    return registry;
}
}
void ShaderObjectData::Store(const Matrix& worldMatrix, const Matrix& prevWorldMatrix, const Rectangle& lightmapUVsArea, const Float3& geometrySize, float perInstanceRandom, float worldDeterminantSign, float lodDitherFactor)
@@ -236,6 +243,16 @@ void RenderList::CleanupCache()
MemPoolLocker.Unlock();
}
// Registers the extension being constructed in the shared extensions list.
RenderList::IExtension::IExtension()
{
    ExtensionsList& registry = GetExtensions();
    registry.Add(this);
}
// Unregisters the extension being destroyed from the shared extensions list.
RenderList::IExtension::~IExtension()
{
    ExtensionsList& registry = GetExtensions();
    registry.Remove(this);
}
bool RenderList::BlendableSettings::operator<(const BlendableSettings& other) const
{
// Sort by higher priority
@@ -271,6 +288,20 @@ void RenderList::DrainDelayedDraws(GPUContext* context, RenderContextBatch& rend
_delayedDraws.Clear();
}
// Iterates over all registered extensions: declares a local 'extensions' reference and opens a range-for with 'e' as the current extension.
// The statement that follows the macro becomes the loop body. It expands to two statements and declares a local, so use it only
// directly inside a braced scope (never as the sole body of an unbraced if/else) and at most once per scope.
#define LOOP_EXTENSIONS() const auto& extensions = GetExtensions(); for (auto* e : extensions)
// Forwards the pre-draw event to every registered extension (in registration list order).
void RenderList::PreDraw(GPUContext* context, RenderContextBatch& renderContextBatch)
{
    const ExtensionsList& registered = GetExtensions();
    for (IExtension* extension : registered)
        extension->PreDraw(context, renderContextBatch);
}
// Forwards the post-draw event to every registered extension (in registration list order).
void RenderList::PostDraw(GPUContext* context, RenderContextBatch& renderContextBatch)
{
    const ExtensionsList& registered = GetExtensions();
    for (IExtension* extension : registered)
        extension->PostDraw(context, renderContextBatch);
}
void RenderList::BlendSettings()
{
PROFILE_CPU();

View File

@@ -326,6 +326,21 @@ API_CLASS(Sealed) class FLAXENGINE_API RenderList : public ScriptingObject
/// </summary>
static void CleanupCache();
/// <summary>
/// The rendering extension interface for custom drawing/effects linked to RenderList. Can be used during async scene drawing and further drawing/processing for more optimized rendering.
/// </summary>
class FLAXENGINE_API IExtension
{
public:
IExtension();
virtual ~IExtension();
// Event called before collecting draw calls. Can be used for initialization.
virtual void PreDraw(GPUContext* context, RenderContextBatch& renderContextBatch) {}
// Event called after collecting draw calls. Can be used for cleanup or to perform additional drawing using collected draw calls data such as batched data processing.
virtual void PostDraw(GPUContext* context, RenderContextBatch& renderContextBatch) {}
};
public:
/// <summary>
/// Memory storage with all draw-related data that lives during a single frame rendering time. Thread-safe to allocate memory during rendering jobs.
@@ -475,6 +490,10 @@ public:
AddDelayedDraw(MoveTemp(func));
}
// IExtension events dispatching (RenderList itself is not an IExtension - these forward to all registered extensions).
// Calls IExtension::PreDraw on every registered extension. Invoked by the renderer before the scene drawing starts.
void PreDraw(GPUContext* context, RenderContextBatch& renderContextBatch);
// Calls IExtension::PostDraw on every registered extension. Invoked by the renderer after the delayed draws are drained.
void PostDraw(GPUContext* context, RenderContextBatch& renderContextBatch);
private:
DynamicVertexBuffer _instanceBuffer;
RenderListBuffer<DelayedDraw> _delayedDraws;

View File

@@ -423,6 +423,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont
if (setup.UseMotionVectors)
view.Pass |= DrawPass::MotionVectors;
renderContextBatch.GetMainContext() = renderContext; // Sync render context in batch with the current value
renderContext.List->PreDraw(context, renderContextBatch);
bool drawShadows = !isGBufferDebug && EnumHasAnyFlags(view.Flags, ViewFlags::Shadows) && ShadowsPass::Instance()->IsReady();
switch (renderContext.View.Mode)
@@ -462,6 +463,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont
// Perform custom post-scene drawing (eg. GPU dispatches used by VFX)
for (int32 i = 0; i < renderContextBatch.Contexts.Count(); i++)
renderContextBatch.Contexts[i].List->DrainDelayedDraws(context, renderContextBatch, i);
renderContext.List->PostDraw(context, renderContextBatch);
#if USE_EDITOR
GBufferPass::Instance()->OverrideDrawCalls(renderContext);