From 55f73b6cf7acf55613d52ad528db70eb69f03eff Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 9 Feb 2026 23:03:25 +0100 Subject: [PATCH] Optimize Animated Models bones updating with a batches memory pass and manual resource transitions batch #3917 #3827 --- Source/Engine/Level/Actors/AnimatedModel.cpp | 83 +++++++++++++++++--- Source/Engine/Renderer/RenderList.cpp | 31 ++++++++ Source/Engine/Renderer/RenderList.h | 19 +++++ Source/Engine/Renderer/Renderer.cpp | 2 + 4 files changed, 125 insertions(+), 10 deletions(-) diff --git a/Source/Engine/Level/Actors/AnimatedModel.cpp b/Source/Engine/Level/Actors/AnimatedModel.cpp index f6ac51f1c..11497e558 100644 --- a/Source/Engine/Level/Actors/AnimatedModel.cpp +++ b/Source/Engine/Level/Actors/AnimatedModel.cpp @@ -14,15 +14,84 @@ #include "Engine/Content/Deprecated.h" #include "Engine/Graphics/GPUContext.h" #include "Engine/Graphics/GPUDevice.h" +#include "Engine/Graphics/GPUPass.h" #include "Engine/Graphics/RenderTask.h" #include "Engine/Graphics/Models/MeshAccessor.h" #include "Engine/Graphics/Models/MeshDeformation.h" +#include "Engine/Renderer/RenderList.h" #include "Engine/Level/Scene/Scene.h" #include "Engine/Level/SceneObjectsFactory.h" -#include "Engine/Profiler/ProfilerMemory.h" -#include "Engine/Renderer/RenderList.h" +#include "Engine/Profiler/Profiler.h" #include "Engine/Serialization/Serialization.h" +// Implements efficient skinning data update within a shared GPUMemoryPass with manual resource transitions batched for all animated models. +class AnimatedModelRenderListExtension : public RenderList::IExtension +{ +public: + struct Item + { + GPUBuffer* BoneMatrices; + void* Data; + int32 Size; + }; + + RenderListBuffer Items; + + void PreDraw(GPUContext* context, RenderContextBatch& renderContextBatch) override + { + Items.Clear(); + } + + void PostDraw(GPUContext* context, RenderContextBatch& renderContextBatch) override + { + const int32 count = Items.Count(); + if (count == 0) + return; + PROFILE_GPU_CPU_NAMED("Update Bones"); + GPUMemoryPass pass(context); + Item* items = Items.Get(); + + // Special case for D3D11 backend that doesn't need transitions + if (context->GetDevice()->GetRendererType() <= RendererType::DirectX11) + { + for (int32 i = 0; i < count; i++) + { + Item& item = items[i]; + context->UpdateBuffer(item.BoneMatrices, item.Data, item.Size); + } + } + else + { + // Batch resource barriers for buffer update + for (int32 i = 0; i < count; i++) + pass.Transition(items[i].BoneMatrices, GPUResourceAccess::CopyWrite); + + // Update all buffers within Memory Pass (no barriers between) + for (int32 i = 0; i < count; i++) + { + Item& item = items[i]; + context->UpdateBuffer(item.BoneMatrices, item.Data, item.Size); + } + + // Batch resource barriers for reading in Vertex Shader + for (int32 i = 0; i < count; i++) + pass.Transition(items[i].BoneMatrices, GPUResourceAccess::ShaderReadGraphics); + } + +#if COMPILE_WITH_PROFILER + // Insert amount of kilobytes of data updated into profiler trace + uint32 dataSize = 0; + for (int32 i = 0; i < count; i++) + dataSize += items[i].Size; + ZoneValue(dataSize / 1024); +#endif + + Items.Clear(); + } +}; + +AnimatedModelRenderListExtension RenderListExtension; + AnimatedModel::AnimatedModel(const SpawnParams& params) : ModelInstanceActor(params) , _actualMode(AnimationUpdateMode::Never) @@ -1013,10 +1082,7 @@ void AnimatedModel::Draw(RenderContext& renderContext) // Flush skinning data with GPU if (_skinningData.IsDirty()) { - renderContext.List->AddDelayedDraw([this](GPUContext* context, RenderContextBatch& renderContextBatch, int32 renderContextIndex) - { - context->UpdateBuffer(_skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count()); - }); + RenderListExtension.Items.Add({ _skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count() }); _skinningData.OnFlush(); } @@ -1059,10 +1125,7 @@ void AnimatedModel::Draw(RenderContextBatch& renderContextBatch) // Flush skinning data with GPU if (_skinningData.IsDirty()) { - renderContext.List->AddDelayedDraw([this](GPUContext* context, RenderContextBatch& renderContextBatch, int32 renderContextIndex) - { - context->UpdateBuffer(_skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count()); - }); + RenderListExtension.Items.Add({ _skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count() }); _skinningData.OnFlush(); } diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index 23fcd52dc..ac643b4e8 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -31,6 +31,13 @@ namespace Array FreeRenderList; Array> MemPool; CriticalSection MemPoolLocker; + + typedef Array> ExtensionsList; + ExtensionsList& GetExtensions() + { + static ExtensionsList list; + return list; + } } void ShaderObjectData::Store(const Matrix& worldMatrix, const Matrix& prevWorldMatrix, const Rectangle& lightmapUVsArea, const Float3& geometrySize, float perInstanceRandom, float worldDeterminantSign, float lodDitherFactor) @@ -236,6 +243,16 @@ void RenderList::CleanupCache() MemPoolLocker.Unlock(); } +RenderList::IExtension::IExtension() +{ + GetExtensions().Add(this); +} + +RenderList::IExtension::~IExtension() +{ + GetExtensions().Remove(this); +} + bool RenderList::BlendableSettings::operator<(const BlendableSettings& other) const { // Sort by higher priority @@ -271,6 +288,20 @@ void RenderList::DrainDelayedDraws(GPUContext* context, RenderContextBatch& rend _delayedDraws.Clear(); } +#define LOOP_EXTENSIONS() const auto& extensions = GetExtensions(); for (auto* e : extensions) + +void RenderList::PreDraw(GPUContext* context, RenderContextBatch& renderContextBatch) +{ + LOOP_EXTENSIONS() + e->PreDraw(context, renderContextBatch); +} + +void RenderList::PostDraw(GPUContext* context, RenderContextBatch& renderContextBatch) +{ + LOOP_EXTENSIONS() + e->PostDraw(context, renderContextBatch); +} + void RenderList::BlendSettings() { PROFILE_CPU(); diff --git a/Source/Engine/Renderer/RenderList.h b/Source/Engine/Renderer/RenderList.h index 9b0a91ed6..b4b7121de 100644 --- a/Source/Engine/Renderer/RenderList.h +++ b/Source/Engine/Renderer/RenderList.h @@ -326,6 +326,21 @@ API_CLASS(Sealed) class FLAXENGINE_API RenderList : public ScriptingObject /// static void CleanupCache(); + /// + /// The rendering extension interface for custom drawing/effects linked to RenderList. Can be used during async scene drawing and further drawing/processing for more optimized rendering. + /// + class FLAXENGINE_API IExtension + { + public: + IExtension(); + virtual ~IExtension(); + + // Event called before collecting draw calls. Can be used for initialization. + virtual void PreDraw(GPUContext* context, RenderContextBatch& renderContextBatch) {} + // Event called after collecting draw calls. Can be used for cleanup or to perform additional drawing using collected draw calls data such as batched data processing. + virtual void PostDraw(GPUContext* context, RenderContextBatch& renderContextBatch) {} + }; + public: /// /// Memory storage with all draw-related data that lives during a single frame rendering time. Thread-safe to allocate memory during rendering jobs. @@ -475,6 +490,10 @@ public: AddDelayedDraw(MoveTemp(func)); } + // IExtension implementation + void PreDraw(GPUContext* context, RenderContextBatch& renderContextBatch); + void PostDraw(GPUContext* context, RenderContextBatch& renderContextBatch); + private: DynamicVertexBuffer _instanceBuffer; RenderListBuffer _delayedDraws; diff --git a/Source/Engine/Renderer/Renderer.cpp b/Source/Engine/Renderer/Renderer.cpp index 2f17e5294..96253934e 100644 --- a/Source/Engine/Renderer/Renderer.cpp +++ b/Source/Engine/Renderer/Renderer.cpp @@ -423,6 +423,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont if (setup.UseMotionVectors) view.Pass |= DrawPass::MotionVectors; renderContextBatch.GetMainContext() = renderContext; // Sync render context in batch with the current value + renderContext.List->PreDraw(context, renderContextBatch); bool drawShadows = !isGBufferDebug && EnumHasAnyFlags(view.Flags, ViewFlags::Shadows) && ShadowsPass::Instance()->IsReady(); switch (renderContext.View.Mode) @@ -462,6 +463,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont // Perform custom post-scene drawing (eg. GPU dispatches used by VFX) for (int32 i = 0; i < renderContextBatch.Contexts.Count(); i++) renderContextBatch.Contexts[i].List->DrainDelayedDraws(context, renderContextBatch, i); + renderContext.List->PostDraw(context, renderContextBatch); #if USE_EDITOR GBufferPass::Instance()->OverrideDrawCalls(renderContext);