From c18b9163ca92ee82246d1098402c25cd121b5ca6 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 6 Feb 2026 09:11:12 +0100 Subject: [PATCH 01/23] Refactor Job System to reduce mutex usage with more atomic operations #3917 --- Source/Engine/Threading/JobSystem.cpp | 305 ++++++++++++++------------ 1 file changed, 167 insertions(+), 138 deletions(-) diff --git a/Source/Engine/Threading/JobSystem.cpp b/Source/Engine/Threading/JobSystem.cpp index 692a088b7..cbdf53136 100644 --- a/Source/Engine/Threading/JobSystem.cpp +++ b/Source/Engine/Threading/JobSystem.cpp @@ -8,7 +8,6 @@ #include "Engine/Core/Types/Span.h" #include "Engine/Core/Types/Pair.h" #include "Engine/Core/Memory/SimpleHeapAllocation.h" -#include "Engine/Core/Collections/Dictionary.h" #include "Engine/Core/Collections/RingBuffer.h" #include "Engine/Engine/EngineService.h" #include "Engine/Profiler/ProfilerCPU.h" @@ -22,14 +21,6 @@ #if JOB_SYSTEM_ENABLED -// Local allocator for job system memory that uses internal pooling and assumes that JobsLocker is taken (write access owned by the calling thread). -class JobSystemAllocation : public SimpleHeapAllocation -{ -public: - static void* Allocate(uintptr size); - static void Free(void* ptr, uintptr size); -}; - class JobSystemService : public EngineService { public: @@ -43,24 +34,25 @@ public: void Dispose() override; }; -struct JobData -{ - int32 Index; - int64 JobKey; -}; - -template<> -struct TIsPODType -{ - enum { Value = true }; -}; - -struct JobContext +// Holds a single job dispatch data +struct alignas(int64) JobContext { + // The next index of the job to process updated when picking a job by the thread. + volatile int64 JobIndex; + // The number of jobs left to process updated after job completion by the thread. volatile int64 JobsLeft; - int32 DependenciesLeft; + // The unique label of this job used to identify it. Set to -1 when job is done. + volatile int64 JobLabel; + // Utility atomic counter used to indicate that any job is waiting for this one to finish. Then Dependants can be accessed within thread-safe JobsLocker. + volatile int64 DependantsCount; + // The number of dependency jobs left to be finished before starting this job. + volatile int64 DependenciesLeft; + // The total number of jobs to process (in this context). + int32 JobsCount; + // The job function to execute. Function Job; - Array Dependants; + // List of dependant jobs to signal when this job is done. + Array Dependants; }; template<> @@ -92,50 +84,36 @@ public: namespace { JobSystemService JobSystemInstance; - Array> MemPool; Thread* Threads[PLATFORM_THREADS_LIMIT / 2] = {}; int32 ThreadsCount = 0; bool JobStartingOnDispatch = true; volatile int64 ExitFlag = 0; volatile int64 JobLabel = 0; - Dictionary JobContexts; + volatile int64 JobEndLabel = 0; + volatile int64 JobStartLabel = 0; + volatile int64 JobContextsCount = 0; + uint32 JobContextsSize = 0; + uint32 JobContextsMask = 0; + JobContext* JobContexts = nullptr; ConditionVariable JobsSignal; CriticalSection JobsMutex; ConditionVariable WaitSignal; CriticalSection WaitMutex; CriticalSection JobsLocker; - RingBuffer Jobs; -} - -void* JobSystemAllocation::Allocate(uintptr size) -{ - void* result = nullptr; - for (int32 i = 0; i < MemPool.Count(); i++) - { - if (MemPool.Get()[i].Second == size) - { - result = MemPool.Get()[i].First; - MemPool.RemoveAt(i); - break; - } - } - if (!result) - { - PROFILE_MEM(EngineThreading); - result = Platform::Allocate(size, 16); - } - return result; -} - -void JobSystemAllocation::Free(void* ptr, uintptr size) -{ - PROFILE_MEM(EngineThreading); - MemPool.Add({ ptr, size }); +#define GET_CONTEXT_INDEX(label) (uint32)((label) & (int64)JobContextsMask) } bool JobSystemService::Init() { PROFILE_MEM(EngineThreading); + + // Initialize job context storage (fixed-size ring buffer for active jobs tracking) + JobContextsSize = 256; + JobContextsMask = JobContextsSize - 1; + JobContexts = (JobContext*)Platform::Allocate(JobContextsSize * sizeof(JobContext), alignof(JobContext)); + Platform::MemoryClear(JobContexts, sizeof(JobContextsSize * sizeof(JobContext))); + + // Spawn threads ThreadsCount = Math::Min(Platform::GetCPUInfo().LogicalProcessorCount, ARRAY_COUNT(Threads)); for (int32 i = 0; i < ThreadsCount; i++) { @@ -146,6 +124,7 @@ bool JobSystemService::Init() return true; Threads[i] = thread; } + return false; } @@ -171,35 +150,66 @@ void JobSystemService::Dispose() } } - JobContexts.SetCapacity(0); - Jobs.Release(); - for (auto& e : MemPool) - Platform::Free(e.First); - MemPool.Clear(); + Platform::Free(JobContexts); + JobContexts = nullptr; } int32 JobSystemThread::Run() { + // Pin thread to the physical core Platform::SetThreadAffinityMask(1ull << Index); - JobData data; - Function job; bool attachCSharpThread = true; MONO_THREAD_INFO_TYPE* monoThreadInfo = nullptr; while (Platform::AtomicRead(&ExitFlag) == 0) { // Try to get a job - JobsLocker.Lock(); - if (Jobs.Count() != 0) + int32 jobIndex; + JobContext* jobContext = nullptr; { - data = Jobs.PeekFront(); - Jobs.PopFront(); - const JobContext& context = ((const Dictionary&)JobContexts).At(data.JobKey); - job = context.Job; - } - JobsLocker.Unlock(); + int64 jobOffset = 0; + RETRY: + int64 jobStartLabel = Platform::AtomicRead(&JobStartLabel) + jobOffset; + int64 jobEndLabel = Platform::AtomicRead(&JobEndLabel); + if (jobStartLabel <= jobEndLabel && jobEndLabel > 0) + { + jobContext = &JobContexts[GET_CONTEXT_INDEX(jobStartLabel)]; + if (Platform::AtomicRead(&jobContext->DependenciesLeft) > 0) + { + // This job still waits for dependency so skip it for now and try the next one + jobOffset++; + jobContext = nullptr; + goto RETRY; + } - if (job.IsBinded()) + // Move forward with index for a job + jobIndex = (int32)(Platform::InterlockedIncrement(&jobContext->JobIndex) - 1); + if (jobIndex < jobContext->JobsCount) + { + // Index is valid + } + else if (jobStartLabel < jobEndLabel && jobOffset == 0) + { + // No more jobs inside this context, move to the next one + Platform::InterlockedCompareExchange(&JobStartLabel, jobStartLabel + 1, jobStartLabel); + jobContext = nullptr; + goto RETRY; + } + else + { + // No more jobs + jobContext = nullptr; + if (jobStartLabel < jobEndLabel) + { + // Try with a different one before going to sleep + jobOffset++; + goto RETRY; + } + } + } + } + + if (jobContext) { #if USE_CSHARP // Ensure to have C# thread attached to this thead (late init due to MCore being initialized after Job System) @@ -212,37 +222,39 @@ int32 JobSystemThread::Run() #endif // Run job - job(data.Index); + jobContext->Job(jobIndex); // Move forward with the job queue - bool notifyWaiting = false; - JobsLocker.Lock(); - JobContext& context = JobContexts.At(data.JobKey); - if (Platform::InterlockedDecrement(&context.JobsLeft) <= 0) + if (Platform::InterlockedDecrement(&jobContext->JobsLeft) <= 0) { - // Update any dependant jobs - for (int64 dependant : context.Dependants) + // Mark job as done before processing dependants + Platform::AtomicStore(&jobContext->JobLabel, -1); + + // Check if any other job waits on this one + if (Platform::AtomicRead(&jobContext->DependantsCount) != 0) { - JobContext& dependantContext = JobContexts.At(dependant); - if (--dependantContext.DependenciesLeft <= 0) + // Update dependant jobs + JobsLocker.Lock(); + for (int64 dependant : jobContext->Dependants) { - // Dispatch dependency when it's ready - JobData dependantData; - dependantData.JobKey = dependant; - for (dependantData.Index = 0; dependantData.Index < dependantContext.JobsLeft; dependantData.Index++) - Jobs.PushBack(dependantData); + JobContext& dependantContext = JobContexts[GET_CONTEXT_INDEX(dependant)]; + if (dependantContext.JobLabel == dependant) + Platform::InterlockedDecrement(&dependantContext.DependenciesLeft); } + JobsLocker.Unlock(); } - // Remove completed context - JobContexts.Remove(data.JobKey); - notifyWaiting = true; - } - JobsLocker.Unlock(); - if (notifyWaiting) - WaitSignal.NotifyAll(); + // Cleanup completed context + jobContext->Job.Unbind(); + jobContext->Dependants.Clear(); + Platform::AtomicStore(&jobContext->DependantsCount, 0); + Platform::AtomicStore(&jobContext->DependenciesLeft, -999); // Mark to indicate deleted context + Platform::AtomicStore(&jobContext->JobLabel, -1); + Platform::InterlockedDecrement(&JobContextsCount); - job.Unbind(); + // Wakeup any thread waiting for the jobs to complete + WaitSignal.NotifyAll(); + } } else { @@ -266,8 +278,8 @@ void JobSystem::Execute(const Function& job, int32 jobCount) if (jobCount > 1) { // Async - const int64 jobWaitHandle = Dispatch(job, jobCount); - Wait(jobWaitHandle); + const int64 label = Dispatch(job, jobCount); + Wait(label); } else #endif @@ -284,21 +296,32 @@ int64 JobSystem::Dispatch(const Function& job, int32 jobCount) return 0; PROFILE_CPU(); #if JOB_SYSTEM_ENABLED - const auto label = Platform::InterlockedAdd(&JobLabel, (int64)jobCount) + jobCount; + while (Platform::InterlockedIncrement(&JobContextsCount) >= JobContextsSize) + { + // Too many jobs in flight, wait for some to complete to free up contexts + PROFILE_CPU_NAMED("JOB SYSTEM OVERFLOW"); + ZoneColor(TracyWaitZoneColor); + Platform::InterlockedDecrement(&JobContextsCount); + Platform::Sleep(1); + } - JobData data; - data.JobKey = label; + // Get a new label + const int64 label = Platform::InterlockedIncrement(&JobLabel); - JobContext context; + // Build job + JobContext& context = JobContexts[GET_CONTEXT_INDEX(label)]; context.Job = job; + context.JobIndex = 0; context.JobsLeft = jobCount; + context.JobLabel = label; + context.DependantsCount = 0; context.DependenciesLeft = 0; + context.JobsCount = jobCount; + ASSERT(context.Dependants.IsEmpty()); + context.Dependants.Clear(); - JobsLocker.Lock(); - JobContexts.Add(label, MoveTemp(context)); - for (data.Index = 0; data.Index < jobCount; data.Index++) - Jobs.PushBack(data); - JobsLocker.Unlock(); + // Move the job queue forward + Platform::InterlockedIncrement(&JobEndLabel); if (JobStartingOnDispatch) { @@ -321,34 +344,48 @@ int64 JobSystem::Dispatch(const Function& job, Span dependen if (jobCount <= 0) return 0; PROFILE_CPU(); + PROFILE_MEM(EngineThreading); #if JOB_SYSTEM_ENABLED - const auto label = Platform::InterlockedAdd(&JobLabel, (int64)jobCount) + jobCount; + while (Platform::InterlockedIncrement(&JobContextsCount) >= JobContextsSize) + { + // Too many jobs in flight, wait for some to complete to free up contexts + PROFILE_CPU_NAMED("JOB SYSTEM OVERFLOW"); + ZoneColor(TracyWaitZoneColor); + Platform::InterlockedDecrement(&JobContextsCount); + Platform::Sleep(1); + } - JobData data; - data.JobKey = label; + // Get a new label + const int64 label = Platform::InterlockedIncrement(&JobLabel); - JobContext context; + // Build job + JobContext& context = JobContexts[GET_CONTEXT_INDEX(label)]; context.Job = job; + context.JobIndex = 0; context.JobsLeft = jobCount; + context.JobLabel = label; + context.DependantsCount = 0; context.DependenciesLeft = 0; - - JobsLocker.Lock(); - for (int64 dependency : dependencies) + context.JobsCount = jobCount; + ASSERT(context.Dependants.IsEmpty()); + context.Dependants.Clear(); { - if (JobContext* dependencyContext = JobContexts.TryGet(dependency)) + JobsLocker.Lock(); + for (int64 dependency : dependencies) { - context.DependenciesLeft++; - dependencyContext->Dependants.Add(label); + JobContext& dependencyContext = JobContexts[GET_CONTEXT_INDEX(dependency)]; + if (Platform::AtomicRead(&dependencyContext.JobLabel) == dependency) + { + Platform::InterlockedIncrement(&dependencyContext.DependantsCount); + dependencyContext.Dependants.Add(label); + context.DependenciesLeft++; + } } + JobsLocker.Unlock(); } - JobContexts.Add(label, MoveTemp(context)); - if (context.DependenciesLeft == 0) - { - // No dependencies left to complete so dispatch now - for (data.Index = 0; data.Index < jobCount; data.Index++) - Jobs.PushBack(data); - } - JobsLocker.Unlock(); + + // Move the job queue forward + Platform::InterlockedIncrement(&JobEndLabel); if (context.DependenciesLeft == 0 && JobStartingOnDispatch) { @@ -369,19 +406,17 @@ int64 JobSystem::Dispatch(const Function& job, Span dependen void JobSystem::Wait() { #if JOB_SYSTEM_ENABLED - JobsLocker.Lock(); - int32 numJobs = JobContexts.Count(); - JobsLocker.Unlock(); + PROFILE_CPU(); + ZoneColor(TracyWaitZoneColor); + int64 numJobs = Platform::AtomicRead(&JobContextsCount); while (numJobs > 0) { WaitMutex.Lock(); WaitSignal.Wait(WaitMutex, 1); WaitMutex.Unlock(); - JobsLocker.Lock(); - numJobs = JobContexts.Count(); - JobsLocker.Unlock(); + numJobs = Platform::AtomicRead(&JobContextsCount); } #endif } @@ -394,12 +429,11 @@ void JobSystem::Wait(int64 label) while (Platform::AtomicRead(&ExitFlag) == 0) { - JobsLocker.Lock(); - const JobContext* context = JobContexts.TryGet(label); - JobsLocker.Unlock(); + const JobContext& context = JobContexts[GET_CONTEXT_INDEX(label)]; + const bool finished = Platform::AtomicRead(&context.JobLabel) != label || Platform::AtomicRead(&context.JobsLeft) <= 0; // Skip if context has been already executed (last job removes it) - if (!context) + if (finished) break; // Wait on signal until input label is not yet done @@ -417,15 +451,10 @@ void JobSystem::SetJobStartingOnDispatch(bool value) { #if JOB_SYSTEM_ENABLED JobStartingOnDispatch = value; - if (value) + if (value && (Platform::AtomicRead(&JobEndLabel) - Platform::AtomicRead(&JobStartLabel)) > 0) { - JobsLocker.Lock(); - const int32 count = Jobs.Count(); - JobsLocker.Unlock(); - if (count == 1) - JobsSignal.NotifyOne(); - else if (count != 0) - JobsSignal.NotifyAll(); + // Wake up threads to start processing jobs that may be already in the queue + JobsSignal.NotifyAll(); } #endif } From 20516bb8bc86170cd8ec9d06263b0de34d263e28 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 6 Feb 2026 09:11:45 +0100 Subject: [PATCH 02/23] Fix foliage dithered LOD transitions when using shadows --- Source/Engine/Foliage/Foliage.cpp | 81 ++++++++++++++++++------------- 1 file changed, 48 insertions(+), 33 deletions(-) diff --git a/Source/Engine/Foliage/Foliage.cpp b/Source/Engine/Foliage/Foliage.cpp index f8b9c7b0f..6d3d8b700 100644 --- a/Source/Engine/Foliage/Foliage.cpp +++ b/Source/Engine/Foliage/Foliage.cpp @@ -7,17 +7,17 @@ #include "Engine/Core/Random.h" #include "Engine/Engine/Engine.h" #include "Engine/Graphics/RenderTask.h" +#include "Engine/Graphics/GPUDevice.h" #include "Engine/Content/Deprecated.h" #if !FOLIAGE_USE_SINGLE_QUAD_TREE #include "Engine/Threading/JobSystem.h" #if FOLIAGE_USE_DRAW_CALLS_BATCHING #include "Engine/Graphics/RenderTools.h" -#include "Engine/Graphics/GPUDevice.h" -#include "Engine/Renderer/RenderList.h" #endif #endif #include "Engine/Level/SceneQuery.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Renderer/RenderList.h" #include "Engine/Renderer/GlobalSignDistanceFieldPass.h" #include "Engine/Renderer/GI/GlobalSurfaceAtlasPass.h" #include "Engine/Serialization/Serialization.h" @@ -193,6 +193,8 @@ void Foliage::DrawCluster(RenderContext& renderContext, FoliageCluster* cluster, // Draw visible instances const auto frame = Engine::FrameCount; const auto model = type.Model.Get(); + const auto transitionLOD = renderContext.View.Pass != DrawPass::Depth; // Let the main view pass update LOD transitions + // TODO: move DrawState to be stored per-view (so shadows can fade objects on their own) for (int32 i = 0; i < cluster->Instances.Count(); i++) { auto& instance = *cluster->Instances.Get()[i]; @@ -210,20 +212,29 @@ void Foliage::DrawCluster(RenderContext& renderContext, FoliageCluster* cluster, // Handling model fade-out transition if (modelFrame == frame && instance.DrawState.PrevLOD != -1) { - // Check if start transition - if (instance.DrawState.LODTransition == 255) + if (transitionLOD) { - instance.DrawState.LODTransition = 0; - } + // Check if start transition + if (instance.DrawState.LODTransition == 255) + { + instance.DrawState.LODTransition = 0; + } - RenderTools::UpdateModelLODTransition(instance.DrawState.LODTransition); + RenderTools::UpdateModelLODTransition(instance.DrawState.LODTransition); - // Check if end transition - if (instance.DrawState.LODTransition == 255) - { - instance.DrawState.PrevLOD = lodIndex; + // Check if end transition + if (instance.DrawState.LODTransition == 255) + { + instance.DrawState.PrevLOD = lodIndex; + } + else + { + const auto prevLOD = model->ClampLODIndex(instance.DrawState.PrevLOD); + const float normalizedProgress = static_cast(instance.DrawState.LODTransition) * (1.0f / 255.0f); + DrawInstance(renderContext, instance, type, model, prevLOD, normalizedProgress, drawCallsLists, result); + } } - else + else if (instance.DrawState.LODTransition < 255) { const auto prevLOD = model->ClampLODIndex(instance.DrawState.PrevLOD); const float normalizedProgress = static_cast(instance.DrawState.LODTransition) * (1.0f / 255.0f); @@ -236,29 +247,32 @@ void Foliage::DrawCluster(RenderContext& renderContext, FoliageCluster* cluster, lodIndex += renderContext.View.ModelLODBias; lodIndex = model->ClampLODIndex(lodIndex); - // Check if it's the new frame and could update the drawing state (note: model instance could be rendered many times per frame to different viewports) - if (modelFrame == frame) + if (transitionLOD) { - // Check if start transition - if (instance.DrawState.PrevLOD != lodIndex && instance.DrawState.LODTransition == 255) + // Check if it's the new frame and could update the drawing state (note: model instance could be rendered many times per frame to different viewports) + if (modelFrame == frame) { + // Check if start transition + if (instance.DrawState.PrevLOD != lodIndex && instance.DrawState.LODTransition == 255) + { + instance.DrawState.LODTransition = 0; + } + + RenderTools::UpdateModelLODTransition(instance.DrawState.LODTransition); + + // Check if end transition + if (instance.DrawState.LODTransition == 255) + { + instance.DrawState.PrevLOD = lodIndex; + } + } + // Check if there was a gap between frames in drawing this model instance + else if (modelFrame < frame || instance.DrawState.PrevLOD == -1) + { + // Reset state + instance.DrawState.PrevLOD = lodIndex; instance.DrawState.LODTransition = 0; } - - RenderTools::UpdateModelLODTransition(instance.DrawState.LODTransition); - - // Check if end transition - if (instance.DrawState.LODTransition == 255) - { - instance.DrawState.PrevLOD = lodIndex; - } - } - // Check if there was a gap between frames in drawing this model instance - else if (modelFrame < frame || instance.DrawState.PrevLOD == -1) - { - // Reset state - instance.DrawState.PrevLOD = lodIndex; - instance.DrawState.LODTransition = 255; } // Draw @@ -281,7 +295,8 @@ void Foliage::DrawCluster(RenderContext& renderContext, FoliageCluster* cluster, //DebugDraw::DrawSphere(instance.Bounds, Color::YellowGreen); - instance.DrawState.PrevFrame = frame; + if (transitionLOD) + instance.DrawState.PrevFrame = frame; } } } @@ -350,7 +365,7 @@ void Foliage::DrawCluster(RenderContext& renderContext, FoliageCluster* cluster, draw.DrawState = &instance.DrawState; draw.Bounds = sphere; draw.PerInstanceRandom = instance.Random; - draw.DrawModes = type._drawModes; + draw.DrawModes = type.DrawModes; draw.SetStencilValue(_layer); type.Model->Draw(renderContext, draw); From 70b324cdecaba0a7b7a61112d9c9783dc20f2c9b Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 6 Feb 2026 09:11:55 +0100 Subject: [PATCH 03/23] Optimize included header usage --- Source/Engine/Renderer/RenderList.cpp | 1 + Source/Engine/Renderer/RenderList.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index 544438bb5..453fd71a9 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -15,6 +15,7 @@ #include "Engine/Profiler/Profiler.h" #include "Engine/Content/Assets/CubeTexture.h" #include "Engine/Core/Log.h" +#include "Engine/Core/Math/Half.h" #include "Engine/Graphics/Shaders/GPUVertexLayout.h" #include "Engine/Level/Scene/Lightmap.h" #include "Engine/Level/Actors/PostFxVolume.h" diff --git a/Source/Engine/Renderer/RenderList.h b/Source/Engine/Renderer/RenderList.h index 8eb3540e0..a37d2a941 100644 --- a/Source/Engine/Renderer/RenderList.h +++ b/Source/Engine/Renderer/RenderList.h @@ -4,7 +4,6 @@ #include "Engine/Core/Collections/Array.h" #include "Engine/Core/Memory/ArenaAllocation.h" -#include "Engine/Core/Math/Half.h" #include "Engine/Graphics/PostProcessSettings.h" #include "Engine/Graphics/DynamicBuffer.h" #include "Engine/Scripting/ScriptingObject.h" From 5d0fdc83138e42ddcea44c4f99c090a96ae335ce Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 6 Feb 2026 09:28:06 +0100 Subject: [PATCH 04/23] Add info about skinned model skeleton size --- Source/Editor/Windows/Assets/SkinnedModelWindow.cs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Source/Editor/Windows/Assets/SkinnedModelWindow.cs b/Source/Editor/Windows/Assets/SkinnedModelWindow.cs index 392cc896a..75fa87dfe 100644 --- a/Source/Editor/Windows/Assets/SkinnedModelWindow.cs +++ b/Source/Editor/Windows/Assets/SkinnedModelWindow.cs @@ -70,6 +70,13 @@ namespace FlaxEditor.Windows.Assets return; var nodes = proxy.Asset.Nodes; var bones = proxy.Asset.Bones; + var blendShapes = proxy.Asset.BlendShapes; + + // Info + { + var group = layout.Group("Info"); + group.Label($"Nodes: {nodes.Length}\nBones: {bones.Length}\nBlend Shapes: {blendShapes.Length}").AddCopyContextMenu().Label.Height *= 2.5f; + } // Skeleton Bones { @@ -109,7 +116,6 @@ namespace FlaxEditor.Windows.Assets } // Blend Shapes - var blendShapes = proxy.Asset.BlendShapes; if (blendShapes.Length != 0) { var group = layout.Group("Blend Shapes"); From 4a7f1a5fde285daba16e2038a3207b33a6827eb7 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 6 Feb 2026 10:37:52 +0100 Subject: [PATCH 05/23] Fix various issues --- .../Windows/Assets/AnimationGraphWindow.cs | 9 +++- Source/Engine/Foliage/Foliage.cpp | 4 +- Source/Engine/Foliage/FoliageCluster.cpp | 45 ++++++++++--------- Source/Engine/Level/Actor.cpp | 4 +- Source/Engine/Threading/JobSystem.cpp | 2 - 5 files changed, 37 insertions(+), 27 deletions(-) diff --git a/Source/Editor/Windows/Assets/AnimationGraphWindow.cs b/Source/Editor/Windows/Assets/AnimationGraphWindow.cs index 7e809d968..12142219c 100644 --- a/Source/Editor/Windows/Assets/AnimationGraphWindow.cs +++ b/Source/Editor/Windows/Assets/AnimationGraphWindow.cs @@ -99,7 +99,14 @@ namespace FlaxEditor.Windows.Assets Window = window; var surfaceParam = window.Surface.GetParameter(BaseModelId); if (surfaceParam != null) - BaseModel = FlaxEngine.Content.LoadAsync((Guid)surfaceParam.Value); + { + if (surfaceParam.Value is Guid asGuid) + BaseModel = FlaxEngine.Content.LoadAsync(asGuid); + else if (surfaceParam.Value is SkinnedModel asModel) + BaseModel = asModel; + else + BaseModel = null; + } else BaseModel = window.PreviewActor.GetParameterValue(BaseModelId) as SkinnedModel; } diff --git a/Source/Engine/Foliage/Foliage.cpp b/Source/Engine/Foliage/Foliage.cpp index 6d3d8b700..116866848 100644 --- a/Source/Engine/Foliage/Foliage.cpp +++ b/Source/Engine/Foliage/Foliage.cpp @@ -41,8 +41,7 @@ Foliage::Foliage(const SpawnParams& params) void Foliage::AddToCluster(ChunkedArray& clusters, FoliageCluster* cluster, FoliageInstance& instance) { - ASSERT(instance.Bounds.Radius > ZeroTolerance); - ASSERT(cluster->Bounds.Intersects(instance.Bounds)); + ASSERT_LOW_LAYER(instance.Bounds.Radius > ZeroTolerance); // Minor clusters don't use bounds intersection but try to find the first free cluster instead if (cluster->IsMinor) @@ -63,6 +62,7 @@ void Foliage::AddToCluster(ChunkedArrayBounds.Intersects(instance.Bounds)); while (cluster->Children[0]) { #define CHECK_CHILD(idx) \ diff --git a/Source/Engine/Foliage/FoliageCluster.cpp b/Source/Engine/Foliage/FoliageCluster.cpp index fd4c0f753..107bf265a 100644 --- a/Source/Engine/Foliage/FoliageCluster.cpp +++ b/Source/Engine/Foliage/FoliageCluster.cpp @@ -21,26 +21,7 @@ void FoliageCluster::Init(const BoundingBox& bounds) void FoliageCluster::UpdateTotalBoundsAndCullDistance() { - if (Children[0]) - { - ASSERT(Instances.IsEmpty()); - - Children[0]->UpdateTotalBoundsAndCullDistance(); - Children[1]->UpdateTotalBoundsAndCullDistance(); - Children[2]->UpdateTotalBoundsAndCullDistance(); - Children[3]->UpdateTotalBoundsAndCullDistance(); - - TotalBounds = Children[0]->TotalBounds; - BoundingBox::Merge(TotalBounds, Children[1]->TotalBounds, TotalBounds); - BoundingBox::Merge(TotalBounds, Children[2]->TotalBounds, TotalBounds); - BoundingBox::Merge(TotalBounds, Children[3]->TotalBounds, TotalBounds); - - MaxCullDistance = Children[0]->MaxCullDistance; - MaxCullDistance = Math::Max(MaxCullDistance, Children[1]->MaxCullDistance); - MaxCullDistance = Math::Max(MaxCullDistance, Children[2]->MaxCullDistance); - MaxCullDistance = Math::Max(MaxCullDistance, Children[3]->MaxCullDistance); - } - else if (Instances.HasItems()) + if (Instances.HasItems()) { BoundingBox box; BoundingBox::FromSphere(Instances[0]->Bounds, TotalBounds); @@ -58,6 +39,30 @@ void FoliageCluster::UpdateTotalBoundsAndCullDistance() MaxCullDistance = 0; } + if (Children[0]) + { + Children[0]->UpdateTotalBoundsAndCullDistance(); + Children[1]->UpdateTotalBoundsAndCullDistance(); + Children[2]->UpdateTotalBoundsAndCullDistance(); + Children[3]->UpdateTotalBoundsAndCullDistance(); + + if (Instances.HasItems()) + BoundingBox::Merge(TotalBounds, Children[0]->TotalBounds, TotalBounds); + else + TotalBounds = Children[0]->TotalBounds; + BoundingBox::Merge(TotalBounds, Children[1]->TotalBounds, TotalBounds); + BoundingBox::Merge(TotalBounds, Children[2]->TotalBounds, TotalBounds); + BoundingBox::Merge(TotalBounds, Children[3]->TotalBounds, TotalBounds); + + if (Instances.HasItems()) + MaxCullDistance = Math::Max(MaxCullDistance, Children[0]->MaxCullDistance); + else + MaxCullDistance = Children[0]->MaxCullDistance; + MaxCullDistance = Math::Max(MaxCullDistance, Children[1]->MaxCullDistance); + MaxCullDistance = Math::Max(MaxCullDistance, Children[2]->MaxCullDistance); + MaxCullDistance = Math::Max(MaxCullDistance, Children[3]->MaxCullDistance); + } + BoundingSphere::FromBox(TotalBounds, TotalBoundsSphere); } diff --git a/Source/Engine/Level/Actor.cpp b/Source/Engine/Level/Actor.cpp index 7d07cd0e7..f52fab600 100644 --- a/Source/Engine/Level/Actor.cpp +++ b/Source/Engine/Level/Actor.cpp @@ -1685,7 +1685,7 @@ Quaternion Actor::LookingAt(const Vector3& worldPos) const { const Vector3 direction = worldPos - _transform.Translation; if (direction.LengthSquared() < ZeroTolerance) - return _parent->GetOrientation(); + return _parent ? _parent->GetOrientation() : Quaternion::Identity; const Float3 newForward = Vector3::Normalize(direction); const Float3 oldForward = _transform.Orientation * Vector3::Forward; @@ -1712,7 +1712,7 @@ Quaternion Actor::LookingAt(const Vector3& worldPos, const Vector3& worldUp) con { const Vector3 direction = worldPos - _transform.Translation; if (direction.LengthSquared() < ZeroTolerance) - return _parent->GetOrientation(); + return _parent ? _parent->GetOrientation() : Quaternion::Identity; const Float3 forward = Vector3::Normalize(direction); const Float3 up = Vector3::Normalize(worldUp); if (Math::IsOne(Float3::Dot(forward, up))) diff --git a/Source/Engine/Threading/JobSystem.cpp b/Source/Engine/Threading/JobSystem.cpp index cbdf53136..4d90f1c06 100644 --- a/Source/Engine/Threading/JobSystem.cpp +++ b/Source/Engine/Threading/JobSystem.cpp @@ -317,7 +317,6 @@ int64 JobSystem::Dispatch(const Function& job, int32 jobCount) context.DependantsCount = 0; context.DependenciesLeft = 0; context.JobsCount = jobCount; - ASSERT(context.Dependants.IsEmpty()); context.Dependants.Clear(); // Move the job queue forward @@ -367,7 +366,6 @@ int64 JobSystem::Dispatch(const Function& job, Span dependen context.DependantsCount = 0; context.DependenciesLeft = 0; context.JobsCount = jobCount; - ASSERT(context.Dependants.IsEmpty()); context.Dependants.Clear(); { JobsLocker.Lock(); From b4cb1028edb41fb70f087fcbf6f353fcc36504fb Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 6 Feb 2026 12:47:07 +0100 Subject: [PATCH 06/23] Fix typos --- Source/Engine/Graphics/Models/SkinnedMeshDrawData.cpp | 5 ----- Source/Engine/Graphics/Models/SkinnedMeshDrawData.h | 7 +------ Source/Engine/Level/Actors/AnimatedModel.cpp | 2 +- Source/Engine/Platform/Base/FileSystemBase.cpp | 8 ++++---- 4 files changed, 6 insertions(+), 16 deletions(-) diff --git a/Source/Engine/Graphics/Models/SkinnedMeshDrawData.cpp b/Source/Engine/Graphics/Models/SkinnedMeshDrawData.cpp index 8470facac..eaa4aa5a9 100644 --- a/Source/Engine/Graphics/Models/SkinnedMeshDrawData.cpp +++ b/Source/Engine/Graphics/Models/SkinnedMeshDrawData.cpp @@ -5,11 +5,6 @@ #include "Engine/Animations/Config.h" #include "Engine/Core/Log.h" #include "Engine/Core/Math/Matrix.h" -#include "Engine/Core/Math/Matrix3x4.h" - -SkinnedMeshDrawData::SkinnedMeshDrawData() -{ -} SkinnedMeshDrawData::~SkinnedMeshDrawData() { diff --git a/Source/Engine/Graphics/Models/SkinnedMeshDrawData.h b/Source/Engine/Graphics/Models/SkinnedMeshDrawData.h index 24d5ca230..dc780a26d 100644 --- a/Source/Engine/Graphics/Models/SkinnedMeshDrawData.h +++ b/Source/Engine/Graphics/Models/SkinnedMeshDrawData.h @@ -36,11 +36,6 @@ public: Array Data; public: - /// - /// Initializes a new instance of the class. - /// - SkinnedMeshDrawData(); - /// /// Finalizes an instance of the class. /// @@ -76,7 +71,7 @@ public: void OnDataChanged(bool dropHistory); /// - /// After bones Data has been send to the GPU buffer. + /// After bones Data has been sent to the GPU buffer. /// void OnFlush() { diff --git a/Source/Engine/Level/Actors/AnimatedModel.cpp b/Source/Engine/Level/Actors/AnimatedModel.cpp index f75174f72..a2d95dbf4 100644 --- a/Source/Engine/Level/Actors/AnimatedModel.cpp +++ b/Source/Engine/Level/Actors/AnimatedModel.cpp @@ -1002,7 +1002,7 @@ void AnimatedModel::Draw(RenderContext& renderContext) if (renderContext.View.Pass == DrawPass::GlobalSDF) return; if (renderContext.View.Pass == DrawPass::GlobalSurfaceAtlas) - return; // No supported + return; // Not supported ACTOR_GET_WORLD_MATRIX(this, view, world); GEOMETRY_DRAW_STATE_EVENT_BEGIN(_drawState, world); diff --git a/Source/Engine/Platform/Base/FileSystemBase.cpp b/Source/Engine/Platform/Base/FileSystemBase.cpp index f414bbd01..13ae3481c 100644 --- a/Source/Engine/Platform/Base/FileSystemBase.cpp +++ b/Source/Engine/Platform/Base/FileSystemBase.cpp @@ -12,25 +12,25 @@ bool FileSystemBase::ShowOpenFileDialog(Window* parentWindow, const StringView& initialDirectory, const StringView& filter, bool multiSelect, const StringView& title, Array& filenames) { - // No supported + // Not supported return true; } bool FileSystemBase::ShowSaveFileDialog(Window* parentWindow, const StringView& initialDirectory, const StringView& filter, bool multiSelect, const StringView& title, Array& filenames) { - // No supported + // Not supported return true; } bool FileSystemBase::ShowBrowseFolderDialog(Window* parentWindow, const StringView& initialDirectory, const StringView& title, String& path) { - // No supported + // Not supported return true; } bool FileSystemBase::ShowFileExplorer(const StringView& path) { - // No supported + // Not supported return true; } From 73c19b278f1ffbfd7cb76de53390733267d7ea69 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 6 Feb 2026 12:47:34 +0100 Subject: [PATCH 07/23] Optimize updating Animated Model bones buffer when it's not dirty #3827 --- Source/Engine/Level/Actors/AnimatedModel.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Source/Engine/Level/Actors/AnimatedModel.cpp b/Source/Engine/Level/Actors/AnimatedModel.cpp index a2d95dbf4..1bf05a505 100644 --- a/Source/Engine/Level/Actors/AnimatedModel.cpp +++ b/Source/Engine/Level/Actors/AnimatedModel.cpp @@ -1015,6 +1015,7 @@ void AnimatedModel::Draw(RenderContext& renderContext) RenderContext::GPULocker.Lock(); GPUDevice::Instance->GetMainContext()->UpdateBuffer(_skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count()); RenderContext::GPULocker.Unlock(); + _skinningData.OnFlush(); } SkinnedMesh::DrawInfo draw; @@ -1059,6 +1060,7 @@ void AnimatedModel::Draw(RenderContextBatch& renderContextBatch) RenderContext::GPULocker.Lock(); GPUDevice::Instance->GetMainContext()->UpdateBuffer(_skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count()); RenderContext::GPULocker.Unlock(); + _skinningData.OnFlush(); } SkinnedMesh::DrawInfo draw; From 4afd9fd8dfe3a37f05a9d4323ab9fb421d0367f2 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 6 Feb 2026 13:27:53 +0100 Subject: [PATCH 08/23] Optimize Animated Model bones buffer flushing with delayed draw action to reduce lock contention #3917 #3827 --- Source/Engine/Level/Actors/AnimatedModel.cpp | 15 +++++++++------ Source/Engine/Particles/Particles.cpp | 7 +++---- Source/Engine/Renderer/RenderList.cpp | 12 +++++------- Source/Engine/Renderer/RenderList.h | 7 ++++--- Source/Engine/Renderer/Renderer.cpp | 2 +- 5 files changed, 22 insertions(+), 21 deletions(-) diff --git a/Source/Engine/Level/Actors/AnimatedModel.cpp b/Source/Engine/Level/Actors/AnimatedModel.cpp index 1bf05a505..f6ac51f1c 100644 --- a/Source/Engine/Level/Actors/AnimatedModel.cpp +++ b/Source/Engine/Level/Actors/AnimatedModel.cpp @@ -20,6 +20,7 @@ #include "Engine/Level/Scene/Scene.h" #include "Engine/Level/SceneObjectsFactory.h" #include "Engine/Profiler/ProfilerMemory.h" +#include "Engine/Renderer/RenderList.h" #include "Engine/Serialization/Serialization.h" AnimatedModel::AnimatedModel(const SpawnParams& params) @@ -1012,9 +1013,10 @@ void AnimatedModel::Draw(RenderContext& renderContext) // Flush skinning data with GPU if (_skinningData.IsDirty()) { - RenderContext::GPULocker.Lock(); - GPUDevice::Instance->GetMainContext()->UpdateBuffer(_skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count()); - RenderContext::GPULocker.Unlock(); + renderContext.List->AddDelayedDraw([this](GPUContext* context, RenderContextBatch& renderContextBatch, int32 renderContextIndex) + { + context->UpdateBuffer(_skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count()); + }); _skinningData.OnFlush(); } @@ -1057,9 +1059,10 @@ void AnimatedModel::Draw(RenderContextBatch& renderContextBatch) // Flush skinning data with GPU if (_skinningData.IsDirty()) { - RenderContext::GPULocker.Lock(); - GPUDevice::Instance->GetMainContext()->UpdateBuffer(_skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count()); - RenderContext::GPULocker.Unlock(); + renderContext.List->AddDelayedDraw([this](GPUContext* context, RenderContextBatch& renderContextBatch, int32 renderContextIndex) + { + context->UpdateBuffer(_skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count()); + }); _skinningData.OnFlush(); } diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index 951b657b2..7d3703ee0 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -677,11 +677,10 @@ void CleanupGPUParticlesSorting() SAFE_DELETE_GPU_RESOURCE(GPUIndirectArgsBuffer); } -void DrawEmittersGPU(RenderContextBatch& renderContextBatch) +void DrawEmittersGPU(GPUContext* context, RenderContextBatch& renderContextBatch) { PROFILE_GPU_CPU_NAMED("DrawEmittersGPU"); ScopeReadLock systemScope(Particles::SystemLocker); - GPUContext* context = GPUDevice::Instance->GetMainContext(); // Count draws and sorting passes needed for resources allocation uint32 indirectArgsSize = 0; @@ -1124,9 +1123,9 @@ void DrawEmitterGPU(RenderContextBatch& renderContextBatch, ParticleBuffer* buff if (GPUEmitterDraws.Count() == 0) { // The first emitter schedules the drawing of all batched draws - renderContextBatch.GetMainContext().List->AddDelayedDraw([](RenderContextBatch& renderContextBatch, int32 contextIndex) + renderContextBatch.GetMainContext().List->AddDelayedDraw([](GPUContext* context, RenderContextBatch& renderContextBatch, int32 renderContextIndex) { - DrawEmittersGPU(renderContextBatch); + DrawEmittersGPU(context, renderContextBatch); }); } GPUEmitterDraws.Add({ buffer, drawCall, drawModes, staticFlags, bounds, renderModulesIndices, indirectArgsSize, sortOrder, sorting }); diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index 453fd71a9..ba1f7a0f9 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -258,18 +258,17 @@ void RenderList::AddSettingsBlend(IPostFxSettingsProvider* provider, float weigh void RenderList::AddDelayedDraw(DelayedDraw&& func) { - MemPoolLocker.Lock(); // TODO: convert _delayedDraws into RenderListBuffer with usage of arena Memory for fast alloc _delayedDraws.Add(MoveTemp(func)); - MemPoolLocker.Unlock(); } -void RenderList::DrainDelayedDraws(RenderContextBatch& renderContextBatch, int32 contextIndex) +void RenderList::DrainDelayedDraws(GPUContext* context, RenderContextBatch& renderContextBatch, int32 renderContextIndex) { - if (_delayedDraws.IsEmpty()) + if (_delayedDraws.Count() == 0) return; + PROFILE_CPU(); for (DelayedDraw& e : _delayedDraws) - e(renderContextBatch, contextIndex); - _delayedDraws.SetCapacity(0); + e(context, renderContextBatch, renderContextIndex); + _delayedDraws.Clear(); } void RenderList::BlendSettings() @@ -495,7 +494,6 @@ RenderList::RenderList(const SpawnParams& params) , ObjectBuffer(0, PixelFormat::R32G32B32A32_Float, false, TEXT("Object Buffer")) , TempObjectBuffer(0, PixelFormat::R32G32B32A32_Float, false, TEXT("Object Buffer")) , _instanceBuffer(0, sizeof(ShaderObjectDrawInstanceData), TEXT("Instance Buffer"), GPUVertexLayout::Get({ { VertexElement::Types::Attribute0, 3, 0, 1, PixelFormat::R32_UInt } })) - , _delayedDraws(&Memory) { } diff --git a/Source/Engine/Renderer/RenderList.h b/Source/Engine/Renderer/RenderList.h index a37d2a941..9b0a91ed6 100644 --- a/Source/Engine/Renderer/RenderList.h +++ b/Source/Engine/Renderer/RenderList.h @@ -459,13 +459,14 @@ public: /// DynamicTypedBuffer TempObjectBuffer; - typedef Function DelayedDraw; + typedef Function DelayedDraw; void AddDelayedDraw(DelayedDraw&& func); - void DrainDelayedDraws(RenderContextBatch& renderContextBatch, int32 contextIndex); + void DrainDelayedDraws(GPUContext* context, RenderContextBatch& renderContextBatch, int32 renderContextIndex); /// /// Adds custom callback (eg. lambda) to invoke after scene draw calls are collected on a main thread (some async draw tasks might be active). Allows for safe usage of GPUContext for draw preparations or to perform GPU-driven drawing. /// + /// Can be called in async during scene rendering (thread-safe internally). Lambda is allocated by concurrent arena allocator owned by the RenderList. template FORCE_INLINE void AddDelayedDraw(const T& lambda) { @@ -476,7 +477,7 @@ public: private: DynamicVertexBuffer _instanceBuffer; - Array _delayedDraws; + RenderListBuffer _delayedDraws; public: /// diff --git a/Source/Engine/Renderer/Renderer.cpp b/Source/Engine/Renderer/Renderer.cpp index fd7d43c8b..2f17e5294 100644 --- a/Source/Engine/Renderer/Renderer.cpp +++ b/Source/Engine/Renderer/Renderer.cpp @@ -461,7 +461,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont // Perform custom post-scene drawing (eg. GPU dispatches used by VFX) for (int32 i = 0; i < renderContextBatch.Contexts.Count(); i++) - renderContextBatch.Contexts[i].List->DrainDelayedDraws(renderContextBatch, i); + renderContextBatch.Contexts[i].List->DrainDelayedDraws(context, renderContextBatch, i); #if USE_EDITOR GBufferPass::Instance()->OverrideDrawCalls(renderContext); From 27dd1bda253cd8b849802ed44165ea3e3575b74d Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 6 Feb 2026 21:57:16 +0100 Subject: [PATCH 09/23] Optimize `GPUVertexLayout::Get` to not use mutex on lookup read #3917 --- .../Graphics/Shaders/GPUVertexLayout.cpp | 33 ++++++++++--------- Source/Engine/Renderer/RenderList.cpp | 19 +++++++---- 2 files changed, 31 insertions(+), 21 deletions(-) diff --git a/Source/Engine/Graphics/Shaders/GPUVertexLayout.cpp b/Source/Engine/Graphics/Shaders/GPUVertexLayout.cpp index 05c6d605a..6c34b2008 100644 --- a/Source/Engine/Graphics/Shaders/GPUVertexLayout.cpp +++ b/Source/Engine/Graphics/Shaders/GPUVertexLayout.cpp @@ -44,23 +44,30 @@ namespace Dictionary LayoutCache; Dictionary VertexBufferCache; + // TODO: it's not safe to use map and then use again with a lock (find a better way, eg. using two maps, one first read-only and thread safe, second with mutex-guarded new values from this frame) GPUVertexLayout* AddCache(const VertexBufferLayouts& key, int32 count) { - GPUVertexLayout::Elements elements; - bool anyValid = false; - for (int32 slot = 0; slot < count; slot++) + GPUVertexLayout* result; + CacheLocker.Lock(); + if (!VertexBufferCache.TryGet(key, result)) { - if (key.Layouts[slot]) + GPUVertexLayout::Elements elements; + bool anyValid = false; + for (int32 slot = 0; slot < count; slot++) { - anyValid = true; - int32 start = elements.Count(); - elements.Add(key.Layouts[slot]->GetElements()); - for (int32 j = start; j < elements.Count(); j++) - elements.Get()[j].Slot = (byte)slot; + if (key.Layouts[slot]) + { + anyValid = true; + int32 start = elements.Count(); + elements.Add(key.Layouts[slot]->GetElements()); + for (int32 j = start; j < elements.Count(); j++) + elements.Get()[j].Slot = (byte)slot; + } } + result = anyValid ? GPUVertexLayout::Get(elements, true) : nullptr; + VertexBufferCache.Add(key, result); } - GPUVertexLayout* result = anyValid ? GPUVertexLayout::Get(elements, true) : nullptr; - VertexBufferCache.Add(key, result); + CacheLocker.Unlock(); return result; } } @@ -185,11 +192,9 @@ GPUVertexLayout* GPUVertexLayout::Get(const Span& vertexBuffers) key.Layouts[i] = nullptr; // Lookup existing cache - CacheLocker.Lock(); GPUVertexLayout* result; if (!VertexBufferCache.TryGet(key, result)) result = AddCache(key, vertexBuffers.Length()); - CacheLocker.Unlock(); return result; } @@ -209,11 +214,9 @@ GPUVertexLayout* GPUVertexLayout::Get(const Span& layouts) key.Layouts[i] = nullptr; // Lookup existing cache - CacheLocker.Lock(); GPUVertexLayout* result; if (!VertexBufferCache.TryGet(key, result)) result = AddCache(key, layouts.Length()); - CacheLocker.Unlock(); return result; } diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index ba1f7a0f9..23fcd52dc 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -825,6 +825,13 @@ FORCE_INLINE bool DrawsEqual(const DrawCall* a, const DrawCall* b) Platform::MemoryCompare(a->Geometry.VertexBuffers, b->Geometry.VertexBuffers, sizeof(a->Geometry.VertexBuffers) + sizeof(a->Geometry.VertexBuffersOffsets)) == 0; } +FORCE_INLINE Span GetVB(GPUBuffer* const* ptr, int32 maxSize) +{ + while (ptr[maxSize - 1] == nullptr && maxSize > 1) + maxSize--; + return ToSpan(ptr, maxSize); +} + void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsList& list, RenderList* drawCallsList, GPUTextureView* input) { if (list.IsEmpty()) @@ -953,7 +960,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL Platform::MemoryCopy(vb, activeDraw->Geometry.VertexBuffers, sizeof(DrawCall::Geometry.VertexBuffers)); Platform::MemoryCopy(vbOffsets, activeDraw->Geometry.VertexBuffersOffsets, sizeof(DrawCall::Geometry.VertexBuffersOffsets)); context->BindIB(activeDraw->Geometry.IndexBuffer); - context->BindVB(ToSpan(vb, ARRAY_COUNT(vb)), vbOffsets); + context->BindVB(GetVB(vb, ARRAY_COUNT(vb)), vbOffsets); context->DrawIndexedInstanced(activeDraw->Draw.IndicesCount, activeCount, instanceBufferOffset, 0, activeDraw->Draw.StartIndex); instanceBufferOffset += activeCount; @@ -970,7 +977,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL // Single-draw call batch context->BindIB(drawCall.Geometry.IndexBuffer); - context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets); + context->BindVB(GetVB(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets); if (drawCall.InstanceCount == 0) { context->DrawIndexedInstancedIndirect(drawCall.Draw.IndirectArgsBuffer, drawCall.Draw.IndirectArgsOffset); @@ -993,7 +1000,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL Platform::MemoryCopy(vb, drawCall.Geometry.VertexBuffers, sizeof(DrawCall::Geometry.VertexBuffers)); Platform::MemoryCopy(vbOffsets, drawCall.Geometry.VertexBuffersOffsets, sizeof(DrawCall::Geometry.VertexBuffersOffsets)); context->BindIB(drawCall.Geometry.IndexBuffer); - context->BindVB(ToSpan(vb, vbMax + 1), vbOffsets); + context->BindVB(GetVB(vb, vbMax + 1), vbOffsets); if (drawCall.InstanceCount == 0) { @@ -1023,7 +1030,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL const DrawCall& drawCall = drawCallsData[perDraw.DrawObjectIndex]; context->BindIB(drawCall.Geometry.IndexBuffer); - context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets); + context->BindVB(GetVB(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets); if (drawCall.InstanceCount == 0) { @@ -1044,7 +1051,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL bindParams.DrawCall->Material->Bind(bindParams); context->BindIB(drawCall.Geometry.IndexBuffer); - context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets); + context->BindVB(GetVB(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets); for (int32 j = 0; j < batch.Instances.Count(); j++) { @@ -1068,7 +1075,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL drawCall.Material->Bind(bindParams); context->BindIB(drawCall.Geometry.IndexBuffer); - context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets); + context->BindVB(GetVB(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets); if (drawCall.InstanceCount == 0) { From a855b17cc0f02bce6b72350e0e146837a49eb992 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 7 Feb 2026 00:44:51 +0100 Subject: [PATCH 10/23] Add new collection type `ConcurrentDictionary` #3917 #3827 --- Source/Engine/Core/Collections/Dictionary.h | 4 + .../Engine/Threading/ConcurrentDictionary.h | 316 ++++++++++++++++++ 2 files changed, 320 insertions(+) create mode 100644 Source/Engine/Threading/ConcurrentDictionary.h diff --git a/Source/Engine/Core/Collections/Dictionary.h b/Source/Engine/Core/Collections/Dictionary.h index e2f5f0ed6..e18a5b999 100644 --- a/Source/Engine/Core/Collections/Dictionary.h +++ b/Source/Engine/Core/Collections/Dictionary.h @@ -4,6 +4,9 @@ #include "HashSetBase.h" +template +class ConcurrentDictionary; + /// /// Describes single portion of space for the key and value pair in a hash map. /// @@ -13,6 +16,7 @@ struct DictionaryBucket friend Memory; friend HashSetBase; friend Dictionary; + friend ConcurrentDictionary; /// The key. KeyType Key; diff --git a/Source/Engine/Threading/ConcurrentDictionary.h b/Source/Engine/Threading/ConcurrentDictionary.h new file mode 100644 index 000000000..22395d798 --- /dev/null +++ b/Source/Engine/Threading/ConcurrentDictionary.h @@ -0,0 +1,316 @@ +// Copyright (c) Wojciech Figat. All rights reserved. + +#pragma once + +#include "Engine/Core/Collections/Dictionary.h" +#include "Engine/Platform/CriticalSection.h" + +/// +/// Template for unordered dictionary with mapped key with value pairs that supports asynchronous data reading and writing. +/// +/// The type of the keys in the dictionary. +/// The type of the values in the dictionary. +/// The type of memory allocator. +template +class ConcurrentDictionary : Dictionary +{ + friend ConcurrentDictionary; +public: + typedef Dictionary Base; + typedef DictionaryBucket Bucket; + using AllocationData = typename AllocationType::template Data; + using AllocationTag = typename AllocationType::Tag; + +private: + mutable volatile int64 _threadsReading = 0; + volatile int64 _threadsWriting = 0; + CriticalSection _locker; + +public: + /// + /// Initializes an empty without reserving any space. + /// + ConcurrentDictionary() + { + } + + /// + /// Initializes an empty without reserving any space. + /// + /// The custom allocation tag. + ConcurrentDictionary(AllocationTag tag) + : Base(tag) + { + } + + /// + /// Finalizes an instance of the class. + /// + ~ConcurrentDictionary() + { + Clear(); + } + +public: + /// + /// Gets the amount of the elements in the collection. + /// + int32 Count() const + { + Reader reader(this); + return Base::_elementsCount; + } + + /// + /// Gets the amount of the elements that can be contained by the collection. + /// + int32 Capacity() const + { + Reader reader(this); + return Base::_size; + } + + /// + /// Tries to get element with given key. + /// + /// The key of the element. + /// The result value. + /// True if element of given key has been found, otherwise false. + template + bool TryGet(const KeyComparableType& key, ValueType& result) const + { + Reader reader(this); + typename Base::FindPositionResult pos; + Base::FindPosition(key, pos); + if (pos.ObjectIndex != -1) + result = Base::_allocation.Get()[pos.ObjectIndex].Value; + return pos.ObjectIndex != -1; + } + +public: + /// + /// Adds a pair of key and value to the collection. + /// + /// The key. + /// The value. + /// True if added element, otherwise false if it already exists (or other thread added it). + template + bool Add(const KeyComparableType& key, const ValueType& value) + { + Writer writer(this); + Bucket* bucket = Base::OnAdd(key, false, true); + if (bucket) + bucket->Occupy(key, value); + return bucket != nullptr; + } + + /// + /// Removes element with a specified key. + /// + /// The element key to remove. + /// True if item was removed from collection, otherwise false. + template + bool Remove(const KeyComparableType& key) + { + Writer writer(this); + return Base::Remove(key); + } + +public: + /// + /// Removes all elements from the collection. + /// + void Clear() + { + Writer writer(this); + Base::Clear(); + } + +public: + /// + /// The read-only dictionary collection iterator. + /// + struct ConstIterator : Base::IteratorBase + { + friend ConcurrentDictionary; + public: + ConstIterator(const ConcurrentDictionary* collection, const int32 index) + : Base::IteratorBase(collection, index) + { + if (collection) + collection->BeginRead(); + } + + ConstIterator(const ConstIterator& i) + : Base::IteratorBase(i._collection, i._index) + { + if (i.collection) + i.collection->BeginRead(); + } + + ConstIterator(ConstIterator&& i) noexcept + : Base::IteratorBase(i._collection, i._index) + { + i._collection = nullptr; + } + + ~ConstIterator() + { + if (this->_collection) + ((ConcurrentDictionary*)this->_collection)->EndRead(); + } + + public: + FORCE_INLINE bool operator!() const + { + return !(bool)*this; + } + + FORCE_INLINE bool operator==(const ConstIterator& v) const + { + return this->_index == v._index && this->_collection == v._collection; + } + + FORCE_INLINE bool operator!=(const ConstIterator& v) const + { + return this->_index != v._index || this->_collection != v._collection; + } + + ConstIterator& operator=(const ConstIterator& v) + { + this->_collection = v._collection; + this->_index = v._index; + return *this; + } + + ConstIterator& operator=(ConstIterator&& v) noexcept + { + this->_collection = v._collection; + this->_index = v._index; + v._collection = nullptr; + return *this; + } + + ConstIterator& operator++() + { + this->Next(); + return *this; + } + + ConstIterator operator++(int) const + { + ConstIterator i = *this; + i.Next(); + return i; + } + + ConstIterator& operator--() + { + this->Prev(); + return *this; + } + + ConstIterator operator--(int) const + { + ConstIterator i = *this; + i.Prev(); + return i; + } + }; + + ConstIterator begin() const + { + ConstIterator i(this, -1); + ++i; + return i; + } + + FORCE_INLINE ConstIterator end() const + { + return ConstIterator(this, Base::_size); + } + +private: + void BeginWrite() + { + Platform::InterlockedIncrement(&_threadsWriting); + + // Wait for all reads to end + RETRY: + while (Platform::AtomicRead(&_threadsReading)) + Platform::Yield(); + + // Thread-safe writing + _locker.Lock(); + if (Platform::AtomicRead(&_threadsReading)) + { + // Other reader entered during mutex locking so give them a chance to transition into active-waiting + _locker.Unlock(); + goto RETRY; + } + } + + void EndWrite() + { + _locker.Unlock(); + Platform::InterlockedDecrement(&_threadsWriting); + } + + void BeginRead() const + { + RETRY: + Platform::InterlockedIncrement(&_threadsReading); + + // Check if any thread is writing (or is about to write) + if (Platform::AtomicRead(&_threadsWriting) != 0) + { + // Wait for all writes to end + Platform::InterlockedDecrement(&_threadsReading); + while (Platform::AtomicRead(&_threadsWriting)) + Platform::Yield(); + + // Try again + goto RETRY; + } + } + + void EndRead() const + { + Platform::InterlockedDecrement(&_threadsReading); + } + +private: + // Utility for methods that read-write state. + struct Writer + { + ConcurrentDictionary* _collection; + + Writer(ConcurrentDictionary* collection) + : _collection(collection) + { + _collection->BeginWrite(); + } + + ~Writer() + { + _collection->EndWrite(); + } + }; + + // Utility for methods that read-only state. + struct Reader + { + const ConcurrentDictionary* _collection; + + Reader(const ConcurrentDictionary* collection) + : _collection(collection) + { + _collection->BeginRead(); + } + + ~Reader() + { + _collection->EndRead(); + } + }; +}; From ecddb8aae5b211ce3a89da42e41e415887ae5ffc Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 7 Feb 2026 00:45:14 +0100 Subject: [PATCH 11/23] Optimize `GPUVertexLayout` caches with `ConcurrentDictionary` --- .../Graphics/Shaders/GPUVertexLayout.cpp | 43 +++++++++---------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/Source/Engine/Graphics/Shaders/GPUVertexLayout.cpp b/Source/Engine/Graphics/Shaders/GPUVertexLayout.cpp index 6c34b2008..e458ff1c1 100644 --- a/Source/Engine/Graphics/Shaders/GPUVertexLayout.cpp +++ b/Source/Engine/Graphics/Shaders/GPUVertexLayout.cpp @@ -8,6 +8,7 @@ #include "Engine/Graphics/GPUDevice.h" #include "Engine/Graphics/GPUBuffer.h" #include "Engine/Graphics/PixelFormatExtensions.h" +#include "Engine/Threading/ConcurrentDictionary.h" #if GPU_ENABLE_RESOURCE_NAMING #include "Engine/Scripting/Enums.h" #endif @@ -40,15 +41,12 @@ uint32 GetHash(const VertexBufferLayouts& key) namespace { - CriticalSection CacheLocker; - Dictionary LayoutCache; - Dictionary VertexBufferCache; + ConcurrentDictionary LayoutCache; + ConcurrentDictionary VertexBufferCache; - // TODO: it's not safe to use map and then use again with a lock (find a better way, eg. using two maps, one first read-only and thread safe, second with mutex-guarded new values from this frame) - GPUVertexLayout* AddCache(const VertexBufferLayouts& key, int32 count) + GPUVertexLayout* GetCache(const VertexBufferLayouts& key, int32 count) { GPUVertexLayout* result; - CacheLocker.Lock(); if (!VertexBufferCache.TryGet(key, result)) { GPUVertexLayout::Elements elements; @@ -65,9 +63,15 @@ namespace } } result = anyValid ? GPUVertexLayout::Get(elements, true) : nullptr; - VertexBufferCache.Add(key, result); + if (!VertexBufferCache.Add(key, result)) + { + // Other thread added the value + Delete(result); + bool found = VertexBufferCache.TryGet(key, result); + ASSERT(found); + } + } - CacheLocker.Unlock(); return result; } } @@ -155,7 +159,6 @@ GPUVertexLayout* GPUVertexLayout::Get(const Elements& elements, bool explicitOff } // Lookup existing cache - CacheLocker.Lock(); GPUVertexLayout* result; if (!LayoutCache.TryGet(hash, result)) { @@ -167,12 +170,16 @@ GPUVertexLayout* GPUVertexLayout::Get(const Elements& elements, bool explicitOff LOG(Error, " {}", e.ToString()); #endif LOG(Error, "Failed to create vertex layout"); - CacheLocker.Unlock(); return nullptr; } - LayoutCache.Add(hash, result); + if (!LayoutCache.Add(hash, result)) + { + // Other thread added the value + Delete(result); + bool found = LayoutCache.TryGet(hash, result); + ASSERT(found); + } } - CacheLocker.Unlock(); return result; } @@ -192,11 +199,7 @@ GPUVertexLayout* GPUVertexLayout::Get(const Span& vertexBuffers) key.Layouts[i] = nullptr; // Lookup existing cache - GPUVertexLayout* result; - if (!VertexBufferCache.TryGet(key, result)) - result = AddCache(key, vertexBuffers.Length()); - - return result; + return GetCache(key, vertexBuffers.Length()); } GPUVertexLayout* GPUVertexLayout::Get(const Span& layouts) @@ -214,11 +217,7 @@ GPUVertexLayout* GPUVertexLayout::Get(const Span& layouts) key.Layouts[i] = nullptr; // Lookup existing cache - GPUVertexLayout* result; - if (!VertexBufferCache.TryGet(key, result)) - result = AddCache(key, layouts.Length()); - - return result; + return GetCache(key, layouts.Length()); } GPUVertexLayout* GPUVertexLayout::Merge(GPUVertexLayout* base, GPUVertexLayout* reference, bool removeUnused, bool addMissing, int32 missingSlotOverride, bool referenceOrder) From ed5ad91a32cc6b4d01410a86e12f20ba6ee42243 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sat, 7 Feb 2026 00:46:32 +0100 Subject: [PATCH 12/23] Optimize `SkinnedModel::GetSkeletonMapping` to not use locking for better perf when multi-threading #3827 --- Source/Engine/Content/Assets/SkinnedModel.cpp | 27 ++++++++++++------- Source/Engine/Content/Assets/SkinnedModel.h | 8 +++--- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/Source/Engine/Content/Assets/SkinnedModel.cpp b/Source/Engine/Content/Assets/SkinnedModel.cpp index ed41c4aab..ce6300b17 100644 --- a/Source/Engine/Content/Assets/SkinnedModel.cpp +++ b/Source/Engine/Content/Assets/SkinnedModel.cpp @@ -61,16 +61,24 @@ Array SkinnedModel::GetBlendShapes() SkinnedModel::SkeletonMapping SkinnedModel::GetSkeletonMapping(Asset* source, bool autoRetarget) { + // Fast-path to use cached mapping SkeletonMapping mapping; mapping.TargetSkeleton = this; + SkeletonMappingData mappingData; + if (_skeletonMappingCache.TryGet(source, mappingData)) + { + mapping.SourceSkeleton = mappingData.SourceSkeleton; + mapping.NodesMapping = mappingData.NodesMapping; + return mapping; + } + mapping.SourceSkeleton = nullptr; + if (WaitForLoaded() || !source || source->WaitForLoaded()) return mapping; + PROFILE_CPU(); ScopeLock lock(Locker); - SkeletonMappingData mappingData; if (!_skeletonMappingCache.TryGet(source, mappingData)) { - PROFILE_CPU(); - // Initialize the mapping SkeletonRetarget* retarget = nullptr; const Guid sourceId = source->GetID(); @@ -823,13 +831,13 @@ bool SkinnedModel::SaveMesh(WriteStream& stream, const ModelData& modelData, int void SkinnedModel::ClearSkeletonMapping() { - for (auto& e : _skeletonMappingCache) + for (const auto& e : _skeletonMappingCache) { e.Key->OnUnloaded.Unbind(this); #if USE_EDITOR e.Key->OnReloading.Unbind(this); #endif - Allocator::Free(e.Value.NodesMapping.Get()); + Allocator::Free((void*)e.Value.NodesMapping.Get()); } _skeletonMappingCache.Clear(); } @@ -837,8 +845,9 @@ void SkinnedModel::ClearSkeletonMapping() void SkinnedModel::OnSkeletonMappingSourceAssetUnloaded(Asset* obj) { ScopeLock lock(Locker); - auto i = _skeletonMappingCache.Find(obj); - ASSERT(i != _skeletonMappingCache.End()); + SkeletonMappingData mappingData; + bool found = _skeletonMappingCache.TryGet(obj, mappingData); + ASSERT(found); // Unlink event obj->OnUnloaded.Unbind(this); @@ -847,8 +856,8 @@ void SkinnedModel::OnSkeletonMappingSourceAssetUnloaded(Asset* obj) #endif // Clear cache - Allocator::Free(i->Value.NodesMapping.Get()); - _skeletonMappingCache.Remove(i); + Allocator::Free(mappingData.NodesMapping.Get()); + _skeletonMappingCache.Remove(obj); } uint64 SkinnedModel::GetMemoryUsage() const diff --git a/Source/Engine/Content/Assets/SkinnedModel.h b/Source/Engine/Content/Assets/SkinnedModel.h index 894a080c4..111d4d6cb 100644 --- a/Source/Engine/Content/Assets/SkinnedModel.h +++ b/Source/Engine/Content/Assets/SkinnedModel.h @@ -3,7 +3,7 @@ #pragma once #include "ModelBase.h" -#include "Engine/Core/Collections/Dictionary.h" +#include "Engine/Threading/ConcurrentDictionary.h" #include "Engine/Graphics/Models/SkinnedMesh.h" #include "Engine/Graphics/Models/SkeletonData.h" @@ -101,9 +101,9 @@ public: struct FLAXENGINE_API SkeletonMapping { // Target skeleton. - AssetReference TargetSkeleton; + SkinnedModel* TargetSkeleton; // Source skeleton. - AssetReference SourceSkeleton; + SkinnedModel* SourceSkeleton; // The node-to-node mapping for the fast animation sampling for the skinned model skeleton nodes. Each item is index of the source skeleton node into target skeleton node. Span NodesMapping; }; @@ -115,7 +115,7 @@ private: Span NodesMapping; }; - Dictionary _skeletonMappingCache; + ConcurrentDictionary _skeletonMappingCache; public: /// From 9c32f978fb7f3ab2ce0ef3b7e550487ec052c1e1 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 8 Feb 2026 00:22:37 +0100 Subject: [PATCH 13/23] Fix regression from 73c19b278f1ffbfd7cb76de53390733267d7ea69 to fix missing skeleton bones on start in Editor --- Source/Engine/Graphics/Models/SkinnedMeshDrawData.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/Graphics/Models/SkinnedMeshDrawData.cpp b/Source/Engine/Graphics/Models/SkinnedMeshDrawData.cpp index eaa4aa5a9..eb2ea0145 100644 --- a/Source/Engine/Graphics/Models/SkinnedMeshDrawData.cpp +++ b/Source/Engine/Graphics/Models/SkinnedMeshDrawData.cpp @@ -28,7 +28,7 @@ void SkinnedMeshDrawData::Setup(int32 bonesCount) BonesCount = bonesCount; _hasValidData = false; - _isDirty = false; + _isDirty = true; Data.Resize(BoneMatrices->GetSize()); SAFE_DELETE_GPU_RESOURCE(PrevBoneMatrices); } From 3d66316716307e8a8d9acb069b71e6a7c65297c1 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 9 Feb 2026 15:03:42 +0100 Subject: [PATCH 14/23] Optimize animations retargeting between skeletons Cuts down `RetargetSkeletonPose` time down by over 80%. #3827 --- .../Animations/Graph/AnimGroup.Animation.cpp | 135 +++++++++--------- Source/Engine/Content/Assets/SkinnedModel.cpp | 2 + Source/Engine/Graphics/Models/SkeletonData.h | 9 ++ Source/Engine/Graphics/Models/SkinnedMesh.cpp | 36 +++++ 4 files changed, 116 insertions(+), 66 deletions(-) diff --git a/Source/Engine/Animations/Graph/AnimGroup.Animation.cpp b/Source/Engine/Animations/Graph/AnimGroup.Animation.cpp index 08767728a..7eb8d32d6 100644 --- a/Source/Engine/Animations/Graph/AnimGroup.Animation.cpp +++ b/Source/Engine/Animations/Graph/AnimGroup.Animation.cpp @@ -109,86 +109,84 @@ namespace nodes->RootMotion.Orientation.Normalize(); } } - - Matrix ComputeWorldMatrixRecursive(const SkeletonData& skeleton, int32 index, Matrix localMatrix) - { - const auto& node = skeleton.Nodes[index]; - index = node.ParentIndex; - while (index != -1) - { - const auto& parent = skeleton.Nodes[index]; - localMatrix *= parent.LocalTransform.GetWorld(); - index = parent.ParentIndex; - } - return localMatrix; - } - - Matrix ComputeInverseParentMatrixRecursive(const SkeletonData& skeleton, int32 index) - { - Matrix inverseParentMatrix = Matrix::Identity; - const auto& node = skeleton.Nodes[index]; - if (node.ParentIndex != -1) - { - inverseParentMatrix = ComputeWorldMatrixRecursive(skeleton, index, inverseParentMatrix); - inverseParentMatrix = Matrix::Invert(inverseParentMatrix); - } - return inverseParentMatrix; - } } -void RetargetSkeletonNode(const SkeletonData& sourceSkeleton, const SkeletonData& targetSkeleton, const SkinnedModel::SkeletonMapping& sourceMapping, Transform& node, int32 targetIndex) +// Utility for retargeting animation poses between skeletons. +struct Retargeting { - // sourceSkeleton - skeleton of Anim Graph (Base Locomotion pack) - // targetSkeleton - visual mesh skeleton (City Characters pack) - // target - anim graph input/output transformation of that node - const auto& targetNode = targetSkeleton.Nodes[targetIndex]; - const int32 sourceIndex = sourceMapping.NodesMapping[targetIndex]; - if (sourceIndex == -1) +private: + const Matrix* _sourcePosePtr, * _targetPosePtr; + const SkeletonData* _sourceSkeleton, *_targetSkeleton; + const SkinnedModel::SkeletonMapping* _sourceMapping; + +public: + void Init(const SkeletonData& sourceSkeleton, const SkeletonData& targetSkeleton, const SkinnedModel::SkeletonMapping& sourceMapping) { - // Use T-pose - node = targetNode.LocalTransform; - return; + ASSERT_LOW_LAYER(targetSkeleton.Nodes.Count() == sourceMapping.NodesMapping.Length()); + + // Cache world-space poses for source and target skeletons to avoid redundant calculations during retargeting + _sourcePosePtr = sourceSkeleton.GetNodesPose().Get(); + _targetPosePtr = targetSkeleton.GetNodesPose().Get(); + + _sourceSkeleton = &sourceSkeleton; + _targetSkeleton = &targetSkeleton; + _sourceMapping = &sourceMapping; } - const auto& sourceNode = sourceSkeleton.Nodes[sourceIndex]; - // [Reference: https://wickedengine.net/2022/09/animation-retargeting/comment-page-1/] - - // Calculate T-Pose of source node, target node and target parent node - Matrix bindMatrix = ComputeWorldMatrixRecursive(sourceSkeleton, sourceIndex, sourceNode.LocalTransform.GetWorld()); - Matrix inverseBindMatrix = Matrix::Invert(bindMatrix); - Matrix targetMatrix = ComputeWorldMatrixRecursive(targetSkeleton, targetIndex, targetNode.LocalTransform.GetWorld()); - Matrix inverseParentMatrix = ComputeInverseParentMatrixRecursive(targetSkeleton, targetIndex); - - // Target node animation is world-space difference of the animated source node inside the target's parent node world-space - Matrix localMatrix = inverseBindMatrix * ComputeWorldMatrixRecursive(sourceSkeleton, sourceIndex, node.GetWorld()); - localMatrix = targetMatrix * localMatrix * inverseParentMatrix; - - // Extract local node transformation - localMatrix.Decompose(node); -} - -void RetargetSkeletonPose(const SkeletonData& sourceSkeleton, const SkeletonData& targetSkeleton, const SkinnedModel::SkeletonMapping& mapping, const Transform* sourceNodes, Transform* targetNodes) -{ - // TODO: cache source and target skeletons world-space poses for faster retargeting (use some pooled memory) - ASSERT_LOW_LAYER(targetSkeleton.Nodes.Count() == mapping.NodesMapping.Length()); - for (int32 targetIndex = 0; targetIndex < targetSkeleton.Nodes.Count(); targetIndex++) + void RetargetNode(const Transform& source, Transform& target, int32 sourceIndex, int32 targetIndex) { - auto& targetNode = targetSkeleton.Nodes.Get()[targetIndex]; - const int32 sourceIndex = mapping.NodesMapping.Get()[targetIndex]; - Transform node; + // sourceSkeleton - skeleton of Anim Graph + // targetSkeleton - visual mesh skeleton + // target - anim graph input/output transformation of that node + const SkeletonNode& targetNode = _targetSkeleton->Nodes.Get()[targetIndex]; if (sourceIndex == -1) { // Use T-pose - node = targetNode.LocalTransform; + target = targetNode.LocalTransform; } else { - // Retarget - node = sourceNodes[sourceIndex]; - RetargetSkeletonNode(sourceSkeleton, targetSkeleton, mapping, node, targetIndex); + // [Reference: https://wickedengine.net/2022/09/animation-retargeting/comment-page-1/] + + // Calculate T-Pose of source node, target node and target parent node + const Matrix* sourcePosePtr = _sourcePosePtr; + const Matrix* targetPosePtr = _targetPosePtr; + const Matrix& bindMatrix = sourcePosePtr[sourceIndex]; + const Matrix& targetMatrix = targetPosePtr[targetIndex]; + Matrix inverseParentMatrix; + if (targetNode.ParentIndex != -1) + Matrix::Invert(targetPosePtr[targetNode.ParentIndex], inverseParentMatrix); + else + inverseParentMatrix = Matrix::Identity; + + // Target node animation is world-space difference of the animated source node inside the target's parent node world-space + const SkeletonNode& sourceNode = _sourceSkeleton->Nodes.Get()[sourceIndex]; + Matrix localMatrix = source.GetWorld(); + if (sourceNode.ParentIndex != -1) + localMatrix = localMatrix * sourcePosePtr[sourceNode.ParentIndex]; + localMatrix = Matrix::Invert(bindMatrix) * localMatrix; + localMatrix = targetMatrix * localMatrix * inverseParentMatrix; + + // Extract local node transformation + localMatrix.Decompose(target); } - targetNodes[targetIndex] = node; } + + FORCE_INLINE void RetargetPose(const Transform* sourceNodes, Transform* targetNodes) + { + for (int32 targetIndex = 0; targetIndex < _targetSkeleton->Nodes.Count(); targetIndex++) + { + const int32 sourceIndex = _sourceMapping->NodesMapping.Get()[targetIndex]; + RetargetNode(sourceNodes[sourceIndex], targetNodes[targetIndex], sourceIndex, targetIndex); + } + } +}; + +void RetargetSkeletonPose(const SkeletonData& sourceSkeleton, const SkeletonData& targetSkeleton, const SkinnedModel::SkeletonMapping& mapping, const Transform* sourceNodes, Transform* targetNodes) +{ + Retargeting retargeting; + retargeting.Init(sourceSkeleton, targetSkeleton, mapping); + retargeting.RetargetPose(sourceNodes, targetNodes); } AnimGraphTraceEvent& AnimGraphContext::AddTraceEvent(const AnimGraphNode* node) @@ -431,9 +429,13 @@ void AnimGraphExecutor::ProcessAnimation(AnimGraphImpulse* nodes, AnimGraphNode* const bool weighted = weight < 1.0f; const bool retarget = mapping.SourceSkeleton && mapping.SourceSkeleton != mapping.TargetSkeleton; const auto emptyNodes = GetEmptyNodes(); + Retargeting retargeting; SkinnedModel::SkeletonMapping sourceMapping; if (retarget) + { sourceMapping = _graph.BaseModel->GetSkeletonMapping(mapping.SourceSkeleton); + retargeting.Init(mapping.SourceSkeleton->Skeleton, mapping.TargetSkeleton->Skeleton, mapping); + } for (int32 nodeIndex = 0; nodeIndex < nodes->Nodes.Count(); nodeIndex++) { const int32 nodeToChannel = mapping.NodesMapping[nodeIndex]; @@ -447,7 +449,8 @@ void AnimGraphExecutor::ProcessAnimation(AnimGraphImpulse* nodes, AnimGraphNode* // Optionally retarget animation into the skeleton used by the Anim Graph if (retarget) { - RetargetSkeletonNode(mapping.SourceSkeleton->Skeleton, mapping.TargetSkeleton->Skeleton, sourceMapping, srcNode, nodeIndex); + const int32 sourceIndex = sourceMapping.NodesMapping[nodeIndex]; + retargeting.RetargetNode(srcNode, srcNode, sourceIndex, nodeIndex); } // Mark node as used diff --git a/Source/Engine/Content/Assets/SkinnedModel.cpp b/Source/Engine/Content/Assets/SkinnedModel.cpp index ce6300b17..c0355ea0e 100644 --- a/Source/Engine/Content/Assets/SkinnedModel.cpp +++ b/Source/Engine/Content/Assets/SkinnedModel.cpp @@ -378,6 +378,7 @@ bool SkinnedModel::SetupSkeleton(const Array& nodes) model->Skeleton.Bones[i].LocalTransform = node.LocalTransform; model->Skeleton.Bones[i].NodeIndex = i; } + model->Skeleton.Dirty(); ClearSkeletonMapping(); // Calculate offset matrix (inverse bind pose transform) for every bone manually @@ -435,6 +436,7 @@ bool SkinnedModel::SetupSkeleton(const Array& nodes, const ArraySkeleton.Nodes = nodes; model->Skeleton.Bones = bones; + model->Skeleton.Dirty(); ClearSkeletonMapping(); // Calculate offset matrix (inverse bind pose transform) for every bone manually diff --git a/Source/Engine/Graphics/Models/SkeletonData.h b/Source/Engine/Graphics/Models/SkeletonData.h index 0b6c7d4d7..79e0be512 100644 --- a/Source/Engine/Graphics/Models/SkeletonData.h +++ b/Source/Engine/Graphics/Models/SkeletonData.h @@ -73,6 +73,10 @@ struct TIsPODType /// class FLAXENGINE_API SkeletonData { +private: + mutable volatile int64 _dirty = 1; + mutable Array _cachedPose; + public: /// /// The nodes in this hierarchy. The root node is always at the index 0. @@ -114,6 +118,11 @@ public: int32 FindNode(const StringView& name) const; int32 FindBone(int32 nodeIndex) const; + // Gets the skeleton nodes transforms in mesh space (pose). Calculated from the local node transforms and hierarchy. Cached internally and updated when data is dirty. + const Array& GetNodesPose() const; + + // Marks data as dirty (modified) to update internal state and recalculate cached data if needed (eg. skeleton pose). + void Dirty(); uint64 GetMemoryUsage() const; /// diff --git a/Source/Engine/Graphics/Models/SkinnedMesh.cpp b/Source/Engine/Graphics/Models/SkinnedMesh.cpp index 66b3e5701..369e5d825 100644 --- a/Source/Engine/Graphics/Models/SkinnedMesh.cpp +++ b/Source/Engine/Graphics/Models/SkinnedMesh.cpp @@ -154,6 +154,8 @@ void SkeletonData::Swap(SkeletonData& other) { Nodes.Swap(other.Nodes); Bones.Swap(other.Bones); + Dirty(); + other.Dirty(); } Transform SkeletonData::GetNodeTransform(int32 nodeIndex) const @@ -171,6 +173,7 @@ Transform SkeletonData::GetNodeTransform(int32 nodeIndex) const void SkeletonData::SetNodeTransform(int32 nodeIndex, const Transform& value) { CHECK(Nodes.IsValidIndex(nodeIndex)); + Dirty(); const int32 parentIndex = Nodes[nodeIndex].ParentIndex; if (parentIndex == -1) { @@ -201,6 +204,39 @@ int32 SkeletonData::FindBone(int32 nodeIndex) const return -1; } +const Array& SkeletonData::GetNodesPose() const +{ + // Guard with a simple atomic flag to avoid locking if the pose is up to date + if (Platform::AtomicRead(&_dirty)) + { + ScopeLock lock(RenderContext::GPULocker); + if (Platform::AtomicRead(&_dirty)) + { + Platform::AtomicStore(&_dirty, 0); + const SkeletonNode* nodes = Nodes.Get(); + const int32 nodesCount = Nodes.Count(); + _cachedPose.Resize(nodesCount); + Matrix* posePtr = _cachedPose.Get(); + for (int32 nodeIndex = 0; nodeIndex < nodesCount; nodeIndex++) + { + const SkeletonNode& node = nodes[nodeIndex]; + Matrix local; + Matrix::Transformation(node.LocalTransform.Scale, node.LocalTransform.Orientation, node.LocalTransform.Translation, local); + if (node.ParentIndex != -1) + Matrix::Multiply(local, posePtr[node.ParentIndex], posePtr[nodeIndex]); + else + posePtr[nodeIndex] = local; + } + } + } + return _cachedPose; +} + +void SkeletonData::Dirty() +{ + Platform::AtomicStore(&_dirty, 1); +} + uint64 SkeletonData::GetMemoryUsage() const { uint64 result = Nodes.Capacity() * sizeof(SkeletonNode) + Bones.Capacity() * sizeof(SkeletonBone); From a2b0d0714e89cfb07647211134b404d1b6a217d0 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 9 Feb 2026 15:03:54 +0100 Subject: [PATCH 15/23] Add more docs about new `ConcurrentDictionary` --- Source/Engine/Threading/ConcurrentDictionary.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Source/Engine/Threading/ConcurrentDictionary.h b/Source/Engine/Threading/ConcurrentDictionary.h index 22395d798..1b78a735e 100644 --- a/Source/Engine/Threading/ConcurrentDictionary.h +++ b/Source/Engine/Threading/ConcurrentDictionary.h @@ -7,6 +7,8 @@ /// /// Template for unordered dictionary with mapped key with value pairs that supports asynchronous data reading and writing. +/// Implemented via reader-writer lock pattern, so multiple threads can read data at the same time, but only one thread can write data and it blocks all other threads (including readers) until the write operation is finished. +/// Optimized for frequent reads (no lock operation). /// /// The type of the keys in the dictionary. /// The type of the values in the dictionary. From bd300651ecc61b4f94500658b0fbdce223a53dfe Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 9 Feb 2026 15:04:19 +0100 Subject: [PATCH 16/23] Fix Job System regression bug with incorrect initialization of job context --- Source/Engine/Threading/JobSystem.cpp | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/Source/Engine/Threading/JobSystem.cpp b/Source/Engine/Threading/JobSystem.cpp index 4d90f1c06..8d62aa8e3 100644 --- a/Source/Engine/Threading/JobSystem.cpp +++ b/Source/Engine/Threading/JobSystem.cpp @@ -38,29 +38,23 @@ public: struct alignas(int64) JobContext { // The next index of the job to process updated when picking a job by the thread. - volatile int64 JobIndex; + volatile int64 JobIndex = 0; // The number of jobs left to process updated after job completion by the thread. - volatile int64 JobsLeft; + volatile int64 JobsLeft = 0; // The unique label of this job used to identify it. Set to -1 when job is done. - volatile int64 JobLabel; + volatile int64 JobLabel = 0; // Utility atomic counter used to indicate that any job is waiting for this one to finish. Then Dependants can be accessed within thread-safe JobsLocker. - volatile int64 DependantsCount; + volatile int64 DependantsCount = 0; // The number of dependency jobs left to be finished before starting this job. - volatile int64 DependenciesLeft; + volatile int64 DependenciesLeft = 0; // The total number of jobs to process (in this context). - int32 JobsCount; + int32 JobsCount = 0; // The job function to execute. Function Job; // List of dependant jobs to signal when this job is done. Array Dependants; }; -template<> -struct TIsPODType -{ - enum { Value = false }; -}; - class JobSystemThread : public IRunnable { public: @@ -111,7 +105,7 @@ bool JobSystemService::Init() JobContextsSize = 256; JobContextsMask = JobContextsSize - 1; JobContexts = (JobContext*)Platform::Allocate(JobContextsSize * sizeof(JobContext), alignof(JobContext)); - Platform::MemoryClear(JobContexts, sizeof(JobContextsSize * sizeof(JobContext))); + Memory::ConstructItems(JobContexts, (int32)JobContextsSize); // Spawn threads ThreadsCount = Math::Min(Platform::GetCPUInfo().LogicalProcessorCount, ARRAY_COUNT(Threads)); @@ -150,6 +144,7 @@ void JobSystemService::Dispose() } } + Memory::DestructItems(JobContexts, (int32)JobContextsSize); Platform::Free(JobContexts); JobContexts = nullptr; } From 7b7a92758ff1a6f040c535b8e5a8858dd36d7380 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 9 Feb 2026 18:01:47 +0100 Subject: [PATCH 17/23] Optimize `VariantType` to use static type name in game or from non-reloadable assemblies This avoids many dynamic memory allocations in Visual Scripts and Anim Graph. # --- Source/Engine/Core/Types/Variant.cpp | 160 +++++++++++++----- Source/Engine/Core/Types/Variant.h | 36 +++- Source/Engine/Scripting/BinaryModule.cpp | 2 + Source/Engine/Scripting/BinaryModule.h | 5 + .../Engine/Scripting/ManagedCLR/MAssembly.h | 9 + Source/Engine/Scripting/ManagedCLR/MCore.cpp | 2 + Source/Engine/Scripting/Runtime/DotNet.cpp | 1 + Source/Engine/Scripting/Scripting.cpp | 4 + Source/Engine/Serialization/Serialization.cpp | 3 + Source/Engine/Serialization/Stream.cpp | 1 + 10 files changed, 170 insertions(+), 53 deletions(-) diff --git a/Source/Engine/Core/Types/Variant.cpp b/Source/Engine/Core/Types/Variant.cpp index 4ab8552d3..bd2d594df 100644 --- a/Source/Engine/Core/Types/Variant.cpp +++ b/Source/Engine/Core/Types/Variant.cpp @@ -18,8 +18,10 @@ #include "Engine/Core/Math/Ray.h" #include "Engine/Core/Math/Rectangle.h" #include "Engine/Core/Math/Transform.h" +#include "Engine/Scripting/BinaryModule.h" #include "Engine/Scripting/Scripting.h" #include "Engine/Scripting/ScriptingObject.h" +#include "Engine/Scripting/ManagedCLR/MAssembly.h" #include "Engine/Scripting/ManagedCLR/MClass.h" #include "Engine/Scripting/ManagedCLR/MCore.h" #include "Engine/Scripting/ManagedCLR/MUtils.h" @@ -33,6 +35,13 @@ #endif #define AsEnum AsUint64 +// Editor can hot-reload assemblies thus cached type names may become invalid, otherwise use modules that are never unloaded and their type names are always valid +#if USE_EDITOR +#define IS_VARIANT_TYPE_NAME_STATIC(canReload) !canReload +#else +#define IS_VARIANT_TYPE_NAME_STATIC(canReload) true +#endif + namespace { const char* InBuiltTypesTypeNames[40] = @@ -88,6 +97,7 @@ static_assert((int32)VariantType::Types::MAX == ARRAY_COUNT(InBuiltTypesTypeName VariantType::VariantType(Types type, const StringView& typeName) { Type = type; + StaticName = 0; TypeName = nullptr; const int32 length = typeName.Length(); if (length) @@ -98,32 +108,41 @@ VariantType::VariantType(Types type, const StringView& typeName) } } -VariantType::VariantType(Types type, const StringAnsiView& typeName) +VariantType::VariantType(Types type, const StringAnsiView& typeName, bool staticName) { Type = type; - TypeName = nullptr; - int32 length = typeName.Length(); - if (length) + StaticName = staticName && (typeName.HasChars() && typeName[typeName.Length()] == 0); // Require string to be null-terminated (not fully safe check) + if (staticName) { - TypeName = static_cast(Allocator::Allocate(length + 1)); - Platform::MemoryCopy(TypeName, typeName.Get(), length); - TypeName[length] = 0; + TypeName = (char*)typeName.Get(); } + else + { + TypeName = nullptr; + int32 length = typeName.Length(); + if (length) + { + TypeName = static_cast(Allocator::Allocate(length + 1)); + Platform::MemoryCopy(TypeName, typeName.Get(), length); + TypeName[length] = 0; + } + } +} + +VariantType::VariantType(Types type, const ScriptingType& sType) + : VariantType(type) +{ + SetTypeName(sType); } VariantType::VariantType(Types type, const MClass* klass) { Type = type; + StaticName = false; TypeName = nullptr; #if USE_CSHARP if (klass) - { - const StringAnsiView typeName = klass->GetFullName(); - const int32 length = typeName.Length(); - TypeName = static_cast(Allocator::Allocate(length + 1)); - Platform::MemoryCopy(TypeName, typeName.Get(), length); - TypeName[length] = 0; - } + SetTypeName(*klass); #endif } @@ -190,9 +209,9 @@ VariantType::VariantType(const StringAnsiView& typeName) if (const auto mclass = Scripting::FindClass(typeName)) { if (mclass->IsEnum()) - new(this) VariantType(Enum, typeName); + new(this) VariantType(Enum, mclass); else - new(this) VariantType(ManagedObject, typeName); + new(this) VariantType(ManagedObject, mclass); return; } #endif @@ -204,36 +223,48 @@ VariantType::VariantType(const StringAnsiView& typeName) VariantType::VariantType(const VariantType& other) { Type = other.Type; - TypeName = nullptr; - const int32 length = StringUtils::Length(other.TypeName); - if (length) + StaticName = other.StaticName; + if (StaticName) { - TypeName = static_cast(Allocator::Allocate(length + 1)); - Platform::MemoryCopy(TypeName, other.TypeName, length); - TypeName[length] = 0; + TypeName = other.TypeName; + } + else + { + TypeName = nullptr; + const int32 length = StringUtils::Length(other.TypeName); + if (length) + { + TypeName = static_cast(Allocator::Allocate(length + 1)); + Platform::MemoryCopy(TypeName, other.TypeName, length); + TypeName[length] = 0; + } } } VariantType::VariantType(VariantType&& other) noexcept { Type = other.Type; + StaticName = other.StaticName; TypeName = other.TypeName; other.Type = Null; other.TypeName = nullptr; + other.StaticName = 0; } VariantType& VariantType::operator=(const Types& type) { Type = type; - Allocator::Free(TypeName); + if (StaticName) + Allocator::Free(TypeName); TypeName = nullptr; + StaticName = 0; return *this; } VariantType& VariantType::operator=(VariantType&& other) { ASSERT(this != &other); - Swap(Type, other.Type); + Swap(Packed, other.Packed); Swap(TypeName, other.TypeName); return *this; } @@ -242,14 +273,23 @@ VariantType& VariantType::operator=(const VariantType& other) { ASSERT(this != &other); Type = other.Type; - Allocator::Free(TypeName); - TypeName = nullptr; - const int32 length = StringUtils::Length(other.TypeName); - if (length) + if (StaticName) + Allocator::Free(TypeName); + StaticName = other.StaticName; + if (StaticName) { - TypeName = static_cast(Allocator::Allocate(length + 1)); - Platform::MemoryCopy(TypeName, other.TypeName, length); - TypeName[length] = 0; + TypeName = other.TypeName; + } + else + { + TypeName = nullptr; + const int32 length = StringUtils::Length(other.TypeName); + if (length) + { + TypeName = static_cast(Allocator::Allocate(length + 1)); + Platform::MemoryCopy(TypeName, other.TypeName, length); + TypeName[length] = 0; + } } return *this; } @@ -283,24 +323,45 @@ void VariantType::SetTypeName(const StringView& typeName) { if (StringUtils::Length(TypeName) != typeName.Length()) { - Allocator::Free(TypeName); + if (StaticName) + Allocator::Free(TypeName); + StaticName = 0; TypeName = static_cast(Allocator::Allocate(typeName.Length() + 1)); TypeName[typeName.Length()] = 0; } StringUtils::ConvertUTF162ANSI(typeName.Get(), TypeName, typeName.Length()); } -void VariantType::SetTypeName(const StringAnsiView& typeName) +void VariantType::SetTypeName(const StringAnsiView& typeName, bool staticName) { - if (StringUtils::Length(TypeName) != typeName.Length()) + if (StringUtils::Length(TypeName) != typeName.Length() || StaticName != staticName) { - Allocator::Free(TypeName); + if (StaticName) + Allocator::Free(TypeName); + StaticName = staticName; + if (staticName) + { + TypeName = (char*)typeName.Get(); + return; + } TypeName = static_cast(Allocator::Allocate(typeName.Length() + 1)); TypeName[typeName.Length()] = 0; } Platform::MemoryCopy(TypeName, typeName.Get(), typeName.Length()); } +void VariantType::SetTypeName(const ScriptingType& type) +{ + SetTypeName(type.Fullname, IS_VARIANT_TYPE_NAME_STATIC(type.Module->CanReload)); +} + +void VariantType::SetTypeName(const MClass& klass) +{ +#if USE_CSHARP + SetTypeName(klass.GetFullName(), IS_VARIANT_TYPE_NAME_STATIC(klass.GetAssembly()->CanReload())); +#endif +} + const char* VariantType::GetTypeName() const { if (TypeName) @@ -322,6 +383,17 @@ VariantType VariantType::GetElementType() const return VariantType(); } +void VariantType::Inline() +{ + const ScriptingTypeHandle typeHandle = Scripting::FindScriptingType(TypeName); + if (typeHandle) + SetTypeName(typeHandle.GetType()); +#if USE_CSHARP + else if (const auto mclass = Scripting::FindClass(TypeName)) + SetTypeName(*mclass); +#endif +} + ::String VariantType::ToString() const { ::String result; @@ -632,8 +704,7 @@ Variant::Variant(ScriptingObject* v) AsObject = v; if (v) { - // TODO: optimize VariantType to support statically linked typename of ScriptingType (via 1 bit flag within Types enum, only in game as editor might hot-reload types) - Type.SetTypeName(v->GetType().Fullname); + Type.SetTypeName(v->GetType()); v->Deleted.Bind(this); } } @@ -644,9 +715,8 @@ Variant::Variant(Asset* v) AsAsset = v; if (v) { - // TODO: optimize VariantType to support statically linked typename of ScriptingType (via 1 bit flag within Types enum, only in game as editor might hot-reload types) - Type.SetTypeName(v->GetType().Fullname); v->AddReference(); + Type.SetTypeName(v->GetType()); v->OnUnloaded.Bind(this); } } @@ -3007,16 +3077,16 @@ Variant Variant::NewValue(const StringAnsiView& typeName) switch (type.Type) { case ScriptingTypes::Script: - v.SetType(VariantType(VariantType::Object, typeName)); + v.SetType(VariantType(VariantType::Object, type)); v.AsObject = type.Script.Spawn(ScriptingObjectSpawnParams(Guid::New(), typeHandle)); if (v.AsObject) v.AsObject->Deleted.Bind(&v); break; case ScriptingTypes::Structure: - v.SetType(VariantType(VariantType::Structure, typeName)); + v.SetType(VariantType(VariantType::Structure, type)); break; case ScriptingTypes::Enum: - v.SetType(VariantType(VariantType::Enum, typeName)); + v.SetType(VariantType(VariantType::Enum, type)); v.AsEnum = 0; break; default: @@ -3030,16 +3100,16 @@ Variant Variant::NewValue(const StringAnsiView& typeName) // Fallback to C#-only types if (mclass->IsEnum()) { - v.SetType(VariantType(VariantType::Enum, typeName)); + v.SetType(VariantType(VariantType::Enum, mclass)); v.AsEnum = 0; } else if (mclass->IsValueType()) { - v.SetType(VariantType(VariantType::Structure, typeName)); + v.SetType(VariantType(VariantType::Structure, mclass)); } else { - v.SetType(VariantType(VariantType::ManagedObject, typeName)); + v.SetType(VariantType(VariantType::ManagedObject, mclass)); MObject* instance = mclass->CreateInstance(); if (instance) { diff --git a/Source/Engine/Core/Types/Variant.h b/Source/Engine/Core/Types/Variant.h index 4fd6ab2eb..5c057bc65 100644 --- a/Source/Engine/Core/Types/Variant.h +++ b/Source/Engine/Core/Types/Variant.h @@ -17,7 +17,7 @@ struct ScriptingTypeHandle; /// API_STRUCT(InBuild) struct FLAXENGINE_API VariantType { - enum Types + enum Types : uint8 { Null = 0, Void, @@ -80,10 +80,22 @@ API_STRUCT(InBuild) struct FLAXENGINE_API VariantType }; public: - /// - /// The type of the variant. - /// - Types Type; + union + { + struct + { + /// + /// The type of the variant. + /// + Types Type; + + /// + /// Internal flag used to indicate that pointer to TypeName has been linked from a static/external memory that is stable (eg. ScriptingType or MClass). Allows avoiding dynamic memory allocation. + /// + uint8 StaticName : 1; + }; + uint16 Packed; + }; /// /// The optional additional full name of the scripting type. Used for Asset, Object, Enum, Structure types to describe type precisely. @@ -94,17 +106,20 @@ public: FORCE_INLINE VariantType() { Type = Null; + StaticName = 0; TypeName = nullptr; } FORCE_INLINE explicit VariantType(Types type) { Type = type; + StaticName = 0; TypeName = nullptr; } explicit VariantType(Types type, const StringView& typeName); - explicit VariantType(Types type, const StringAnsiView& typeName); + explicit VariantType(Types type, const StringAnsiView& typeName, bool staticName = false); + explicit VariantType(Types type, const ScriptingType& sType); explicit VariantType(Types type, const MClass* klass); explicit VariantType(const StringAnsiView& typeName); VariantType(const VariantType& other); @@ -112,7 +127,8 @@ public: FORCE_INLINE ~VariantType() { - Allocator::Free(TypeName); + if (!StaticName) + Allocator::Free(TypeName); } public: @@ -130,9 +146,13 @@ public: public: void SetTypeName(const StringView& typeName); - void SetTypeName(const StringAnsiView& typeName); + void SetTypeName(const StringAnsiView& typeName, bool staticName = false); + void SetTypeName(const ScriptingType& type); + void SetTypeName(const MClass& klass); const char* GetTypeName() const; VariantType GetElementType() const; + // Drops custom type name into the name allocated by the scripting module to reduce memory allocations when referencing types. + void Inline(); ::String ToString() const; }; diff --git a/Source/Engine/Scripting/BinaryModule.cpp b/Source/Engine/Scripting/BinaryModule.cpp index 4d26e678b..bbcd7de57 100644 --- a/Source/Engine/Scripting/BinaryModule.cpp +++ b/Source/Engine/Scripting/BinaryModule.cpp @@ -683,6 +683,8 @@ BinaryModule* BinaryModule::GetModule(const StringAnsiView& name) BinaryModule::BinaryModule() { + CanReload = USE_EDITOR; + // Register GetModules().Add(this); } diff --git a/Source/Engine/Scripting/BinaryModule.h b/Source/Engine/Scripting/BinaryModule.h index 70aa60fff..1da35401b 100644 --- a/Source/Engine/Scripting/BinaryModule.h +++ b/Source/Engine/Scripting/BinaryModule.h @@ -91,6 +91,11 @@ public: /// Dictionary TypeNameToTypeIndex; + /// + /// Determinates whether module can be hot-reloaded at runtime. For example, in Editor after scripts recompilation. Some modules such as engine and class library modules are static. + /// + bool CanReload; + public: /// diff --git a/Source/Engine/Scripting/ManagedCLR/MAssembly.h b/Source/Engine/Scripting/ManagedCLR/MAssembly.h index 6c0aa9579..0a785c06a 100644 --- a/Source/Engine/Scripting/ManagedCLR/MAssembly.h +++ b/Source/Engine/Scripting/ManagedCLR/MAssembly.h @@ -34,6 +34,7 @@ private: int32 _isLoaded : 1; int32 _isLoading : 1; + int32 _canReload : 1; mutable int32 _hasCachedClasses : 1; mutable ClassesDictionary _classes; @@ -125,6 +126,14 @@ public: return _isLoaded != 0; } + /// + /// Returns true if assembly can be hot-reloaded at runtime. For example, in Editor after scripts recompilation. Some assemblies such as engine and class library modules are static. + /// + FORCE_INLINE bool CanReload() const + { + return USE_EDITOR && _canReload; + } + /// /// Gets the assembly name. /// diff --git a/Source/Engine/Scripting/ManagedCLR/MCore.cpp b/Source/Engine/Scripting/ManagedCLR/MCore.cpp index 350cc39d2..6fa499002 100644 --- a/Source/Engine/Scripting/ManagedCLR/MCore.cpp +++ b/Source/Engine/Scripting/ManagedCLR/MCore.cpp @@ -45,6 +45,7 @@ MAssembly::MAssembly(MDomain* domain, const StringAnsiView& name) : _domain(domain) , _isLoaded(false) , _isLoading(false) + , _canReload(true) , _hasCachedClasses(false) , _reloadCount(0) , _name(name) @@ -59,6 +60,7 @@ MAssembly::MAssembly(MDomain* domain, const StringAnsiView& name, const StringAn , _domain(domain) , _isLoaded(false) , _isLoading(false) + , _canReload(true) , _hasCachedClasses(false) , _reloadCount(0) , _name(name) diff --git a/Source/Engine/Scripting/Runtime/DotNet.cpp b/Source/Engine/Scripting/Runtime/DotNet.cpp index 1c8c2bcdd..4be0ce1a1 100644 --- a/Source/Engine/Scripting/Runtime/DotNet.cpp +++ b/Source/Engine/Scripting/Runtime/DotNet.cpp @@ -874,6 +874,7 @@ bool MAssembly::LoadCorlib() return true; } _hasCachedClasses = false; + _canReload = false; CachedAssemblyHandles.Add(_handle, this); // End diff --git a/Source/Engine/Scripting/Scripting.cpp b/Source/Engine/Scripting/Scripting.cpp index 4e17bb80a..aa7e26674 100644 --- a/Source/Engine/Scripting/Scripting.cpp +++ b/Source/Engine/Scripting/Scripting.cpp @@ -502,6 +502,7 @@ bool Scripting::LoadBinaryModules(const String& path, const String& projectFolde // C# if (managedPath.HasChars() && !((ManagedBinaryModule*)module)->Assembly->IsLoaded()) { + (((ManagedBinaryModule*)module)->Assembly)->_canReload = module->CanReload; if (((ManagedBinaryModule*)module)->Assembly->Load(managedPath, nativePath)) { LOG(Error, "Failed to load C# assembly '{0}' for binary module {1}.", managedPath, name); @@ -528,6 +529,7 @@ bool Scripting::Load() #if USE_CSHARP // Load C# core assembly ManagedBinaryModule* corlib = GetBinaryModuleCorlib(); + corlib->CanReload = false; if (corlib->Assembly->LoadCorlib()) { LOG(Error, "Failed to load corlib C# assembly."); @@ -581,6 +583,8 @@ bool Scripting::Load() LOG(Error, "Failed to load FlaxEngine C# assembly."); return true; } + flaxEngineModule->CanReload = false; + flaxEngineModule->Assembly->_canReload = false; onEngineLoaded(flaxEngineModule->Assembly); // Insert type aliases for vector types that don't exist in C++ but are just typedef (properly redirect them to actual types) diff --git a/Source/Engine/Serialization/Serialization.cpp b/Source/Engine/Serialization/Serialization.cpp index a3dfc6ffa..1eb6b0181 100644 --- a/Source/Engine/Serialization/Serialization.cpp +++ b/Source/Engine/Serialization/Serialization.cpp @@ -78,7 +78,10 @@ void Serialization::Deserialize(ISerializable::DeserializeStream& stream, Varian v.Type = VariantType::Null; const auto mTypeName = SERIALIZE_FIND_MEMBER(stream, "TypeName"); if (mTypeName != stream.MemberEnd() && mTypeName->value.IsString()) + { v.SetTypeName(StringAnsiView(mTypeName->value.GetStringAnsiView())); + v.Inline(); + } } else { diff --git a/Source/Engine/Serialization/Stream.cpp b/Source/Engine/Serialization/Stream.cpp index f95e9ef9b..4c9b94042 100644 --- a/Source/Engine/Serialization/Stream.cpp +++ b/Source/Engine/Serialization/Stream.cpp @@ -255,6 +255,7 @@ void ReadStream::Read(VariantType& data) ptr++; } *ptr = 0; + data.Inline(); } else if (typeNameLength > 0) { From a1399c51575cd98a2a90b5f4eab7320b22bf7d35 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 9 Feb 2026 18:02:58 +0100 Subject: [PATCH 18/23] Optimize Anim Graph retarget to use cached pose to avoid dynamic memory allocation #3827 --- Source/Engine/Animations/Graph/AnimGraph.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Source/Engine/Animations/Graph/AnimGraph.cpp b/Source/Engine/Animations/Graph/AnimGraph.cpp index e99f53b8f..3c2630b51 100644 --- a/Source/Engine/Animations/Graph/AnimGraph.cpp +++ b/Source/Engine/Animations/Graph/AnimGraph.cpp @@ -336,11 +336,13 @@ void AnimGraphExecutor::Update(AnimGraphInstanceData& data, float dt) SkeletonData* animResultSkeleton = &skeleton; // Retarget animation when using output pose from other skeleton - AnimGraphImpulse retargetNodes; if (_graph.BaseModel != data.NodesSkeleton) { ANIM_GRAPH_PROFILE_EVENT("Retarget"); auto& targetSkeleton = data.NodesSkeleton->Skeleton; + if (context.PoseCacheSize == context.PoseCache.Count()) + context.PoseCache.AddOne(); + auto& retargetNodes = context.PoseCache[context.PoseCacheSize++]; retargetNodes = *animResult; retargetNodes.Nodes.Resize(targetSkeleton.Nodes.Count()); Transform* targetNodes = retargetNodes.Nodes.Get(); From d2ee61ef8ddd55685b03ceac7486d446cf9230c5 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 9 Feb 2026 18:06:57 +0100 Subject: [PATCH 19/23] Fix `GetNodesPose` issue when 2 threads call it at once --- Source/Engine/Graphics/Models/SkinnedMesh.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Engine/Graphics/Models/SkinnedMesh.cpp b/Source/Engine/Graphics/Models/SkinnedMesh.cpp index 369e5d825..0377003be 100644 --- a/Source/Engine/Graphics/Models/SkinnedMesh.cpp +++ b/Source/Engine/Graphics/Models/SkinnedMesh.cpp @@ -212,7 +212,6 @@ const Array& SkeletonData::GetNodesPose() const ScopeLock lock(RenderContext::GPULocker); if (Platform::AtomicRead(&_dirty)) { - Platform::AtomicStore(&_dirty, 0); const SkeletonNode* nodes = Nodes.Get(); const int32 nodesCount = Nodes.Count(); _cachedPose.Resize(nodesCount); @@ -227,6 +226,7 @@ const Array& SkeletonData::GetNodesPose() const else posePtr[nodeIndex] = local; } + Platform::AtomicStore(&_dirty, 0); } } return _cachedPose; From 0f6c1aea629cbe60468209a4b6c11e8f1a7af53d Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 9 Feb 2026 18:40:39 +0100 Subject: [PATCH 20/23] Fix various material nodes to work on D3D12 --- .../MaterialGenerator.Material.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Material.cpp b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Material.cpp index 09ac78e6b..f3d830382 100644 --- a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Material.cpp +++ b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Material.cpp @@ -384,7 +384,7 @@ void MaterialGenerator::ProcessGroupMaterial(Box* box, Node* node, Value& value) // Apply hardness, use 0.991 as max since any value above will result in harsh aliasing auto x2 = writeLocal(ValueType::Float, String::Format(TEXT("saturate((1 - {0}) * (1 / (1 - clamp({1}, 0, 0.991f))))"), x1.Value, hardness.Value), node); - value = writeLocal(ValueType::Float, String::Format(TEXT("{0} ? (1 - {1}) : {1}"), invert.Value, x2.Value), node); + value = writeLocal(ValueType::Float, String::Format(TEXT("select({0}, (1 - {1}), {1})"), invert.Value, x2.Value), node); break; } // Tiling & Offset @@ -459,7 +459,7 @@ void MaterialGenerator::ProcessGroupMaterial(Box* box, Node* node, Value& value) auto x = writeLocal(ValueType::Float, String::Format(TEXT("56100000.0f * pow({0}, -1) + 148.0f"), temperature.Value), node); // Value Y - auto y = writeLocal(ValueType::Float, String::Format(TEXT("{0} > 6500.0f ? 35200000.0f * pow({0}, -1) + 184.0f : 100.04f * log({0}) - 623.6f"), temperature.Value), node); + auto y = writeLocal(ValueType::Float, String::Format(TEXT("select({0} > 6500.0f, 35200000.0f * pow({0}, -1) + 184.0f, 100.04f * log({0}) - 623.6f)"), temperature.Value), node); // Value Z auto z = writeLocal(ValueType::Float, String::Format(TEXT("194.18f * log({0}) - 1448.6f"), temperature.Value), node); @@ -467,7 +467,7 @@ void MaterialGenerator::ProcessGroupMaterial(Box* box, Node* node, Value& value) // Final color auto color = writeLocal(ValueType::Float3, String::Format(TEXT("float3({0}, {1}, {2})"), x.Value, y.Value, z.Value), node); color = writeLocal(ValueType::Float3, String::Format(TEXT("clamp({0}, 0.0f, 255.0f) / 255.0f"), color.Value), node); - value = writeLocal(ValueType::Float3, String::Format(TEXT("{1} < 1000.0f ? {0} * {1}/1000.0f : {0}"), color.Value, temperature.Value), node); + value = writeLocal(ValueType::Float3, String::Format(TEXT("select({1} < 1000.0f, {0} * {1}/1000.0f, {0})"), color.Value, temperature.Value), node); break; } // HSVToRGB @@ -490,8 +490,8 @@ void MaterialGenerator::ProcessGroupMaterial(Box* box, Node* node, Value& value) const auto rgb = tryGetValue(node->GetBox(0), node->Values[0]).AsFloat3(); const auto epsilon = writeLocal(ValueType::Float, TEXT("1e-10"), node); - auto p = writeLocal(ValueType::Float4, String::Format(TEXT("({0}.g < {0}.b) ? float4({0}.bg, -1.0f, 2.0f/3.0f) : float4({0}.gb, 0.0f, -1.0f/3.0f)"), rgb.Value), node); - auto q = writeLocal(ValueType::Float4, String::Format(TEXT("({0}.r < {1}.x) ? float4({1}.xyw, {0}.r) : float4({0}.r, {1}.yzx)"), rgb.Value, p.Value), node); + auto p = writeLocal(ValueType::Float4, String::Format(TEXT("select(({0}.g < {0}.b), float4({0}.bg, -1.0f, 2.0f/3.0f), float4({0}.gb, 0.0f, -1.0f/3.0f))"), rgb.Value), node); + auto q = writeLocal(ValueType::Float4, String::Format(TEXT("select(({0}.r < {1}.x), float4({1}.xyw, {0}.r), float4({0}.r, {1}.yzx))"), rgb.Value, p.Value), node); auto c = writeLocal(ValueType::Float, String::Format(TEXT("{0}.x - min({0}.w, {0}.y)"), q.Value), node); auto h = writeLocal(ValueType::Float, String::Format(TEXT("abs(({0}.w - {0}.y) / (6 * {1} + {2}) + {0}.z)"), q.Value, c.Value, epsilon.Value), node); @@ -721,13 +721,13 @@ void MaterialGenerator::ProcessGroupMaterial(Box* box, Node* node, Value& value) blendFormula = TEXT("1.0 - (1.0 - base) * (1.0 - blend)"); break; case 5: // Overlay - blendFormula = TEXT("base <= 0.5 ? 2.0 * base * blend : 1.0 - 2.0 * (1.0 - base) * (1.0 - blend)"); + blendFormula = TEXT("select(base <= 0.5, 2.0 * base * blend, 1.0 - 2.0 * (1.0 - base) * (1.0 - blend))"); break; case 6: // Linear Burn blendFormula = TEXT("base + blend - 1.0"); break; case 7: // Linear Light - blendFormula = TEXT("blend < 0.5 ? max(base + (2.0 * blend) - 1.0, 0.0) : min(base + 2.0 * (blend - 0.5), 1.0)"); + blendFormula = TEXT("select(blend < 0.5, max(base + (2.0 * blend) - 1.0, 0.0), min(base + 2.0 * (blend - 0.5), 1.0))"); break; case 8: // Darken blendFormula = TEXT("min(base, blend)"); @@ -745,10 +745,10 @@ void MaterialGenerator::ProcessGroupMaterial(Box* box, Node* node, Value& value) blendFormula = TEXT("base / (blend + 0.000001)"); break; case 13: // Hard Light - blendFormula = TEXT("blend <= 0.5 ? 2.0 * base * blend : 1.0 - 2.0 * (1.0 - base) * (1.0 - blend)"); + blendFormula = TEXT("select(blend <= 0.5, 2.0 * base * blend, 1.0 - 2.0 * (1.0 - base) * (1.0 - blend))"); break; case 14: // Pin Light - blendFormula = TEXT("blend <= 0.5 ? min(base, 2.0 * blend) : max(base, 2.0 * (blend - 0.5))"); + blendFormula = TEXT("select(blend <= 0.5, min(base, 2.0 * blend), max(base, 2.0 * (blend - 0.5)))"); break; case 15: // Hard Mix blendFormula = TEXT("step(1.0 - base, blend)"); From 55f73b6cf7acf55613d52ad528db70eb69f03eff Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Mon, 9 Feb 2026 23:03:25 +0100 Subject: [PATCH 21/23] Optimize Animated Models bones updating with a batches memory pass and manual resource transitions batch #3917 #3827 --- Source/Engine/Level/Actors/AnimatedModel.cpp | 83 +++++++++++++++++--- Source/Engine/Renderer/RenderList.cpp | 31 ++++++++ Source/Engine/Renderer/RenderList.h | 19 +++++ Source/Engine/Renderer/Renderer.cpp | 2 + 4 files changed, 125 insertions(+), 10 deletions(-) diff --git a/Source/Engine/Level/Actors/AnimatedModel.cpp b/Source/Engine/Level/Actors/AnimatedModel.cpp index f6ac51f1c..11497e558 100644 --- a/Source/Engine/Level/Actors/AnimatedModel.cpp +++ b/Source/Engine/Level/Actors/AnimatedModel.cpp @@ -14,15 +14,84 @@ #include "Engine/Content/Deprecated.h" #include "Engine/Graphics/GPUContext.h" #include "Engine/Graphics/GPUDevice.h" +#include "Engine/Graphics/GPUPass.h" #include "Engine/Graphics/RenderTask.h" #include "Engine/Graphics/Models/MeshAccessor.h" #include "Engine/Graphics/Models/MeshDeformation.h" +#include "Engine/Renderer/RenderList.h" #include "Engine/Level/Scene/Scene.h" #include "Engine/Level/SceneObjectsFactory.h" -#include "Engine/Profiler/ProfilerMemory.h" -#include "Engine/Renderer/RenderList.h" +#include "Engine/Profiler/Profiler.h" #include "Engine/Serialization/Serialization.h" +// Implements efficient skinning data update within a shared GPUMemoryPass with manual resource transitions batched for all animated models. +class AnimatedModelRenderListExtension : public RenderList::IExtension +{ +public: + struct Item + { + GPUBuffer* BoneMatrices; + void* Data; + int32 Size; + }; + + RenderListBuffer Items; + + void PreDraw(GPUContext* context, RenderContextBatch& renderContextBatch) override + { + Items.Clear(); + } + + void PostDraw(GPUContext* context, RenderContextBatch& renderContextBatch) override + { + const int32 count = Items.Count(); + if (count == 0) + return; + PROFILE_GPU_CPU_NAMED("Update Bones"); + GPUMemoryPass pass(context); + Item* items = Items.Get(); + + // Special case for D3D11 backend that doesn't need transitions + if (context->GetDevice()->GetRendererType() <= RendererType::DirectX11) + { + for (int32 i = 0; i < count; i++) + { + Item& item = items[i]; + context->UpdateBuffer(item.BoneMatrices, item.Data, item.Size); + } + } + else + { + // Batch resource barriers for buffer update + for (int32 i = 0; i < count; i++) + pass.Transition(items[i].BoneMatrices, GPUResourceAccess::CopyWrite); + + // Update all buffers within Memory Pass (no barriers between) + for (int32 i = 0; i < count; i++) + { + Item& item = items[i]; + context->UpdateBuffer(item.BoneMatrices, item.Data, item.Size); + } + + // Batch resource barriers for reading in Vertex Shader + for (int32 i = 0; i < count; i++) + pass.Transition(items[i].BoneMatrices, GPUResourceAccess::ShaderReadGraphics); + } + +#if COMPILE_WITH_PROFILER + // Insert amount of kilobytes of data updated into profiler trace + uint32 dataSize = 0; + for (int32 i = 0; i < count; i++) + dataSize += items[i].Size; + ZoneValue(dataSize / 1024); +#endif + + Items.Clear(); + } +}; + +AnimatedModelRenderListExtension RenderListExtension; + AnimatedModel::AnimatedModel(const SpawnParams& params) : ModelInstanceActor(params) , _actualMode(AnimationUpdateMode::Never) @@ -1013,10 +1082,7 @@ void AnimatedModel::Draw(RenderContext& renderContext) // Flush skinning data with GPU if (_skinningData.IsDirty()) { - renderContext.List->AddDelayedDraw([this](GPUContext* context, RenderContextBatch& renderContextBatch, int32 renderContextIndex) - { - context->UpdateBuffer(_skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count()); - }); + RenderListExtension.Items.Add({ _skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count() }); _skinningData.OnFlush(); } @@ -1059,10 +1125,7 @@ void AnimatedModel::Draw(RenderContextBatch& renderContextBatch) // Flush skinning data with GPU if (_skinningData.IsDirty()) { - renderContext.List->AddDelayedDraw([this](GPUContext* context, RenderContextBatch& renderContextBatch, int32 renderContextIndex) - { - context->UpdateBuffer(_skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count()); - }); + RenderListExtension.Items.Add({ _skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count() }); _skinningData.OnFlush(); } diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index 23fcd52dc..ac643b4e8 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -31,6 +31,13 @@ namespace Array FreeRenderList; Array> MemPool; CriticalSection MemPoolLocker; + + typedef Array> ExtensionsList; + ExtensionsList& GetExtensions() + { + static ExtensionsList list; + return list; + } } void ShaderObjectData::Store(const Matrix& worldMatrix, const Matrix& prevWorldMatrix, const Rectangle& lightmapUVsArea, const Float3& geometrySize, float perInstanceRandom, float worldDeterminantSign, float lodDitherFactor) @@ -236,6 +243,16 @@ void RenderList::CleanupCache() MemPoolLocker.Unlock(); } +RenderList::IExtension::IExtension() +{ + GetExtensions().Add(this); +} + +RenderList::IExtension::~IExtension() +{ + GetExtensions().Remove(this); +} + bool RenderList::BlendableSettings::operator<(const BlendableSettings& other) const { // Sort by higher priority @@ -271,6 +288,20 @@ void RenderList::DrainDelayedDraws(GPUContext* context, RenderContextBatch& rend _delayedDraws.Clear(); } +#define LOOP_EXTENSIONS() const auto& extensions = GetExtensions(); for (auto* e : extensions) + +void RenderList::PreDraw(GPUContext* context, RenderContextBatch& renderContextBatch) +{ + LOOP_EXTENSIONS() + e->PreDraw(context, renderContextBatch); +} + +void RenderList::PostDraw(GPUContext* context, RenderContextBatch& renderContextBatch) +{ + LOOP_EXTENSIONS() + e->PostDraw(context, renderContextBatch); +} + void RenderList::BlendSettings() { PROFILE_CPU(); diff --git a/Source/Engine/Renderer/RenderList.h b/Source/Engine/Renderer/RenderList.h index 9b0a91ed6..b4b7121de 100644 --- a/Source/Engine/Renderer/RenderList.h +++ b/Source/Engine/Renderer/RenderList.h @@ -326,6 +326,21 @@ API_CLASS(Sealed) class FLAXENGINE_API RenderList : public ScriptingObject /// static void CleanupCache(); + /// + /// The rendering extension interface for custom drawing/effects linked to RenderList. Can be used during async scene drawing and further drawing/processing for more optimized rendering. + /// + class FLAXENGINE_API IExtension + { + public: + IExtension(); + virtual ~IExtension(); + + // Event called before collecting draw calls. Can be used for initialization. + virtual void PreDraw(GPUContext* context, RenderContextBatch& renderContextBatch) {} + // Event called after collecting draw calls. Can be used for cleanup or to perform additional drawing using collected draw calls data such as batched data processing. + virtual void PostDraw(GPUContext* context, RenderContextBatch& renderContextBatch) {} + }; + public: /// /// Memory storage with all draw-related data that lives during a single frame rendering time. Thread-safe to allocate memory during rendering jobs. @@ -475,6 +490,10 @@ public: AddDelayedDraw(MoveTemp(func)); } + // IExtension implementation + void PreDraw(GPUContext* context, RenderContextBatch& renderContextBatch); + void PostDraw(GPUContext* context, RenderContextBatch& renderContextBatch); + private: DynamicVertexBuffer _instanceBuffer; RenderListBuffer _delayedDraws; diff --git a/Source/Engine/Renderer/Renderer.cpp b/Source/Engine/Renderer/Renderer.cpp index 2f17e5294..96253934e 100644 --- a/Source/Engine/Renderer/Renderer.cpp +++ b/Source/Engine/Renderer/Renderer.cpp @@ -423,6 +423,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont if (setup.UseMotionVectors) view.Pass |= DrawPass::MotionVectors; renderContextBatch.GetMainContext() = renderContext; // Sync render context in batch with the current value + renderContext.List->PreDraw(context, renderContextBatch); bool drawShadows = !isGBufferDebug && EnumHasAnyFlags(view.Flags, ViewFlags::Shadows) && ShadowsPass::Instance()->IsReady(); switch (renderContext.View.Mode) @@ -462,6 +463,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont // Perform custom post-scene drawing (eg. GPU dispatches used by VFX) for (int32 i = 0; i < renderContextBatch.Contexts.Count(); i++) renderContextBatch.Contexts[i].List->DrainDelayedDraws(context, renderContextBatch, i); + renderContext.List->PostDraw(context, renderContextBatch); #if USE_EDITOR GBufferPass::Instance()->OverrideDrawCalls(renderContext); From 846b64048f0ceaed946d5d13cb10e2aea5c6c9c2 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 10 Feb 2026 11:42:17 +0100 Subject: [PATCH 22/23] Update build number --- Flax.flaxproj | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Flax.flaxproj b/Flax.flaxproj index 74ab82f55..c9c27281b 100644 --- a/Flax.flaxproj +++ b/Flax.flaxproj @@ -4,10 +4,10 @@ "Major": 1, "Minor": 11, "Revision": 0, - "Build": 6806 + "Build": 6807 }, "Company": "Flax", - "Copyright": "Copyright (c) 2012-2025 Wojciech Figat. All rights reserved.", + "Copyright": "Copyright (c) 2012-2026 Wojciech Figat. All rights reserved.", "GameTarget": "FlaxGame", "EditorTarget": "FlaxEditor", "Configuration": { From 1f9f281c317b2f343c5796e67142bec30acb3e5d Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 10 Feb 2026 15:02:05 +0100 Subject: [PATCH 23/23] Fix regression from 7b7a92758ff1a6f040c535b8e5a8858dd36d7380 for Visual Scripts --- Source/Engine/Content/Assets/VisualScript.cpp | 2 ++ Source/Engine/Core/Types/Variant.cpp | 31 +++++++++++-------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/Source/Engine/Content/Assets/VisualScript.cpp b/Source/Engine/Content/Assets/VisualScript.cpp index 329696dea..a7e132bdc 100644 --- a/Source/Engine/Content/Assets/VisualScript.cpp +++ b/Source/Engine/Content/Assets/VisualScript.cpp @@ -1700,6 +1700,8 @@ void VisualScript::CacheScriptingType() VisualScriptingBinaryModule::VisualScriptingBinaryModule() : _name("Visual Scripting") { + // Visual Scripts can be unloaded and loaded again even in game + CanReload = true; } ScriptingObject* VisualScriptingBinaryModule::VisualScriptObjectSpawn(const ScriptingObjectSpawnParams& params) diff --git a/Source/Engine/Core/Types/Variant.cpp b/Source/Engine/Core/Types/Variant.cpp index bd2d594df..dcabe8e48 100644 --- a/Source/Engine/Core/Types/Variant.cpp +++ b/Source/Engine/Core/Types/Variant.cpp @@ -35,13 +35,6 @@ #endif #define AsEnum AsUint64 -// Editor can hot-reload assemblies thus cached type names may become invalid, otherwise use modules that are never unloaded and their type names are always valid -#if USE_EDITOR -#define IS_VARIANT_TYPE_NAME_STATIC(canReload) !canReload -#else -#define IS_VARIANT_TYPE_NAME_STATIC(canReload) true -#endif - namespace { const char* InBuiltTypesTypeNames[40] = @@ -352,13 +345,13 @@ void VariantType::SetTypeName(const StringAnsiView& typeName, bool staticName) void VariantType::SetTypeName(const ScriptingType& type) { - SetTypeName(type.Fullname, IS_VARIANT_TYPE_NAME_STATIC(type.Module->CanReload)); + SetTypeName(type.Fullname, type.Module->CanReload); } void VariantType::SetTypeName(const MClass& klass) { #if USE_CSHARP - SetTypeName(klass.GetFullName(), IS_VARIANT_TYPE_NAME_STATIC(klass.GetAssembly()->CanReload())); + SetTypeName(klass.GetFullName(), klass.GetAssembly()->CanReload()); #endif } @@ -385,11 +378,23 @@ VariantType VariantType::GetElementType() const void VariantType::Inline() { - const ScriptingTypeHandle typeHandle = Scripting::FindScriptingType(TypeName); - if (typeHandle) - SetTypeName(typeHandle.GetType()); + // Check if the typename comes from static assembly which can be used to inline name instead of dynamic memory allocation + StringAnsiView typeName(TypeName); + auto& modules = BinaryModule::GetModules(); + for (auto module : modules) + { + int32 typeIndex; + if (!module->CanReload && module->FindScriptingType(typeName, typeIndex)) + { + ScriptingTypeHandle typeHandle(module, typeIndex); + SetTypeName(typeHandle.GetType().Fullname, true); + return; + } + } + #if USE_CSHARP - else if (const auto mclass = Scripting::FindClass(TypeName)) + // Try with C#-only types + if (const auto mclass = Scripting::FindClass(TypeName)) SetTypeName(*mclass); #endif }