Merge branch 'master' into Improve-HighlightedPopUpColor

2026-02-10 17:09:22 +01:00
parent 05a0a6b539 1f9f281c31
commit 07b6d3558a
36 changed files with 1106 additions and 427 deletions
--- a/Flax.flaxproj
+++ b/Flax.flaxproj
@@ -4,10 +4,10 @@
    "Major": 1,
    "Minor": 11,
    "Revision": 0,
-    "Build": 6806
+    "Build": 6807
  },
  "Company": "Flax",
-  "Copyright": "Copyright (c) 2012-2025 Wojciech Figat. All rights reserved.",
+  "Copyright": "Copyright (c) 2012-2026 Wojciech Figat. All rights reserved.",
  "GameTarget": "FlaxGame",
  "EditorTarget": "FlaxEditor",
  "Configuration": {
--- a/Source/Editor/Windows/Assets/AnimationGraphWindow.cs
+++ b/Source/Editor/Windows/Assets/AnimationGraphWindow.cs
@@ -99,7 +99,14 @@ namespace FlaxEditor.Windows.Assets
                Window = window;
                var surfaceParam = window.Surface.GetParameter(BaseModelId);
                if (surfaceParam != null)
-                    BaseModel = FlaxEngine.Content.LoadAsync<SkinnedModel>((Guid)surfaceParam.Value);
+                {
+                    if (surfaceParam.Value is Guid asGuid)
+                        BaseModel = FlaxEngine.Content.LoadAsync<SkinnedModel>(asGuid);
+                    else if (surfaceParam.Value is SkinnedModel asModel)
+                        BaseModel = asModel;
+                    else
+                        BaseModel = null;
+                }
                else
                    BaseModel = window.PreviewActor.GetParameterValue(BaseModelId) as SkinnedModel;
            }
--- a/Source/Editor/Windows/Assets/SkinnedModelWindow.cs
+++ b/Source/Editor/Windows/Assets/SkinnedModelWindow.cs
@@ -70,6 +70,13 @@ namespace FlaxEditor.Windows.Assets
                        return;
                    var nodes = proxy.Asset.Nodes;
                    var bones = proxy.Asset.Bones;
+                    var blendShapes = proxy.Asset.BlendShapes;
+
+                    // Info
+                    {
+                        var group = layout.Group("Info");
+                        group.Label($"Nodes: {nodes.Length}\nBones: {bones.Length}\nBlend Shapes: {blendShapes.Length}").AddCopyContextMenu().Label.Height *= 2.5f;
+                    }

                    // Skeleton Bones
                    {
@@ -109,7 +116,6 @@ namespace FlaxEditor.Windows.Assets
                    }

                    // Blend Shapes
-                    var blendShapes = proxy.Asset.BlendShapes;
                    if (blendShapes.Length != 0)
                    {
                        var group = layout.Group("Blend Shapes");
--- a/Source/Engine/Animations/Graph/AnimGraph.cpp
+++ b/Source/Engine/Animations/Graph/AnimGraph.cpp
@@ -336,11 +336,13 @@ void AnimGraphExecutor::Update(AnimGraphInstanceData& data, float dt)
    SkeletonData* animResultSkeleton = &skeleton;

    // Retarget animation when using output pose from other skeleton
-    AnimGraphImpulse retargetNodes;
    if (_graph.BaseModel != data.NodesSkeleton)
    {
        ANIM_GRAPH_PROFILE_EVENT("Retarget");
        auto& targetSkeleton = data.NodesSkeleton->Skeleton;
+        if (context.PoseCacheSize == context.PoseCache.Count())
+            context.PoseCache.AddOne();
+        auto& retargetNodes = context.PoseCache[context.PoseCacheSize++];
        retargetNodes = *animResult;
        retargetNodes.Nodes.Resize(targetSkeleton.Nodes.Count());
        Transform* targetNodes = retargetNodes.Nodes.Get();
--- a/Source/Engine/Animations/Graph/AnimGroup.Animation.cpp
+++ b/Source/Engine/Animations/Graph/AnimGroup.Animation.cpp
@@ -109,86 +109,84 @@ namespace
            nodes->RootMotion.Orientation.Normalize();
        }
    }
-
-    Matrix ComputeWorldMatrixRecursive(const SkeletonData& skeleton, int32 index, Matrix localMatrix)
-    {
-        const auto& node = skeleton.Nodes[index];
-        index = node.ParentIndex;
-        while (index != -1)
-        {
-            const auto& parent = skeleton.Nodes[index];
-            localMatrix *= parent.LocalTransform.GetWorld();
-            index = parent.ParentIndex;
-        }
-        return localMatrix;
-    }
-
-    Matrix ComputeInverseParentMatrixRecursive(const SkeletonData& skeleton, int32 index)
-    {
-        Matrix inverseParentMatrix = Matrix::Identity;
-        const auto& node = skeleton.Nodes[index];
-        if (node.ParentIndex != -1)
-        {
-            inverseParentMatrix = ComputeWorldMatrixRecursive(skeleton, index, inverseParentMatrix);
-            inverseParentMatrix = Matrix::Invert(inverseParentMatrix);
-        }
-        return inverseParentMatrix;
-    }
 }

-void RetargetSkeletonNode(const SkeletonData& sourceSkeleton, const SkeletonData& targetSkeleton, const SkinnedModel::SkeletonMapping& sourceMapping, Transform& node, int32 targetIndex)
+// Utility for retargeting animation poses between skeletons.
+struct Retargeting
 {
-    // sourceSkeleton - skeleton of Anim Graph (Base Locomotion pack)
-    // targetSkeleton - visual mesh skeleton (City Characters pack)
-    // target - anim graph input/output transformation of that node
-    const auto& targetNode = targetSkeleton.Nodes[targetIndex];
-    const int32 sourceIndex = sourceMapping.NodesMapping[targetIndex];
-    if (sourceIndex == -1)
+private:
+    const Matrix* _sourcePosePtr, * _targetPosePtr;
+    const SkeletonData* _sourceSkeleton, *_targetSkeleton;
+    const SkinnedModel::SkeletonMapping* _sourceMapping;
+
+public:
+    void Init(const SkeletonData& sourceSkeleton, const SkeletonData& targetSkeleton, const SkinnedModel::SkeletonMapping& sourceMapping)
    {
-        // Use T-pose
-        node = targetNode.LocalTransform;
-        return;
+        ASSERT_LOW_LAYER(targetSkeleton.Nodes.Count() == sourceMapping.NodesMapping.Length());
+        
+        // Cache world-space poses for source and target skeletons to avoid redundant calculations during retargeting
+        _sourcePosePtr = sourceSkeleton.GetNodesPose().Get();
+        _targetPosePtr = targetSkeleton.GetNodesPose().Get();
+
+        _sourceSkeleton = &sourceSkeleton;
+        _targetSkeleton = &targetSkeleton;
+        _sourceMapping = &sourceMapping;
    }
-    const auto& sourceNode = sourceSkeleton.Nodes[sourceIndex];

-    // [Reference: https://wickedengine.net/2022/09/animation-retargeting/comment-page-1/]
-
-    // Calculate T-Pose of source node, target node and target parent node
-    Matrix bindMatrix = ComputeWorldMatrixRecursive(sourceSkeleton, sourceIndex, sourceNode.LocalTransform.GetWorld());
-    Matrix inverseBindMatrix = Matrix::Invert(bindMatrix);
-    Matrix targetMatrix = ComputeWorldMatrixRecursive(targetSkeleton, targetIndex, targetNode.LocalTransform.GetWorld());
-    Matrix inverseParentMatrix = ComputeInverseParentMatrixRecursive(targetSkeleton, targetIndex);
-
-    // Target node animation is world-space difference of the animated source node inside the target's parent node world-space
-    Matrix localMatrix = inverseBindMatrix * ComputeWorldMatrixRecursive(sourceSkeleton, sourceIndex, node.GetWorld());
-    localMatrix = targetMatrix * localMatrix * inverseParentMatrix;
-
-    // Extract local node transformation
-    localMatrix.Decompose(node);
-}
-
-void RetargetSkeletonPose(const SkeletonData& sourceSkeleton, const SkeletonData& targetSkeleton, const SkinnedModel::SkeletonMapping& mapping, const Transform* sourceNodes, Transform* targetNodes)
-{
-    // TODO: cache source and target skeletons world-space poses for faster retargeting (use some pooled memory)
-    ASSERT_LOW_LAYER(targetSkeleton.Nodes.Count() == mapping.NodesMapping.Length());
-    for (int32 targetIndex = 0; targetIndex < targetSkeleton.Nodes.Count(); targetIndex++)
+    void RetargetNode(const Transform& source, Transform& target, int32 sourceIndex, int32 targetIndex)
    {
-        auto& targetNode = targetSkeleton.Nodes.Get()[targetIndex];
-        const int32 sourceIndex = mapping.NodesMapping.Get()[targetIndex];
-        Transform node;
+        // sourceSkeleton - skeleton of Anim Graph
+        // targetSkeleton - visual mesh skeleton
+        // target - anim graph input/output transformation of that node
+        const SkeletonNode& targetNode = _targetSkeleton->Nodes.Get()[targetIndex];
        if (sourceIndex == -1)
        {
            // Use T-pose
-            node = targetNode.LocalTransform;
+            target = targetNode.LocalTransform;
        }
        else
        {
-            // Retarget
-            node = sourceNodes[sourceIndex];
-            RetargetSkeletonNode(sourceSkeleton, targetSkeleton, mapping, node, targetIndex);
+            // [Reference: https://wickedengine.net/2022/09/animation-retargeting/comment-page-1/]
+
+            // Calculate T-Pose of source node, target node and target parent node
+            const Matrix* sourcePosePtr = _sourcePosePtr;
+            const Matrix* targetPosePtr = _targetPosePtr;
+            const Matrix& bindMatrix = sourcePosePtr[sourceIndex];
+            const Matrix& targetMatrix = targetPosePtr[targetIndex];
+            Matrix inverseParentMatrix;
+            if (targetNode.ParentIndex != -1)
+                Matrix::Invert(targetPosePtr[targetNode.ParentIndex], inverseParentMatrix);
+            else
+                inverseParentMatrix = Matrix::Identity;
+
+            // Target node animation is world-space difference of the animated source node inside the target's parent node world-space
+            const SkeletonNode& sourceNode = _sourceSkeleton->Nodes.Get()[sourceIndex];
+            Matrix localMatrix = source.GetWorld();
+            if (sourceNode.ParentIndex != -1)
+                localMatrix = localMatrix * sourcePosePtr[sourceNode.ParentIndex];
+            localMatrix = Matrix::Invert(bindMatrix) * localMatrix;
+            localMatrix = targetMatrix * localMatrix * inverseParentMatrix;
+
+            // Extract local node transformation
+            localMatrix.Decompose(target);
        }
-        targetNodes[targetIndex] = node;
    }
+
+    // Retargets a whole pose: for every target skeleton node writes the retargeted local transform into targetNodes.
+    // sourceNodes is indexed by source skeleton node index, targetNodes by target skeleton node index.
+    FORCE_INLINE void RetargetPose(const Transform* sourceNodes, Transform* targetNodes)
+    {
+        for (int32 targetIndex = 0; targetIndex < _targetSkeleton->Nodes.Count(); targetIndex++)
+        {
+            const int32 sourceIndex = _sourceMapping->NodesMapping.Get()[targetIndex];
+            // Unmapped nodes (-1) never read the source transform (T-pose is used), so avoid forming an out-of-bounds reference to sourceNodes[-1]
+            const Transform& source = sourceIndex != -1 ? sourceNodes[sourceIndex] : targetNodes[targetIndex];
+            RetargetNode(source, targetNodes[targetIndex], sourceIndex, targetIndex);
+        }
+    }
+};
+
+void RetargetSkeletonPose(const SkeletonData& sourceSkeleton, const SkeletonData& targetSkeleton, const SkinnedModel::SkeletonMapping& mapping, const Transform* sourceNodes, Transform* targetNodes)
+{
+    Retargeting retargeting; // Helper that caches the skeleton poses and the mapping for the duration of this call
+    retargeting.Init(sourceSkeleton, targetSkeleton, mapping); // Asserts (low-layer) that mapping has one entry per target skeleton node
+    retargeting.RetargetPose(sourceNodes, targetNodes); // Writes one transform per target skeleton node into targetNodes
 }

 AnimGraphTraceEvent& AnimGraphContext::AddTraceEvent(const AnimGraphNode* node)
@@ -431,9 +429,13 @@ void AnimGraphExecutor::ProcessAnimation(AnimGraphImpulse* nodes, AnimGraphNode*
    const bool weighted = weight < 1.0f;
    const bool retarget = mapping.SourceSkeleton && mapping.SourceSkeleton != mapping.TargetSkeleton;
    const auto emptyNodes = GetEmptyNodes();
+    Retargeting retargeting;
    SkinnedModel::SkeletonMapping sourceMapping;
    if (retarget)
+    {
        sourceMapping = _graph.BaseModel->GetSkeletonMapping(mapping.SourceSkeleton);
+        retargeting.Init(mapping.SourceSkeleton->Skeleton, mapping.TargetSkeleton->Skeleton, mapping);
+    }
    for (int32 nodeIndex = 0; nodeIndex < nodes->Nodes.Count(); nodeIndex++)
    {
        const int32 nodeToChannel = mapping.NodesMapping[nodeIndex];
@@ -447,7 +449,8 @@ void AnimGraphExecutor::ProcessAnimation(AnimGraphImpulse* nodes, AnimGraphNode*
            // Optionally retarget animation into the skeleton used by the Anim Graph
            if (retarget)
            {
-                RetargetSkeletonNode(mapping.SourceSkeleton->Skeleton, mapping.TargetSkeleton->Skeleton, sourceMapping, srcNode, nodeIndex);
+                const int32 sourceIndex = sourceMapping.NodesMapping[nodeIndex];
+                retargeting.RetargetNode(srcNode, srcNode, sourceIndex, nodeIndex);
            }

            // Mark node as used
--- a/Source/Engine/Content/Assets/SkinnedModel.cpp
+++ b/Source/Engine/Content/Assets/SkinnedModel.cpp
@@ -61,16 +61,24 @@ Array<String> SkinnedModel::GetBlendShapes()

 SkinnedModel::SkeletonMapping SkinnedModel::GetSkeletonMapping(Asset* source, bool autoRetarget)
 {
+    // Fast-path to use cached mapping
    SkeletonMapping mapping;
    mapping.TargetSkeleton = this;
+    SkeletonMappingData mappingData;
+    if (_skeletonMappingCache.TryGet(source, mappingData))
+    {
+        mapping.SourceSkeleton = mappingData.SourceSkeleton;
+        mapping.NodesMapping = mappingData.NodesMapping;
+        return mapping;
+    }
+    mapping.SourceSkeleton = nullptr;
+
    if (WaitForLoaded() || !source || source->WaitForLoaded())
        return mapping;
+    PROFILE_CPU();
    ScopeLock lock(Locker);
-    SkeletonMappingData mappingData;
    if (!_skeletonMappingCache.TryGet(source, mappingData))
    {
-        PROFILE_CPU();
-
        // Initialize the mapping
        SkeletonRetarget* retarget = nullptr;
        const Guid sourceId = source->GetID();
@@ -370,6 +378,7 @@ bool SkinnedModel::SetupSkeleton(const Array<SkeletonNode>& nodes)
        model->Skeleton.Bones[i].LocalTransform = node.LocalTransform;
        model->Skeleton.Bones[i].NodeIndex = i;
    }
+    model->Skeleton.Dirty();
    ClearSkeletonMapping();

    // Calculate offset matrix (inverse bind pose transform) for every bone manually
@@ -427,6 +436,7 @@ bool SkinnedModel::SetupSkeleton(const Array<SkeletonNode>& nodes, const Array<S
    // Setup
    model->Skeleton.Nodes = nodes;
    model->Skeleton.Bones = bones;
+    model->Skeleton.Dirty();
    ClearSkeletonMapping();

    // Calculate offset matrix (inverse bind pose transform) for every bone manually
@@ -823,13 +833,13 @@ bool SkinnedModel::SaveMesh(WriteStream& stream, const ModelData& modelData, int

 void SkinnedModel::ClearSkeletonMapping()
 {
-    for (auto& e : _skeletonMappingCache)
+    for (const auto& e : _skeletonMappingCache)
    {
        e.Key->OnUnloaded.Unbind<SkinnedModel, &SkinnedModel::OnSkeletonMappingSourceAssetUnloaded>(this);
 #if USE_EDITOR
        e.Key->OnReloading.Unbind<SkinnedModel, &SkinnedModel::OnSkeletonMappingSourceAssetUnloaded>(this);
 #endif
-        Allocator::Free(e.Value.NodesMapping.Get());
+        Allocator::Free((void*)e.Value.NodesMapping.Get());
    }
    _skeletonMappingCache.Clear();
 }
@@ -837,8 +847,9 @@ void SkinnedModel::ClearSkeletonMapping()
 void SkinnedModel::OnSkeletonMappingSourceAssetUnloaded(Asset* obj)
 {
    ScopeLock lock(Locker);
-    auto i = _skeletonMappingCache.Find(obj);
-    ASSERT(i != _skeletonMappingCache.End());
+    SkeletonMappingData mappingData;
+    bool found = _skeletonMappingCache.TryGet(obj, mappingData);
+    ASSERT(found);

    // Unlink event
    obj->OnUnloaded.Unbind<SkinnedModel, &SkinnedModel::OnSkeletonMappingSourceAssetUnloaded>(this);
@@ -847,8 +858,8 @@ void SkinnedModel::OnSkeletonMappingSourceAssetUnloaded(Asset* obj)
 #endif

    // Clear cache
-    Allocator::Free(i->Value.NodesMapping.Get());
-    _skeletonMappingCache.Remove(i);
+    Allocator::Free(mappingData.NodesMapping.Get());
+    _skeletonMappingCache.Remove(obj);
 }

 uint64 SkinnedModel::GetMemoryUsage() const
--- a/Source/Engine/Content/Assets/SkinnedModel.h
+++ b/Source/Engine/Content/Assets/SkinnedModel.h
@@ -3,7 +3,7 @@
 #pragma once

 #include "ModelBase.h"
-#include "Engine/Core/Collections/Dictionary.h"
+#include "Engine/Threading/ConcurrentDictionary.h"
 #include "Engine/Graphics/Models/SkinnedMesh.h"
 #include "Engine/Graphics/Models/SkeletonData.h"

@@ -101,9 +101,9 @@ public:
    struct FLAXENGINE_API SkeletonMapping
    {
        // Target skeleton.
-        AssetReference<SkinnedModel> TargetSkeleton;
+        SkinnedModel* TargetSkeleton;
        // Source skeleton.
-        AssetReference<SkinnedModel> SourceSkeleton;
+        SkinnedModel* SourceSkeleton;
        // The node-to-node mapping for the fast animation sampling for the skinned model skeleton nodes. Each item is index of the source skeleton node into target skeleton node.
        Span<int32> NodesMapping;
    };
@@ -115,7 +115,7 @@ private:
        Span<int32> NodesMapping;
    };

-    Dictionary<Asset*, SkeletonMappingData> _skeletonMappingCache;
+    ConcurrentDictionary<Asset*, SkeletonMappingData> _skeletonMappingCache;

 public:
    /// <summary>
--- a/Source/Engine/Content/Assets/VisualScript.cpp
+++ b/Source/Engine/Content/Assets/VisualScript.cpp
@@ -1700,6 +1700,8 @@ void VisualScript::CacheScriptingType()
 VisualScriptingBinaryModule::VisualScriptingBinaryModule()
    : _name("Visual Scripting")
 {
+    // Visual Scripts can be unloaded and loaded again even in game
+    CanReload = true;
 }

 ScriptingObject* VisualScriptingBinaryModule::VisualScriptObjectSpawn(const ScriptingObjectSpawnParams& params)
--- a/Source/Engine/Core/Collections/Dictionary.h
+++ b/Source/Engine/Core/Collections/Dictionary.h
@@ -4,6 +4,9 @@

 #include "HashSetBase.h"

+template<typename KeyType, typename ValueType, typename AllocationType>
+class ConcurrentDictionary;
+
 /// <summary>
 /// Describes single portion of space for the key and value pair in a hash map.
 /// </summary>
@@ -13,6 +16,7 @@ struct DictionaryBucket
    friend Memory;
    friend HashSetBase<AllocationType, DictionaryBucket>;
    friend Dictionary<KeyType, ValueType, AllocationType>;
+    friend ConcurrentDictionary<KeyType, ValueType, AllocationType>;

    /// <summary>The key.</summary>
    KeyType Key;
--- a/Source/Engine/Core/Types/Variant.cpp
+++ b/Source/Engine/Core/Types/Variant.cpp
@@ -18,8 +18,10 @@
 #include "Engine/Core/Math/Ray.h"
 #include "Engine/Core/Math/Rectangle.h"
 #include "Engine/Core/Math/Transform.h"
+#include "Engine/Scripting/BinaryModule.h"
 #include "Engine/Scripting/Scripting.h"
 #include "Engine/Scripting/ScriptingObject.h"
+#include "Engine/Scripting/ManagedCLR/MAssembly.h"
 #include "Engine/Scripting/ManagedCLR/MClass.h"
 #include "Engine/Scripting/ManagedCLR/MCore.h"
 #include "Engine/Scripting/ManagedCLR/MUtils.h"
@@ -88,6 +90,7 @@ static_assert((int32)VariantType::Types::MAX == ARRAY_COUNT(InBuiltTypesTypeName
 VariantType::VariantType(Types type, const StringView& typeName)
 {
    Type = type;
+    StaticName = 0;
    TypeName = nullptr;
    const int32 length = typeName.Length();
    if (length)
@@ -98,32 +101,41 @@ VariantType::VariantType(Types type, const StringView& typeName)
    }
 }

-VariantType::VariantType(Types type, const StringAnsiView& typeName)
+// Creates the type with an ANSI type name. When staticName is set (and the name passes validation) the name memory is linked instead of copied.
+VariantType::VariantType(Types type, const StringAnsiView& typeName, bool staticName)
 {
    Type = type;
-    TypeName = nullptr;
-    int32 length = typeName.Length();
-    if (length)
+    StaticName = staticName && (typeName.HasChars() && typeName[typeName.Length()] == 0); // Require string to be null-terminated (not fully safe check)
+    if (StaticName) // Use the validated flag (not the raw argument) so a rejected static name is copied and the destructor never frees external memory
    {
-        TypeName = static_cast<char*>(Allocator::Allocate(length + 1));
-        Platform::MemoryCopy(TypeName, typeName.Get(), length);
-        TypeName[length] = 0;
+        TypeName = (char*)typeName.Get();
    }
+    else
+    {
+        TypeName = nullptr;
+        int32 length = typeName.Length();
+        if (length)
+        {
+            TypeName = static_cast<char*>(Allocator::Allocate(length + 1));
+            Platform::MemoryCopy(TypeName, typeName.Get(), length);
+            TypeName[length] = 0;
+        }
+    }
+}
+
+VariantType::VariantType(Types type, const ScriptingType& sType)
+    : VariantType(type) // Delegates to the type-only constructor (no type name yet)
+{
+    SetTypeName(sType); // Name is taken from the scripting type via the ScriptingType overload of SetTypeName
 }

 VariantType::VariantType(Types type, const MClass* klass)
 {
    Type = type;
+    StaticName = false;
    TypeName = nullptr;
 #if USE_CSHARP
    if (klass)
-    {
-        const StringAnsiView typeName = klass->GetFullName();
-        const int32 length = typeName.Length();
-        TypeName = static_cast<char*>(Allocator::Allocate(length + 1));
-        Platform::MemoryCopy(TypeName, typeName.Get(), length);
-        TypeName[length] = 0;
-    }
+        SetTypeName(*klass);
 #endif
 }

@@ -190,9 +202,9 @@ VariantType::VariantType(const StringAnsiView& typeName)
    if (const auto mclass = Scripting::FindClass(typeName))
    {
        if (mclass->IsEnum())
-            new(this) VariantType(Enum, typeName);
+            new(this) VariantType(Enum, mclass);
        else
-            new(this) VariantType(ManagedObject, typeName);
+            new(this) VariantType(ManagedObject, mclass);
        return;
    }
 #endif
@@ -204,36 +216,48 @@ VariantType::VariantType(const StringAnsiView& typeName)
 VariantType::VariantType(const VariantType& other)
 {
    Type = other.Type;
-    TypeName = nullptr;
-    const int32 length = StringUtils::Length(other.TypeName);
-    if (length)
+    StaticName = other.StaticName;
+    if (StaticName)
    {
-        TypeName = static_cast<char*>(Allocator::Allocate(length + 1));
-        Platform::MemoryCopy(TypeName, other.TypeName, length);
-        TypeName[length] = 0;
+        TypeName = other.TypeName;
+    }
+    else
+    {
+        TypeName = nullptr;
+        const int32 length = StringUtils::Length(other.TypeName);
+        if (length)
+        {
+            TypeName = static_cast<char*>(Allocator::Allocate(length + 1));
+            Platform::MemoryCopy(TypeName, other.TypeName, length);
+            TypeName[length] = 0;
+        }
    }
 }

 VariantType::VariantType(VariantType&& other) noexcept
 {
    Type = other.Type;
+    StaticName = other.StaticName;
    TypeName = other.TypeName;
    other.Type = Null;
    other.TypeName = nullptr;
+    other.StaticName = 0;
 }

 VariantType& VariantType::operator=(const Types& type)
 {
    Type = type;
-    Allocator::Free(TypeName);
+    if (!StaticName) // Only heap-allocated names are owned by this object (matches the destructor); static names must never be freed
+        Allocator::Free(TypeName);
    TypeName = nullptr;
+    StaticName = 0;
    return *this;
 }

 VariantType& VariantType::operator=(VariantType&& other)
 {
    ASSERT(this != &other);
-    Swap(Type, other.Type);
+    Swap(Packed, other.Packed);
    Swap(TypeName, other.TypeName);
    return *this;
 }
@@ -242,14 +266,23 @@ VariantType& VariantType::operator=(const VariantType& other)
 {
    ASSERT(this != &other);
    Type = other.Type;
-    Allocator::Free(TypeName);
-    TypeName = nullptr;
-    const int32 length = StringUtils::Length(other.TypeName);
-    if (length)
+    if (!StaticName) // Release the previous name only when it was heap-allocated (matches the destructor); static names are not owned
+        Allocator::Free(TypeName);
+    StaticName = other.StaticName;
+    if (StaticName)
    {
-        TypeName = static_cast<char*>(Allocator::Allocate(length + 1));
-        Platform::MemoryCopy(TypeName, other.TypeName, length);
-        TypeName[length] = 0;
+        TypeName = other.TypeName; // Static names are shared by pointer
+    }
+    else
+    {
+        TypeName = nullptr;
+        const int32 length = StringUtils::Length(other.TypeName);
+        if (length)
+        {
+            TypeName = static_cast<char*>(Allocator::Allocate(length + 1));
+            Platform::MemoryCopy(TypeName, other.TypeName, length);
+            TypeName[length] = 0;
+        }
    }
    return *this;
 }
@@ -283,24 +316,45 @@ void VariantType::SetTypeName(const StringView& typeName)
 {
-    if (StringUtils::Length(TypeName) != typeName.Length())
+    if (StaticName || StringUtils::Length(TypeName) != typeName.Length()) // A statically-linked name is not owned, so always switch to a private buffer before writing
    {
-        Allocator::Free(TypeName);
+        if (!StaticName) // Only heap-allocated names are owned by this object (matches the destructor)
+            Allocator::Free(TypeName);
+        StaticName = 0;
        TypeName = static_cast<char*>(Allocator::Allocate(typeName.Length() + 1));
        TypeName[typeName.Length()] = 0;
    }
    StringUtils::ConvertUTF162ANSI(typeName.Get(), TypeName, typeName.Length());
 }

-void VariantType::SetTypeName(const StringAnsiView& typeName)
+// Sets the type name. With staticName the pointer is linked (caller guarantees stable, null-terminated memory); otherwise the name is copied into an owned buffer.
+void VariantType::SetTypeName(const StringAnsiView& typeName, bool staticName)
 {
-    if (StringUtils::Length(TypeName) != typeName.Length())
+    if (StringUtils::Length(TypeName) != typeName.Length() || StaticName || staticName) // The owned buffer can be reused only when both the old and the new name are dynamic
    {
-        Allocator::Free(TypeName);
+        if (!StaticName) // Only heap-allocated names are owned by this object (matches the destructor)
+            Allocator::Free(TypeName);
+        StaticName = staticName;
+        if (staticName)
+        {
+            TypeName = (char*)typeName.Get();
+            return;
+        }
        TypeName = static_cast<char*>(Allocator::Allocate(typeName.Length() + 1));
        TypeName[typeName.Length()] = 0;
    }
    Platform::MemoryCopy(TypeName, typeName.Get(), typeName.Length());
 }

+void VariantType::SetTypeName(const ScriptingType& type)
+{
+    // The type name memory is stable only when the owning module cannot be reloaded (same rule as in Inline), otherwise copy the name
+    SetTypeName(type.Fullname, !type.Module->CanReload);
+}
+
+void VariantType::SetTypeName(const MClass& klass)
+{
+#if USE_CSHARP
+    // The class name memory is stable only when the owning assembly cannot be reloaded, otherwise copy the name
+    SetTypeName(klass.GetFullName(), !klass.GetAssembly()->CanReload());
+#endif
+}
+
 const char* VariantType::GetTypeName() const
 {
    if (TypeName)
@@ -322,6 +376,29 @@ VariantType VariantType::GetElementType() const
    return VariantType();
 }

+void VariantType::Inline()
+{
+    // Look for the current type name in the non-reloadable scripting modules; when found, link TypeName to that scripting type's Fullname (static name) instead of keeping a dynamically allocated copy
+    StringAnsiView typeName(TypeName);
+    auto& modules = BinaryModule::GetModules();
+    for (auto module : modules)
+    {
+        int32 typeIndex;
+        if (!module->CanReload && module->FindScriptingType(typeName, typeIndex)) // Reloadable modules are skipped
+        {
+            ScriptingTypeHandle typeHandle(module, typeIndex);
+            SetTypeName(typeHandle.GetType().Fullname, true);
+            return;
+        }
+    }
+
+#if USE_CSHARP
+    // Fallback to C#-only types: the class is looked up by name and SetTypeName(MClass) decides whether the name gets linked or copied
+    if (const auto mclass = Scripting::FindClass(TypeName))
+        SetTypeName(*mclass);
+#endif
+}
+
 ::String VariantType::ToString() const
 {
    ::String result;
@@ -632,8 +709,7 @@ Variant::Variant(ScriptingObject* v)
    AsObject = v;
    if (v)
    {
-        // TODO: optimize VariantType to support statically linked typename of ScriptingType (via 1 bit flag within Types enum, only in game as editor might hot-reload types)
-        Type.SetTypeName(v->GetType().Fullname);
+        Type.SetTypeName(v->GetType());
        v->Deleted.Bind<Variant, &Variant::OnObjectDeleted>(this);
    }
 }
@@ -644,9 +720,8 @@ Variant::Variant(Asset* v)
    AsAsset = v;
    if (v)
    {
-        // TODO: optimize VariantType to support statically linked typename of ScriptingType (via 1 bit flag within Types enum, only in game as editor might hot-reload types)
-        Type.SetTypeName(v->GetType().Fullname);
        v->AddReference();
+        Type.SetTypeName(v->GetType());
        v->OnUnloaded.Bind<Variant, &Variant::OnAssetUnloaded>(this);
    }
 }
@@ -3007,16 +3082,16 @@ Variant Variant::NewValue(const StringAnsiView& typeName)
        switch (type.Type)
        {
        case ScriptingTypes::Script:
-            v.SetType(VariantType(VariantType::Object, typeName));
+            v.SetType(VariantType(VariantType::Object, type));
            v.AsObject = type.Script.Spawn(ScriptingObjectSpawnParams(Guid::New(), typeHandle));
            if (v.AsObject)
                v.AsObject->Deleted.Bind<Variant, &Variant::OnObjectDeleted>(&v);
            break;
        case ScriptingTypes::Structure:
-            v.SetType(VariantType(VariantType::Structure, typeName));
+            v.SetType(VariantType(VariantType::Structure, type));
            break;
        case ScriptingTypes::Enum:
-            v.SetType(VariantType(VariantType::Enum, typeName));
+            v.SetType(VariantType(VariantType::Enum, type));
            v.AsEnum = 0;
            break;
        default:
@@ -3030,16 +3105,16 @@ Variant Variant::NewValue(const StringAnsiView& typeName)
        // Fallback to C#-only types
        if (mclass->IsEnum())
        {
-            v.SetType(VariantType(VariantType::Enum, typeName));
+            v.SetType(VariantType(VariantType::Enum, mclass));
            v.AsEnum = 0;
        }
        else if (mclass->IsValueType())
        {
-            v.SetType(VariantType(VariantType::Structure, typeName));
+            v.SetType(VariantType(VariantType::Structure, mclass));
        }
        else
        {
-            v.SetType(VariantType(VariantType::ManagedObject, typeName));
+            v.SetType(VariantType(VariantType::ManagedObject, mclass));
            MObject* instance = mclass->CreateInstance();
            if (instance)
            {
--- a/Source/Engine/Core/Types/Variant.h
+++ b/Source/Engine/Core/Types/Variant.h
@@ -17,7 +17,7 @@ struct ScriptingTypeHandle;
 /// </summary>
 API_STRUCT(InBuild) struct FLAXENGINE_API VariantType
 {
-    enum Types
+    enum Types : uint8
    {
        Null = 0,
        Void,
@@ -80,10 +80,22 @@ API_STRUCT(InBuild) struct FLAXENGINE_API VariantType
    };

 public:
-    /// <summary>
-    /// The type of the variant.
-    /// </summary>
-    Types Type;
+    union
+    {
+        struct
+        {
+            /// <summary>
+            /// The type of the variant.
+            /// </summary>
+            Types Type;
+
+            /// <summary>
+            /// Internal flag used to indicate that pointer to TypeName has been linked from a static/external memory that is stable (eg. ScriptingType or MClass). Allows avoiding dynamic memory allocation.
+            /// </summary>
+            uint8 StaticName : 1;
+        };
+        uint16 Packed;
+    };

    /// <summary>
    /// The optional additional full name of the scripting type. Used for Asset, Object, Enum, Structure types to describe type precisely.
@@ -94,17 +106,20 @@ public:
    FORCE_INLINE VariantType()
    {
        Type = Null;
+        StaticName = 0;
        TypeName = nullptr;
    }

    FORCE_INLINE explicit VariantType(Types type)
    {
        Type = type;
+        StaticName = 0;
        TypeName = nullptr;
    }

    explicit VariantType(Types type, const StringView& typeName);
-    explicit VariantType(Types type, const StringAnsiView& typeName);
+    explicit VariantType(Types type, const StringAnsiView& typeName, bool staticName = false);
+    explicit VariantType(Types type, const ScriptingType& sType);
    explicit VariantType(Types type, const MClass* klass);
    explicit VariantType(const StringAnsiView& typeName);
    VariantType(const VariantType& other);
@@ -112,7 +127,8 @@ public:

    FORCE_INLINE ~VariantType()
    {
-        Allocator::Free(TypeName);
+        if (!StaticName)
+            Allocator::Free(TypeName);
    }

 public:
@@ -130,9 +146,13 @@ public:

 public:
    void SetTypeName(const StringView& typeName);
-    void SetTypeName(const StringAnsiView& typeName);
+    void SetTypeName(const StringAnsiView& typeName, bool staticName = false);
+    void SetTypeName(const ScriptingType& type);
+    void SetTypeName(const MClass& klass);
    const char* GetTypeName() const;
    VariantType GetElementType() const;
+    // Replaces the dynamically allocated type name with the name owned by the scripting module (when the type can be found) to reduce memory allocations when referencing types.
+    void Inline();
    ::String ToString() const;
 };

--- a/Source/Engine/Foliage/Foliage.cpp
+++ b/Source/Engine/Foliage/Foliage.cpp
@@ -7,17 +7,17 @@
 #include "Engine/Core/Random.h"
 #include "Engine/Engine/Engine.h"
 #include "Engine/Graphics/RenderTask.h"
+#include "Engine/Graphics/GPUDevice.h"
 #include "Engine/Content/Deprecated.h"
 #if !FOLIAGE_USE_SINGLE_QUAD_TREE
 #include "Engine/Threading/JobSystem.h"
 #if FOLIAGE_USE_DRAW_CALLS_BATCHING
 #include "Engine/Graphics/RenderTools.h"
-#include "Engine/Graphics/GPUDevice.h"
-#include "Engine/Renderer/RenderList.h"
 #endif
 #endif
 #include "Engine/Level/SceneQuery.h"
 #include "Engine/Profiler/ProfilerCPU.h"
+#include "Engine/Renderer/RenderList.h"
 #include "Engine/Renderer/GlobalSignDistanceFieldPass.h"
 #include "Engine/Renderer/GI/GlobalSurfaceAtlasPass.h"
 #include "Engine/Serialization/Serialization.h"
@@ -41,8 +41,7 @@ Foliage::Foliage(const SpawnParams& params)

 void Foliage::AddToCluster(ChunkedArray<FoliageCluster, FOLIAGE_CLUSTER_CHUNKS_SIZE>& clusters, FoliageCluster* cluster, FoliageInstance& instance)
 {
-    ASSERT(instance.Bounds.Radius > ZeroTolerance);
-    ASSERT(cluster->Bounds.Intersects(instance.Bounds));
+    ASSERT_LOW_LAYER(instance.Bounds.Radius > ZeroTolerance);

    // Minor clusters don't use bounds intersection but try to find the first free cluster instead
    if (cluster->IsMinor)
@@ -63,6 +62,7 @@ void Foliage::AddToCluster(ChunkedArray<FoliageCluster, FOLIAGE_CLUSTER_CHUNKS_S
    else
    {
        // Find target cluster
+        ASSERT(cluster->Bounds.Intersects(instance.Bounds));
        while (cluster->Children[0])
        {
 #define CHECK_CHILD(idx) \
@@ -193,6 +193,8 @@ void Foliage::DrawCluster(RenderContext& renderContext, FoliageCluster* cluster,
        // Draw visible instances
        const auto frame = Engine::FrameCount;
        const auto model = type.Model.Get();
+        const auto transitionLOD = renderContext.View.Pass != DrawPass::Depth; // Let the main view pass update LOD transitions
+        // TODO: move DrawState to be stored per-view (so shadows can fade objects on their own)
        for (int32 i = 0; i < cluster->Instances.Count(); i++)
        {
            auto& instance = *cluster->Instances.Get()[i];
@@ -210,20 +212,29 @@ void Foliage::DrawCluster(RenderContext& renderContext, FoliageCluster* cluster,
                    // Handling model fade-out transition
                    if (modelFrame == frame && instance.DrawState.PrevLOD != -1)
                    {
-                        // Check if start transition
-                        if (instance.DrawState.LODTransition == 255)
+                        if (transitionLOD)
                        {
-                            instance.DrawState.LODTransition = 0;
-                        }
+                            // Check if start transition
+                            if (instance.DrawState.LODTransition == 255)
+                            {
+                                instance.DrawState.LODTransition = 0;
+                            }

-                        RenderTools::UpdateModelLODTransition(instance.DrawState.LODTransition);
+                            RenderTools::UpdateModelLODTransition(instance.DrawState.LODTransition);

-                        // Check if end transition
-                        if (instance.DrawState.LODTransition == 255)
-                        {
-                            instance.DrawState.PrevLOD = lodIndex;
+                            // Check if end transition
+                            if (instance.DrawState.LODTransition == 255)
+                            {
+                                instance.DrawState.PrevLOD = lodIndex;
+                            }
+                            else
+                            {
+                                const auto prevLOD = model->ClampLODIndex(instance.DrawState.PrevLOD);
+                                const float normalizedProgress = static_cast<float>(instance.DrawState.LODTransition) * (1.0f / 255.0f);
+                                DrawInstance(renderContext, instance, type, model, prevLOD, normalizedProgress, drawCallsLists, result);
+                            }
                        }
-                        else
+                        else if (instance.DrawState.LODTransition < 255)
                        {
                            const auto prevLOD = model->ClampLODIndex(instance.DrawState.PrevLOD);
                            const float normalizedProgress = static_cast<float>(instance.DrawState.LODTransition) * (1.0f / 255.0f);
@@ -236,29 +247,32 @@ void Foliage::DrawCluster(RenderContext& renderContext, FoliageCluster* cluster,
                lodIndex += renderContext.View.ModelLODBias;
                lodIndex = model->ClampLODIndex(lodIndex);

-                // Check if it's the new frame and could update the drawing state (note: model instance could be rendered many times per frame to different viewports)
-                if (modelFrame == frame)
+                if (transitionLOD)
                {
-                    // Check if start transition
-                    if (instance.DrawState.PrevLOD != lodIndex && instance.DrawState.LODTransition == 255)
+                    // Check if it's the new frame and could update the drawing state (note: model instance could be rendered many times per frame to different viewports)
+                    if (modelFrame == frame)
                    {
+                        // Check if start transition
+                        if (instance.DrawState.PrevLOD != lodIndex && instance.DrawState.LODTransition == 255)
+                        {
+                            instance.DrawState.LODTransition = 0;
+                        }
+
+                        RenderTools::UpdateModelLODTransition(instance.DrawState.LODTransition);
+
+                        // Check if end transition
+                        if (instance.DrawState.LODTransition == 255)
+                        {
+                            instance.DrawState.PrevLOD = lodIndex;
+                        }
+                    }
+                    // Check if there was a gap between frames in drawing this model instance
+                    else if (modelFrame < frame || instance.DrawState.PrevLOD == -1)
+                    {
+                        // Reset state
+                        instance.DrawState.PrevLOD = lodIndex;
                        instance.DrawState.LODTransition = 0;
                    }
-
-                    RenderTools::UpdateModelLODTransition(instance.DrawState.LODTransition);
-
-                    // Check if end transition
-                    if (instance.DrawState.LODTransition == 255)
-                    {
-                        instance.DrawState.PrevLOD = lodIndex;
-                    }
-                }
-                // Check if there was a gap between frames in drawing this model instance
-                else if (modelFrame < frame || instance.DrawState.PrevLOD == -1)
-                {
-                    // Reset state
-                    instance.DrawState.PrevLOD = lodIndex;
-                    instance.DrawState.LODTransition = 255;
                }

                // Draw
@@ -281,7 +295,8 @@ void Foliage::DrawCluster(RenderContext& renderContext, FoliageCluster* cluster,

                //DebugDraw::DrawSphere(instance.Bounds, Color::YellowGreen);

-                instance.DrawState.PrevFrame = frame;
+                if (transitionLOD)
+                    instance.DrawState.PrevFrame = frame;
            }
        }
    }
@@ -350,7 +365,7 @@ void Foliage::DrawCluster(RenderContext& renderContext, FoliageCluster* cluster,
                draw.DrawState = &instance.DrawState;
                draw.Bounds = sphere;
                draw.PerInstanceRandom = instance.Random;
-                draw.DrawModes = type._drawModes;
+                draw.DrawModes = type.DrawModes;
                draw.SetStencilValue(_layer);
                type.Model->Draw(renderContext, draw);

--- a/Source/Engine/Foliage/FoliageCluster.cpp
+++ b/Source/Engine/Foliage/FoliageCluster.cpp
@@ -21,26 +21,7 @@ void FoliageCluster::Init(const BoundingBox& bounds)

 void FoliageCluster::UpdateTotalBoundsAndCullDistance()
 {
-    if (Children[0])
-    {
-        ASSERT(Instances.IsEmpty());
-
-        Children[0]->UpdateTotalBoundsAndCullDistance();
-        Children[1]->UpdateTotalBoundsAndCullDistance();
-        Children[2]->UpdateTotalBoundsAndCullDistance();
-        Children[3]->UpdateTotalBoundsAndCullDistance();
-
-        TotalBounds = Children[0]->TotalBounds;
-        BoundingBox::Merge(TotalBounds, Children[1]->TotalBounds, TotalBounds);
-        BoundingBox::Merge(TotalBounds, Children[2]->TotalBounds, TotalBounds);
-        BoundingBox::Merge(TotalBounds, Children[3]->TotalBounds, TotalBounds);
-
-        MaxCullDistance = Children[0]->MaxCullDistance;
-        MaxCullDistance = Math::Max(MaxCullDistance, Children[1]->MaxCullDistance);
-        MaxCullDistance = Math::Max(MaxCullDistance, Children[2]->MaxCullDistance);
-        MaxCullDistance = Math::Max(MaxCullDistance, Children[3]->MaxCullDistance);
-    }
-    else if (Instances.HasItems())
+    if (Instances.HasItems())
    {
        BoundingBox box;
        BoundingBox::FromSphere(Instances[0]->Bounds, TotalBounds);
@@ -58,6 +39,30 @@ void FoliageCluster::UpdateTotalBoundsAndCullDistance()
        MaxCullDistance = 0;
    }

+    if (Children[0])
+    {
+        Children[0]->UpdateTotalBoundsAndCullDistance();
+        Children[1]->UpdateTotalBoundsAndCullDistance();
+        Children[2]->UpdateTotalBoundsAndCullDistance();
+        Children[3]->UpdateTotalBoundsAndCullDistance();
+
+        if (Instances.HasItems())
+            BoundingBox::Merge(TotalBounds, Children[0]->TotalBounds, TotalBounds);
+        else
+            TotalBounds = Children[0]->TotalBounds;
+        BoundingBox::Merge(TotalBounds, Children[1]->TotalBounds, TotalBounds);
+        BoundingBox::Merge(TotalBounds, Children[2]->TotalBounds, TotalBounds);
+        BoundingBox::Merge(TotalBounds, Children[3]->TotalBounds, TotalBounds);
+
+        if (Instances.HasItems())
+            MaxCullDistance = Math::Max(MaxCullDistance, Children[0]->MaxCullDistance);
+        else
+            MaxCullDistance = Children[0]->MaxCullDistance;
+        MaxCullDistance = Math::Max(MaxCullDistance, Children[1]->MaxCullDistance);
+        MaxCullDistance = Math::Max(MaxCullDistance, Children[2]->MaxCullDistance);
+        MaxCullDistance = Math::Max(MaxCullDistance, Children[3]->MaxCullDistance);
+    }
+
    BoundingSphere::FromBox(TotalBounds, TotalBoundsSphere);
 }

--- a/Source/Engine/Graphics/Models/SkeletonData.h
+++ b/Source/Engine/Graphics/Models/SkeletonData.h
@@ -73,6 +73,10 @@ struct TIsPODType<SkeletonBone>
 /// </remarks>
 class FLAXENGINE_API SkeletonData
 {
+private:
+    mutable volatile int64 _dirty = 1;
+    mutable Array<Matrix> _cachedPose;
+
 public:
    /// <summary>
    /// The nodes in this hierarchy. The root node is always at the index 0.
@@ -114,6 +118,11 @@ public:
    int32 FindNode(const StringView& name) const;
    int32 FindBone(int32 nodeIndex) const;

+    // Gets the skeleton nodes transforms in mesh space (pose). Calculated from the local node transforms and hierarchy. Cached internally and updated when data is dirty.
+    const Array<Matrix>& GetNodesPose() const;
+
+    // Marks data as dirty (modified) to update internal state and recalculate cached data if needed (eg. skeleton pose).
+    void Dirty();
    uint64 GetMemoryUsage() const;

    /// <summary>
--- a/Source/Engine/Graphics/Models/SkinnedMesh.cpp
+++ b/Source/Engine/Graphics/Models/SkinnedMesh.cpp
@@ -154,6 +154,8 @@ void SkeletonData::Swap(SkeletonData& other)
 {
    Nodes.Swap(other.Nodes);
    Bones.Swap(other.Bones);
+    Dirty();
+    other.Dirty();
 }

 Transform SkeletonData::GetNodeTransform(int32 nodeIndex) const
@@ -171,6 +173,7 @@ Transform SkeletonData::GetNodeTransform(int32 nodeIndex) const
 void SkeletonData::SetNodeTransform(int32 nodeIndex, const Transform& value)
 {
    CHECK(Nodes.IsValidIndex(nodeIndex));
+    Dirty();
    const int32 parentIndex = Nodes[nodeIndex].ParentIndex;
    if (parentIndex == -1)
    {
@@ -201,6 +204,39 @@ int32 SkeletonData::FindBone(int32 nodeIndex) const
    return -1;
 }

+const Array<Matrix>& SkeletonData::GetNodesPose() const // Returns the cached per-node pose matrices, rebuilding them first when the dirty flag is set
+{
+    // Guard with a simple atomic flag to avoid locking if the pose is up to date
+    if (Platform::AtomicRead(&_dirty))
+    {
+        ScopeLock lock(RenderContext::GPULocker);
+        if (Platform::AtomicRead(&_dirty)) // Re-check under the lock in case another thread rebuilt the pose while this one was waiting
+        {
+            const SkeletonNode* nodes = Nodes.Get();
+            const int32 nodesCount = Nodes.Count();
+            _cachedPose.Resize(nodesCount);
+            Matrix* posePtr = _cachedPose.Get();
+            for (int32 nodeIndex = 0; nodeIndex < nodesCount; nodeIndex++)
+            {
+                const SkeletonNode& node = nodes[nodeIndex];
+                Matrix local;
+                Matrix::Transformation(node.LocalTransform.Scale, node.LocalTransform.Orientation, node.LocalTransform.Translation, local);
+                if (node.ParentIndex != -1)
+                    Matrix::Multiply(local, posePtr[node.ParentIndex], posePtr[nodeIndex]); // Reads the parent's pose, so parents presumably precede children in Nodes - NOTE(review): confirm ordering guarantee
+                else
+                    posePtr[nodeIndex] = local; // No parent: the pose is just the local transform
+            }
+            Platform::AtomicStore(&_dirty, 0); // NOTE(review): a Dirty() call made while the loop above runs would be overwritten here - confirm nodes are not modified concurrently
+        }
+    }
+    return _cachedPose;
+}
+
+void SkeletonData::Dirty() // Sets the dirty flag so the next GetNodesPose call rebuilds the cached pose
+{
+    Platform::AtomicStore(&_dirty, 1);
+}
+
 uint64 SkeletonData::GetMemoryUsage() const
 {
    uint64 result = Nodes.Capacity() * sizeof(SkeletonNode) + Bones.Capacity() * sizeof(SkeletonBone);
--- a/Source/Engine/Graphics/Models/SkinnedMeshDrawData.cpp
+++ b/Source/Engine/Graphics/Models/SkinnedMeshDrawData.cpp
@@ -5,11 +5,6 @@
 #include "Engine/Animations/Config.h"
 #include "Engine/Core/Log.h"
 #include "Engine/Core/Math/Matrix.h"
-#include "Engine/Core/Math/Matrix3x4.h"
-
-SkinnedMeshDrawData::SkinnedMeshDrawData()
-{
-}

 SkinnedMeshDrawData::~SkinnedMeshDrawData()
 {
@@ -33,7 +28,7 @@ void SkinnedMeshDrawData::Setup(int32 bonesCount)

    BonesCount = bonesCount;
    _hasValidData = false;
-    _isDirty = false;
+    _isDirty = true;
    Data.Resize(BoneMatrices->GetSize());
    SAFE_DELETE_GPU_RESOURCE(PrevBoneMatrices);
 }
--- a/Source/Engine/Graphics/Models/SkinnedMeshDrawData.h
+++ b/Source/Engine/Graphics/Models/SkinnedMeshDrawData.h
@@ -36,11 +36,6 @@ public:
    Array<byte> Data;

 public:
-    /// <summary>
-    /// Initializes a new instance of the <see cref="SkinnedMeshDrawData"/> class.
-    /// </summary>
-    SkinnedMeshDrawData();
-
    /// <summary>
    /// Finalizes an instance of the <see cref="SkinnedMeshDrawData"/> class.
    /// </summary>
@@ -76,7 +71,7 @@ public:
    void OnDataChanged(bool dropHistory);

    /// <summary>
-    /// After bones Data has been send to the GPU buffer.
+    /// After bones Data has been sent to the GPU buffer.
    /// </summary>
    void OnFlush()
    {
--- a/Source/Engine/Graphics/Shaders/GPUVertexLayout.cpp
+++ b/Source/Engine/Graphics/Shaders/GPUVertexLayout.cpp
@@ -8,6 +8,7 @@
 #include "Engine/Graphics/GPUDevice.h"
 #include "Engine/Graphics/GPUBuffer.h"
 #include "Engine/Graphics/PixelFormatExtensions.h"
+#include "Engine/Threading/ConcurrentDictionary.h"
 #if GPU_ENABLE_RESOURCE_NAMING
 #include "Engine/Scripting/Enums.h"
 #endif
@@ -40,27 +41,37 @@ uint32 GetHash(const VertexBufferLayouts& key)

 namespace
 {
-    CriticalSection CacheLocker;
-    Dictionary<uint32, GPUVertexLayout*> LayoutCache;
-    Dictionary<VertexBufferLayouts, GPUVertexLayout*> VertexBufferCache;
+    ConcurrentDictionary<uint32, GPUVertexLayout*> LayoutCache;
+    ConcurrentDictionary<VertexBufferLayouts, GPUVertexLayout*> VertexBufferCache;

-    GPUVertexLayout* AddCache(const VertexBufferLayouts& key, int32 count)
+    GPUVertexLayout* GetCache(const VertexBufferLayouts& key, int32 count) // Gets (or lazily builds and caches) the combined layout for the first 'count' per-slot layouts in key
     {
-        GPUVertexLayout::Elements elements;
-        bool anyValid = false;
-        for (int32 slot = 0; slot < count; slot++)
+        GPUVertexLayout* result;
+        if (!VertexBufferCache.TryGet(key, result))
         {
-            if (key.Layouts[slot])
+            GPUVertexLayout::Elements elements;
+            bool anyValid = false;
+            for (int32 slot = 0; slot < count; slot++)
             {
-                anyValid = true;
-                int32 start = elements.Count();
-                elements.Add(key.Layouts[slot]->GetElements());
-                for (int32 j = start; j < elements.Count(); j++)
-                    elements.Get()[j].Slot = (byte)slot;
+                if (key.Layouts[slot])
+                {
+                    anyValid = true;
+                    int32 start = elements.Count();
+                    elements.Add(key.Layouts[slot]->GetElements());
+                    for (int32 j = start; j < elements.Count(); j++)
+                        elements.Get()[j].Slot = (byte)slot; // Re-tag the appended elements with the slot they came from
+                }
             }
+            result = anyValid ? GPUVertexLayout::Get(elements, true) : nullptr; // The returned layout is stored in (and owned by) LayoutCache
+            if (!VertexBufferCache.Add(key, result))
+            {
+                // Other thread added the value
+                // Do not delete result here: it is shared via LayoutCache (and likely the very pointer the other thread stored), so freeing it would leave dangling cache entries
+                bool found = VertexBufferCache.TryGet(key, result);
+                ASSERT(found);
+            }
+
         }
-        GPUVertexLayout* result = anyValid ? GPUVertexLayout::Get(elements, true) : nullptr;
-        VertexBufferCache.Add(key, result);
         return result;
     }
 }
@@ -148,7 +159,6 @@ GPUVertexLayout* GPUVertexLayout::Get(const Elements& elements, bool explicitOff
    }

    // Lookup existing cache
-    CacheLocker.Lock();
    GPUVertexLayout* result;
    if (!LayoutCache.TryGet(hash, result))
    {
@@ -160,12 +170,16 @@ GPUVertexLayout* GPUVertexLayout::Get(const Elements& elements, bool explicitOff
                LOG(Error, " {}", e.ToString());
 #endif
            LOG(Error, "Failed to create vertex layout");
-            CacheLocker.Unlock();
            return nullptr;
        }
-        LayoutCache.Add(hash, result);
+        if (!LayoutCache.Add(hash, result))
+        {
+            // Other thread added the value
+            Delete(result);
+            bool found = LayoutCache.TryGet(hash, result);
+            ASSERT(found);
+        }
    }
-    CacheLocker.Unlock();

    return result;
 }
@@ -185,13 +199,7 @@ GPUVertexLayout* GPUVertexLayout::Get(const Span<GPUBuffer*>& vertexBuffers)
        key.Layouts[i] = nullptr;

    // Lookup existing cache
-    CacheLocker.Lock();
-    GPUVertexLayout* result;
-    if (!VertexBufferCache.TryGet(key, result))
-        result = AddCache(key, vertexBuffers.Length());
-    CacheLocker.Unlock();
-
-    return result;
+    return GetCache(key, vertexBuffers.Length());
 }

 GPUVertexLayout* GPUVertexLayout::Get(const Span<GPUVertexLayout*>& layouts)
@@ -209,13 +217,7 @@ GPUVertexLayout* GPUVertexLayout::Get(const Span<GPUVertexLayout*>& layouts)
        key.Layouts[i] = nullptr;

    // Lookup existing cache
-    CacheLocker.Lock();
-    GPUVertexLayout* result;
-    if (!VertexBufferCache.TryGet(key, result))
-        result = AddCache(key, layouts.Length());
-    CacheLocker.Unlock();
-
-    return result;
+    return GetCache(key, layouts.Length());
 }

 GPUVertexLayout* GPUVertexLayout::Merge(GPUVertexLayout* base, GPUVertexLayout* reference, bool removeUnused, bool addMissing, int32 missingSlotOverride, bool referenceOrder)
--- a/Source/Engine/Level/Actor.cpp
+++ b/Source/Engine/Level/Actor.cpp
@@ -1685,7 +1685,7 @@ Quaternion Actor::LookingAt(const Vector3& worldPos) const
 {
    const Vector3 direction = worldPos - _transform.Translation;
    if (direction.LengthSquared() < ZeroTolerance)
-        return _parent->GetOrientation();
+        return _parent ? _parent->GetOrientation() : Quaternion::Identity;

    const Float3 newForward = Vector3::Normalize(direction);
    const Float3 oldForward = _transform.Orientation * Vector3::Forward;
@@ -1712,7 +1712,7 @@ Quaternion Actor::LookingAt(const Vector3& worldPos, const Vector3& worldUp) con
 {
    const Vector3 direction = worldPos - _transform.Translation;
    if (direction.LengthSquared() < ZeroTolerance)
-        return _parent->GetOrientation();
+        return _parent ? _parent->GetOrientation() : Quaternion::Identity;
    const Float3 forward = Vector3::Normalize(direction);
    const Float3 up = Vector3::Normalize(worldUp);
    if (Math::IsOne(Float3::Dot(forward, up)))
--- a/Source/Engine/Level/Actors/AnimatedModel.cpp
+++ b/Source/Engine/Level/Actors/AnimatedModel.cpp
@@ -14,14 +14,84 @@
 #include "Engine/Content/Deprecated.h"
 #include "Engine/Graphics/GPUContext.h"
 #include "Engine/Graphics/GPUDevice.h"
+#include "Engine/Graphics/GPUPass.h"
 #include "Engine/Graphics/RenderTask.h"
 #include "Engine/Graphics/Models/MeshAccessor.h"
 #include "Engine/Graphics/Models/MeshDeformation.h"
+#include "Engine/Renderer/RenderList.h"
 #include "Engine/Level/Scene/Scene.h"
 #include "Engine/Level/SceneObjectsFactory.h"
-#include "Engine/Profiler/ProfilerMemory.h"
+#include "Engine/Profiler/Profiler.h"
 #include "Engine/Serialization/Serialization.h"

+// Implements efficient skinning data update within a shared GPUMemoryPass with manual resource transitions batched for all animated models.
+class AnimatedModelRenderListExtension : public RenderList::IExtension
+{
+public:
+    struct Item // One queued bone-matrices upload
+    {
+        GPUBuffer* BoneMatrices; // Destination GPU buffer
+        void* Data; // Source data pointer; stored raw and read later in PostDraw, so it has to stay valid until then
+        int32 Size; // Amount of data to upload (summed and divided by 1024 for the profiler value below, i.e. presumably bytes)
+    };
+
+    RenderListBuffer<Item> Items; // Uploads queued by AnimatedModel::Draw
+
+    void PreDraw(GPUContext* context, RenderContextBatch& renderContextBatch) override // Resets the queue
+    {
+        Items.Clear();
+    }
+
+    void PostDraw(GPUContext* context, RenderContextBatch& renderContextBatch) override // Uploads every queued item, then empties the queue
+    {
+        const int32 count = Items.Count();
+        if (count == 0)
+            return;
+        PROFILE_GPU_CPU_NAMED("Update Bones");
+        GPUMemoryPass pass(context);
+        Item* items = Items.Get();
+
+        // Special case for D3D11 backend that doesn't need transitions
+        if (context->GetDevice()->GetRendererType() <= RendererType::DirectX11) // NOTE(review): '<=' also matches every renderer type ordered before DirectX11 - confirm that is intended
+        {
+            for (int32 i = 0; i < count; i++)
+            {
+                Item& item = items[i];
+                context->UpdateBuffer(item.BoneMatrices, item.Data, item.Size);
+            }
+        }
+        else
+        {
+            // Batch resource barriers for buffer update
+            for (int32 i = 0; i < count; i++)
+                pass.Transition(items[i].BoneMatrices, GPUResourceAccess::CopyWrite);
+
+            // Update all buffers within Memory Pass (no barriers between)
+            for (int32 i = 0; i < count; i++)
+            {
+                Item& item = items[i];
+                context->UpdateBuffer(item.BoneMatrices, item.Data, item.Size);
+            }
+
+            // Batch resource barriers for reading in Vertex Shader
+            for (int32 i = 0; i < count; i++)
+                pass.Transition(items[i].BoneMatrices, GPUResourceAccess::ShaderReadGraphics);
+        }
+
+#if COMPILE_WITH_PROFILER
+        // Insert amount of kilobytes of data updated into profiler trace
+        uint32 dataSize = 0;
+        for (int32 i = 0; i < count; i++)
+            dataSize += items[i].Size;
+        ZoneValue(dataSize / 1024);
+#endif
+
+        Items.Clear();
+    }
+};
+
+AnimatedModelRenderListExtension RenderListExtension;
+
 AnimatedModel::AnimatedModel(const SpawnParams& params)
    : ModelInstanceActor(params)
    , _actualMode(AnimationUpdateMode::Never)
@@ -1002,7 +1072,7 @@ void AnimatedModel::Draw(RenderContext& renderContext)
    if (renderContext.View.Pass == DrawPass::GlobalSDF)
        return;
    if (renderContext.View.Pass == DrawPass::GlobalSurfaceAtlas)
-        return; // No supported
+        return; // Not supported
    ACTOR_GET_WORLD_MATRIX(this, view, world);
    GEOMETRY_DRAW_STATE_EVENT_BEGIN(_drawState, world);

@@ -1012,9 +1082,8 @@ void AnimatedModel::Draw(RenderContext& renderContext)
        // Flush skinning data with GPU
        if (_skinningData.IsDirty())
        {
-            RenderContext::GPULocker.Lock();
-            GPUDevice::Instance->GetMainContext()->UpdateBuffer(_skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count());
-            RenderContext::GPULocker.Unlock();
+            RenderListExtension.Items.Add({ _skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count() });
+            _skinningData.OnFlush();
        }

        SkinnedMesh::DrawInfo draw;
@@ -1056,9 +1125,8 @@ void AnimatedModel::Draw(RenderContextBatch& renderContextBatch)
        // Flush skinning data with GPU
        if (_skinningData.IsDirty())
        {
-            RenderContext::GPULocker.Lock();
-            GPUDevice::Instance->GetMainContext()->UpdateBuffer(_skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count());
-            RenderContext::GPULocker.Unlock();
+            RenderListExtension.Items.Add({ _skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count() });
+            _skinningData.OnFlush();
        }

        SkinnedMesh::DrawInfo draw;
--- a/Source/Engine/Particles/Particles.cpp
+++ b/Source/Engine/Particles/Particles.cpp
@@ -677,11 +677,10 @@ void CleanupGPUParticlesSorting()
    SAFE_DELETE_GPU_RESOURCE(GPUIndirectArgsBuffer);
 }

-void DrawEmittersGPU(RenderContextBatch& renderContextBatch)
+void DrawEmittersGPU(GPUContext* context, RenderContextBatch& renderContextBatch)
 {
    PROFILE_GPU_CPU_NAMED("DrawEmittersGPU");
    ScopeReadLock systemScope(Particles::SystemLocker);
-    GPUContext* context = GPUDevice::Instance->GetMainContext();

    // Count draws and sorting passes needed for resources allocation
    uint32 indirectArgsSize = 0;
@@ -1124,9 +1123,9 @@ void DrawEmitterGPU(RenderContextBatch& renderContextBatch, ParticleBuffer* buff
    if (GPUEmitterDraws.Count() == 0)
    {
        // The first emitter schedules the drawing of all batched draws
-        renderContextBatch.GetMainContext().List->AddDelayedDraw([](RenderContextBatch& renderContextBatch, int32 contextIndex)
+        renderContextBatch.GetMainContext().List->AddDelayedDraw([](GPUContext* context, RenderContextBatch& renderContextBatch, int32 renderContextIndex)
        {
-            DrawEmittersGPU(renderContextBatch);
+            DrawEmittersGPU(context, renderContextBatch);
        });
    }
    GPUEmitterDraws.Add({ buffer, drawCall, drawModes, staticFlags, bounds, renderModulesIndices, indirectArgsSize, sortOrder, sorting });
--- a/Source/Engine/Platform/Base/FileSystemBase.cpp
+++ b/Source/Engine/Platform/Base/FileSystemBase.cpp
@@ -12,25 +12,25 @@

 bool FileSystemBase::ShowOpenFileDialog(Window* parentWindow, const StringView& initialDirectory, const StringView& filter, bool multiSelect, const StringView& title, Array<String, HeapAllocation>& filenames)
 {
-    // No supported
+    // Not supported
    return true;
 }

 bool FileSystemBase::ShowSaveFileDialog(Window* parentWindow, const StringView& initialDirectory, const StringView& filter, bool multiSelect, const StringView& title, Array<String, HeapAllocation>& filenames)
 {
-    // No supported
+    // Not supported
    return true;
 }

 bool FileSystemBase::ShowBrowseFolderDialog(Window* parentWindow, const StringView& initialDirectory, const StringView& title, String& path)
 {
-    // No supported
+    // Not supported
    return true;
 }

 bool FileSystemBase::ShowFileExplorer(const StringView& path)
 {
-    // No supported
+    // Not supported
    return true;
 }

--- a/Source/Engine/Renderer/RenderList.cpp
+++ b/Source/Engine/Renderer/RenderList.cpp
@@ -15,6 +15,7 @@
 #include "Engine/Profiler/Profiler.h"
 #include "Engine/Content/Assets/CubeTexture.h"
 #include "Engine/Core/Log.h"
+#include "Engine/Core/Math/Half.h"
 #include "Engine/Graphics/Shaders/GPUVertexLayout.h"
 #include "Engine/Level/Scene/Lightmap.h"
 #include "Engine/Level/Actors/PostFxVolume.h"
@@ -30,6 +31,13 @@ namespace
    Array<RenderList*> FreeRenderList;
    Array<Pair<void*, uintptr>> MemPool;
    CriticalSection MemPoolLocker;
+
+    typedef Array<RenderList::IExtension*, FixedAllocation<8>> ExtensionsList; // NOTE(review): FixedAllocation<8> presumably caps the number of registered extensions at 8 - confirm
+    ExtensionsList& GetExtensions() // Returns the shared list that IExtension constructors/destructors register into
+    {
+        static ExtensionsList list; // Local static: constructed on first call, so it is ready even when an extension object is created as a global
+        return list;
+    }
 }

 void ShaderObjectData::Store(const Matrix& worldMatrix, const Matrix& prevWorldMatrix, const Rectangle& lightmapUVsArea, const Float3& geometrySize, float perInstanceRandom, float worldDeterminantSign, float lodDitherFactor)
@@ -235,6 +243,16 @@ void RenderList::CleanupCache()
    MemPoolLocker.Unlock();
 }

+RenderList::IExtension::IExtension() // Registers this extension in the shared extensions list
+{
+    GetExtensions().Add(this);
+}
+
+RenderList::IExtension::~IExtension() // Unregisters this extension from the shared extensions list
+{
+    GetExtensions().Remove(this);
+}
+
 bool RenderList::BlendableSettings::operator<(const BlendableSettings& other) const
 {
    // Sort by higher priority
@@ -257,18 +275,31 @@ void RenderList::AddSettingsBlend(IPostFxSettingsProvider* provider, float weigh

 void RenderList::AddDelayedDraw(DelayedDraw&& func)
 {
-    MemPoolLocker.Lock(); // TODO: convert _delayedDraws into RenderListBuffer with usage of arena Memory for fast alloc
    _delayedDraws.Add(MoveTemp(func));
-    MemPoolLocker.Unlock();
 }

-void RenderList::DrainDelayedDraws(RenderContextBatch& renderContextBatch, int32 contextIndex)
+void RenderList::DrainDelayedDraws(GPUContext* context, RenderContextBatch& renderContextBatch, int32 renderContextIndex)
 {
-    if (_delayedDraws.IsEmpty())
+    if (_delayedDraws.Count() == 0)
        return;
+    PROFILE_CPU();
    for (DelayedDraw& e : _delayedDraws)
-        e(renderContextBatch, contextIndex);
-    _delayedDraws.SetCapacity(0);
+        e(context, renderContextBatch, renderContextIndex);
+    _delayedDraws.Clear();
+}
+
+#define LOOP_EXTENSIONS() const auto& extensions = GetExtensions(); for (auto* e : extensions)
+
+void RenderList::PreDraw(GPUContext* context, RenderContextBatch& renderContextBatch) // Forwards the PreDraw call to every registered extension
+{
+    LOOP_EXTENSIONS()
+        e->PreDraw(context, renderContextBatch);
+}
+
+void RenderList::PostDraw(GPUContext* context, RenderContextBatch& renderContextBatch) // Forwards the PostDraw call to every registered extension
+{
+    LOOP_EXTENSIONS()
+        e->PostDraw(context, renderContextBatch);
 }

 void RenderList::BlendSettings()
@@ -494,7 +525,6 @@ RenderList::RenderList(const SpawnParams& params)
    , ObjectBuffer(0, PixelFormat::R32G32B32A32_Float, false, TEXT("Object Buffer"))
    , TempObjectBuffer(0, PixelFormat::R32G32B32A32_Float, false, TEXT("Object Buffer"))
    , _instanceBuffer(0, sizeof(ShaderObjectDrawInstanceData), TEXT("Instance Buffer"), GPUVertexLayout::Get({ { VertexElement::Types::Attribute0, 3, 0, 1, PixelFormat::R32_UInt } }))
-    , _delayedDraws(&Memory)
 {
 }

@@ -826,6 +856,13 @@ FORCE_INLINE bool DrawsEqual(const DrawCall* a, const DrawCall* b)
            Platform::MemoryCompare(a->Geometry.VertexBuffers, b->Geometry.VertexBuffers, sizeof(a->Geometry.VertexBuffers) + sizeof(a->Geometry.VertexBuffersOffsets)) == 0;
 }

+FORCE_INLINE Span<GPUBuffer*> GetVB(GPUBuffer* const* ptr, int32 maxSize) // Builds a span over the vertex buffers with trailing null slots trimmed (never trims below one slot)
+{
+    while (maxSize > 1 && ptr[maxSize - 1] == nullptr) // Check the size first so a maxSize of 0 never reads ptr[-1]
+        maxSize--;
+    return ToSpan<GPUBuffer*>(ptr, maxSize);
+}
+
 void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsList& list, RenderList* drawCallsList, GPUTextureView* input)
 {
    if (list.IsEmpty())
@@ -954,7 +991,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL
                    Platform::MemoryCopy(vb, activeDraw->Geometry.VertexBuffers, sizeof(DrawCall::Geometry.VertexBuffers));
                    Platform::MemoryCopy(vbOffsets, activeDraw->Geometry.VertexBuffersOffsets, sizeof(DrawCall::Geometry.VertexBuffersOffsets));
                    context->BindIB(activeDraw->Geometry.IndexBuffer);
-                    context->BindVB(ToSpan(vb, ARRAY_COUNT(vb)), vbOffsets);
+                    context->BindVB(GetVB(vb, ARRAY_COUNT(vb)), vbOffsets);
                    context->DrawIndexedInstanced(activeDraw->Draw.IndicesCount, activeCount, instanceBufferOffset, 0, activeDraw->Draw.StartIndex);
                    instanceBufferOffset += activeCount;

@@ -971,7 +1008,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL

                // Single-draw call batch
                context->BindIB(drawCall.Geometry.IndexBuffer);
-                context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets);
+                context->BindVB(GetVB(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets);
                if (drawCall.InstanceCount == 0)
                {
                    context->DrawIndexedInstancedIndirect(drawCall.Draw.IndirectArgsBuffer, drawCall.Draw.IndirectArgsOffset);
@@ -994,7 +1031,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL
            Platform::MemoryCopy(vb, drawCall.Geometry.VertexBuffers, sizeof(DrawCall::Geometry.VertexBuffers));
            Platform::MemoryCopy(vbOffsets, drawCall.Geometry.VertexBuffersOffsets, sizeof(DrawCall::Geometry.VertexBuffersOffsets));
            context->BindIB(drawCall.Geometry.IndexBuffer);
-            context->BindVB(ToSpan(vb, vbMax + 1), vbOffsets);
+            context->BindVB(GetVB(vb, vbMax + 1), vbOffsets);

            if (drawCall.InstanceCount == 0)
            {
@@ -1024,7 +1061,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL

                const DrawCall& drawCall = drawCallsData[perDraw.DrawObjectIndex];
                context->BindIB(drawCall.Geometry.IndexBuffer);
-                context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets);
+                context->BindVB(GetVB(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets);

                if (drawCall.InstanceCount == 0)
                {
@@ -1045,7 +1082,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL
            bindParams.DrawCall->Material->Bind(bindParams);

            context->BindIB(drawCall.Geometry.IndexBuffer);
-            context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets);
+            context->BindVB(GetVB(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets);

            for (int32 j = 0; j < batch.Instances.Count(); j++)
            {
@@ -1069,7 +1106,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL
                drawCall.Material->Bind(bindParams);

                context->BindIB(drawCall.Geometry.IndexBuffer);
-                context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets);
+                context->BindVB(GetVB(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets);

                if (drawCall.InstanceCount == 0)
                {
--- a/Source/Engine/Renderer/RenderList.h
+++ b/Source/Engine/Renderer/RenderList.h
@@ -4,7 +4,6 @@

 #include "Engine/Core/Collections/Array.h"
 #include "Engine/Core/Memory/ArenaAllocation.h"
-#include "Engine/Core/Math/Half.h"
 #include "Engine/Graphics/PostProcessSettings.h"
 #include "Engine/Graphics/DynamicBuffer.h"
 #include "Engine/Scripting/ScriptingObject.h"
@@ -327,6 +326,21 @@ API_CLASS(Sealed) class FLAXENGINE_API RenderList : public ScriptingObject
    /// </summary>
    static void CleanupCache();

+    /// <summary>
+    /// The rendering extension interface for custom drawing/effects linked to RenderList. Can be used during async scene drawing and further drawing/processing for more optimized rendering. Override PreDraw/PostDraw to hook into the scene drawing; both default to no-ops.
+    /// </summary>
+    class FLAXENGINE_API IExtension
+    {
+    public:
+        IExtension(); // Defined out of line. NOTE(review): presumably registers the extension for the PreDraw/PostDraw callbacks - confirm in RenderList.cpp
+        virtual ~IExtension(); // Virtual so derived extensions can be destroyed via the interface pointer
+
+        // Event called before collecting draw calls. Can be used for initialization. Receives the GPU context and the batch of render contexts being drawn. The default implementation does nothing.
+        virtual void PreDraw(GPUContext* context, RenderContextBatch& renderContextBatch) {}
+        // Event called after collecting draw calls. Can be used for cleanup or to perform additional drawing using collected draw calls data such as batched data processing. Receives the same arguments as PreDraw. The default implementation does nothing.
+        virtual void PostDraw(GPUContext* context, RenderContextBatch& renderContextBatch) {}
+    };
+
 public:
    /// <summary>
    /// Memory storage with all draw-related data that lives during a single frame rendering time. Thread-safe to allocate memory during rendering jobs.
@@ -460,13 +474,14 @@ public:
    /// </summary>
    DynamicTypedBuffer TempObjectBuffer;

-    typedef Function<void(RenderContextBatch& renderContextBatch, int32 contextIndex)> DelayedDraw;
+    typedef Function<void(GPUContext* context, RenderContextBatch& renderContextBatch, int32 renderContextIndex)> DelayedDraw;
    void AddDelayedDraw(DelayedDraw&& func);
-    void DrainDelayedDraws(RenderContextBatch& renderContextBatch, int32 contextIndex);
+    void DrainDelayedDraws(GPUContext* context, RenderContextBatch& renderContextBatch, int32 renderContextIndex);

    /// <summary>
    /// Adds custom callback (eg. lambda) to invoke after scene draw calls are collected on a main thread (some async draw tasks might be active). Allows for safe usage of GPUContext for draw preparations or to perform GPU-driven drawing.
    /// </summary>
+    /// <remarks>Can be called in async during scene rendering (thread-safe internally). Lambda is allocated by concurrent arena allocator owned by the RenderList.</remarks>
    template<typename T>
    FORCE_INLINE void AddDelayedDraw(const T& lambda)
    {
@@ -475,9 +490,13 @@ public:
        AddDelayedDraw(MoveTemp(func));
    }

+    // IExtension implementation
+    void PreDraw(GPUContext* context, RenderContextBatch& renderContextBatch);
+    void PostDraw(GPUContext* context, RenderContextBatch& renderContextBatch);
+
 private:
    DynamicVertexBuffer _instanceBuffer;
-    Array<DelayedDraw, ConcurrentArenaAllocation> _delayedDraws;
+    RenderListBuffer<DelayedDraw> _delayedDraws;

 public:
    /// <summary>
--- a/Source/Engine/Renderer/Renderer.cpp
+++ b/Source/Engine/Renderer/Renderer.cpp
@@ -423,6 +423,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont
        if (setup.UseMotionVectors)
            view.Pass |= DrawPass::MotionVectors;
        renderContextBatch.GetMainContext() = renderContext; // Sync render context in batch with the current value
+        renderContext.List->PreDraw(context, renderContextBatch);

        bool drawShadows = !isGBufferDebug && EnumHasAnyFlags(view.Flags, ViewFlags::Shadows) && ShadowsPass::Instance()->IsReady();
        switch (renderContext.View.Mode)
@@ -461,7 +462,8 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont

        // Perform custom post-scene drawing (eg. GPU dispatches used by VFX)
        for (int32 i = 0; i < renderContextBatch.Contexts.Count(); i++)
-            renderContextBatch.Contexts[i].List->DrainDelayedDraws(renderContextBatch, i);
+            renderContextBatch.Contexts[i].List->DrainDelayedDraws(context, renderContextBatch, i);
+        renderContext.List->PostDraw(context, renderContextBatch);

 #if USE_EDITOR
        GBufferPass::Instance()->OverrideDrawCalls(renderContext);
--- a/Source/Engine/Scripting/BinaryModule.cpp
+++ b/Source/Engine/Scripting/BinaryModule.cpp
@@ -683,6 +683,8 @@ BinaryModule* BinaryModule::GetModule(const StringAnsiView& name)

 BinaryModule::BinaryModule()
 {
+    CanReload = USE_EDITOR;
+
    // Register
    GetModules().Add(this);
 }
--- a/Source/Engine/Scripting/BinaryModule.h
+++ b/Source/Engine/Scripting/BinaryModule.h
@@ -91,6 +91,11 @@ public:
    /// </summary>
    Dictionary<StringAnsi, int32> TypeNameToTypeIndex;

+    /// <summary>
+    /// Determines whether the module can be hot-reloaded at runtime (for example, in the Editor after scripts recompilation). Some modules, such as the engine and class library modules, are static.
+    /// </summary>
+    bool CanReload;
+
 public:

    /// <summary>
--- a/Source/Engine/Scripting/ManagedCLR/MAssembly.h
+++ b/Source/Engine/Scripting/ManagedCLR/MAssembly.h
@@ -34,6 +34,7 @@ private:

    int32 _isLoaded : 1;
    int32 _isLoading : 1;
+    int32 _canReload : 1;
    mutable int32 _hasCachedClasses : 1;

    mutable ClassesDictionary _classes;
@@ -125,6 +126,14 @@ public:
        return _isLoaded != 0;
    }

+    /// <summary>
+    /// Checks whether this assembly can be hot-reloaded at runtime (for example, in Editor after scripts recompilation). Some assemblies, such as engine and class library modules, are static and report false.
+    /// </summary>
+    FORCE_INLINE bool CanReload() const
+    {
+        return USE_EDITOR && _canReload != 0;
+    }
+
    /// <summary>
    /// Gets the assembly name.
    /// </summary>
--- a/Source/Engine/Scripting/ManagedCLR/MCore.cpp
+++ b/Source/Engine/Scripting/ManagedCLR/MCore.cpp
@@ -45,6 +45,7 @@ MAssembly::MAssembly(MDomain* domain, const StringAnsiView& name)
    : _domain(domain)
    , _isLoaded(false)
    , _isLoading(false)
+    , _canReload(true)
    , _hasCachedClasses(false)
    , _reloadCount(0)
    , _name(name)
@@ -59,6 +60,7 @@ MAssembly::MAssembly(MDomain* domain, const StringAnsiView& name, const StringAn
    , _domain(domain)
    , _isLoaded(false)
    , _isLoading(false)
+    , _canReload(true)
    , _hasCachedClasses(false)
    , _reloadCount(0)
    , _name(name)
--- a/Source/Engine/Scripting/Runtime/DotNet.cpp
+++ b/Source/Engine/Scripting/Runtime/DotNet.cpp
@@ -874,6 +874,7 @@ bool MAssembly::LoadCorlib()
        return true;
    }
    _hasCachedClasses = false;
+    _canReload = false;
    CachedAssemblyHandles.Add(_handle, this);

    // End
--- a/Source/Engine/Scripting/Scripting.cpp
+++ b/Source/Engine/Scripting/Scripting.cpp
@@ -502,6 +502,7 @@ bool Scripting::LoadBinaryModules(const String& path, const String& projectFolde
            // C#
            if (managedPath.HasChars() && !((ManagedBinaryModule*)module)->Assembly->IsLoaded())
            {
+                (((ManagedBinaryModule*)module)->Assembly)->_canReload = module->CanReload;
                if (((ManagedBinaryModule*)module)->Assembly->Load(managedPath, nativePath))
                {
                    LOG(Error, "Failed to load C# assembly '{0}' for binary module {1}.", managedPath, name);
@@ -528,6 +529,7 @@ bool Scripting::Load()
 #if USE_CSHARP
    // Load C# core assembly
    ManagedBinaryModule* corlib = GetBinaryModuleCorlib();
+    corlib->CanReload = false;
    if (corlib->Assembly->LoadCorlib())
    {
        LOG(Error, "Failed to load corlib C# assembly.");
@@ -581,6 +583,8 @@ bool Scripting::Load()
            LOG(Error, "Failed to load FlaxEngine C# assembly.");
            return true;
        }
+        flaxEngineModule->CanReload = false;
+        flaxEngineModule->Assembly->_canReload = false;
        onEngineLoaded(flaxEngineModule->Assembly);

        // Insert type aliases for vector types that don't exist in C++ but are just typedef (properly redirect them to actual types)
--- a/Source/Engine/Serialization/Serialization.cpp
+++ b/Source/Engine/Serialization/Serialization.cpp
@@ -78,7 +78,10 @@ void Serialization::Deserialize(ISerializable::DeserializeStream& stream, Varian
            v.Type = VariantType::Null;
        const auto mTypeName = SERIALIZE_FIND_MEMBER(stream, "TypeName");
        if (mTypeName != stream.MemberEnd() && mTypeName->value.IsString())
+        {
            v.SetTypeName(StringAnsiView(mTypeName->value.GetStringAnsiView()));
+            v.Inline();
+        }
    }
    else
    {
--- a/Source/Engine/Serialization/Stream.cpp
+++ b/Source/Engine/Serialization/Stream.cpp
@@ -255,6 +255,7 @@ void ReadStream::Read(VariantType& data)
            ptr++;
        }
        *ptr = 0;
+        data.Inline();
    }
    else if (typeNameLength > 0)
    {
--- a/Source/Engine/Threading/ConcurrentDictionary.h
+++ b/Source/Engine/Threading/ConcurrentDictionary.h
@@ -0,0 +1,318 @@
+// Copyright (c) Wojciech Figat. All rights reserved.
+
+#pragma once
+
+#include "Engine/Core/Collections/Dictionary.h"
+#include "Engine/Platform/CriticalSection.h"
+
+/// <summary>
+/// Template for an unordered dictionary of key-value pairs that supports asynchronous data reading and writing.
+/// Implemented via reader-writer lock pattern, so multiple threads can read data at the same time, but only one thread can write data and it blocks all other threads (including readers) until the write operation is finished.
+/// Optimized for frequent reads (no lock operation).
+/// </summary>
+/// <remarks>Iterators hold read access for their whole lifetime, so any write operation waits until all iterators are destroyed.</remarks>
+/// <typeparam name="KeyType">The type of the keys in the dictionary.</typeparam>
+/// <typeparam name="ValueType">The type of the values in the dictionary.</typeparam>
+/// <typeparam name="AllocationType">The type of memory allocator.</typeparam>
+template<typename KeyType, typename ValueType, typename AllocationType = HeapAllocation>
+class ConcurrentDictionary : Dictionary<KeyType, ValueType, AllocationType>
+{
+    friend ConcurrentDictionary;
+public:
+    typedef Dictionary<KeyType, ValueType, AllocationType> Base;
+    typedef DictionaryBucket<KeyType, ValueType, AllocationType> Bucket;
+    using AllocationData = typename AllocationType::template Data<Bucket>;
+    using AllocationTag = typename AllocationType::Tag;
+
+private:
+    // Amount of active readers (threads inside a read operation plus alive iterators). Mutable so read-only methods can register themselves.
+    mutable volatile int64 _threadsReading = 0;
+    // Amount of threads that are writing or waiting to write. Non-zero value makes new readers wait.
+    volatile int64 _threadsWriting = 0;
+    // Mutex that serializes writers.
+    CriticalSection _locker;
+
+public:
+    /// <summary>
+    /// Initializes an empty <see cref="ConcurrentDictionary"/> without reserving any space.
+    /// </summary>
+    ConcurrentDictionary()
+    {
+    }
+
+    /// <summary>
+    /// Initializes an empty <see cref="ConcurrentDictionary"/> without reserving any space.
+    /// </summary>
+    /// <param name="tag">The custom allocation tag.</param>
+    ConcurrentDictionary(AllocationTag tag)
+        : Base(tag)
+    {
+    }
+
+    /// <summary>
+    /// Finalizes an instance of the <see cref="ConcurrentDictionary"/> class.
+    /// </summary>
+    ~ConcurrentDictionary()
+    {
+        Clear();
+    }
+
+public:
+    /// <summary>
+    /// Gets the amount of the elements in the collection.
+    /// </summary>
+    int32 Count() const
+    {
+        Reader reader(this);
+        return Base::_elementsCount;
+    }
+
+    /// <summary>
+    /// Gets the amount of the elements that can be contained by the collection.
+    /// </summary>
+    int32 Capacity() const
+    {
+        Reader reader(this);
+        return Base::_size;
+    }
+
+    /// <summary>
+    /// Tries to get element with given key.
+    /// </summary>
+    /// <param name="key">The key of the element.</param>
+    /// <param name="result">The result value. Left unmodified if the key is missing.</param>
+    /// <returns>True if element of given key has been found, otherwise false.</returns>
+    template<typename KeyComparableType>
+    bool TryGet(const KeyComparableType& key, ValueType& result) const
+    {
+        Reader reader(this);
+        typename Base::FindPositionResult pos;
+        Base::FindPosition(key, pos);
+        if (pos.ObjectIndex != -1)
+            result = Base::_allocation.Get()[pos.ObjectIndex].Value;
+        return pos.ObjectIndex != -1;
+    }
+
+public:
+    /// <summary>
+    /// Adds a pair of key and value to the collection.
+    /// </summary>
+    /// <param name="key">The key.</param>
+    /// <param name="value">The value.</param>
+    /// <returns>True if added element, otherwise false if it already exists (or other thread added it).</returns>
+    template<typename KeyComparableType>
+    bool Add(const KeyComparableType& key, const ValueType& value)
+    {
+        Writer writer(this);
+        Bucket* bucket = Base::OnAdd(key, false, true);
+        if (bucket)
+            bucket->Occupy(key, value);
+        return bucket != nullptr;
+    }
+
+    /// <summary>
+    /// Removes element with a specified key.
+    /// </summary>
+    /// <param name="key">The element key to remove.</param>
+    /// <returns>True if item was removed from collection, otherwise false.</returns>
+    template<typename KeyComparableType>
+    bool Remove(const KeyComparableType& key)
+    {
+        Writer writer(this);
+        return Base::Remove(key);
+    }
+
+public:
+    /// <summary>
+    /// Removes all elements from the collection.
+    /// </summary>
+    void Clear()
+    {
+        Writer writer(this);
+        Base::Clear();
+    }
+
+public:
+    /// <summary>
+    /// The read-only dictionary collection iterator. Holds read access to the collection for its whole lifetime (acquired on construction/copy, released on destruction), which blocks writers until it is destroyed.
+    /// </summary>
+    struct ConstIterator : Base::IteratorBase
+    {
+        friend ConcurrentDictionary;
+    public:
+        ConstIterator(const ConcurrentDictionary* collection, const int32 index)
+            : Base::IteratorBase(collection, index)
+        {
+            if (collection)
+                collection->BeginRead();
+        }
+
+        ConstIterator(const ConstIterator& i)
+            : Base::IteratorBase(i._collection, i._index)
+        {
+            // The copy owns a separate read access (released by its own destructor)
+            if (i._collection)
+                ((const ConcurrentDictionary*)i._collection)->BeginRead();
+        }
+
+        ConstIterator(ConstIterator&& i) noexcept
+            : Base::IteratorBase(i._collection, i._index)
+        {
+            // Take over the read access owned by the source so it won't release it
+            i._collection = nullptr;
+        }
+
+        ~ConstIterator()
+        {
+            if (this->_collection)
+                ((ConcurrentDictionary*)this->_collection)->EndRead();
+        }
+
+    public:
+        FORCE_INLINE bool operator!() const
+        {
+            return !(bool)*this;
+        }
+
+        FORCE_INLINE bool operator==(const ConstIterator& v) const
+        {
+            return this->_index == v._index && this->_collection == v._collection;
+        }
+
+        FORCE_INLINE bool operator!=(const ConstIterator& v) const
+        {
+            return this->_index != v._index || this->_collection != v._collection;
+        }
+
+        ConstIterator& operator=(const ConstIterator& v)
+        {
+            if (this->_collection != v._collection)
+            {
+                // Switch the read access: release the old collection first, then acquire the new one (keeps readers counters balanced)
+                if (this->_collection)
+                    ((const ConcurrentDictionary*)this->_collection)->EndRead();
+                if (v._collection)
+                    ((const ConcurrentDictionary*)v._collection)->BeginRead();
+                this->_collection = v._collection;
+            }
+            // Same collection (or self-assignment) keeps the read access that is already owned
+            this->_index = v._index;
+            return *this;
+        }
+
+        ConstIterator& operator=(ConstIterator&& v) noexcept
+        {
+            if (this != &v)
+            {
+                // Release own read access and take over the one owned by the source
+                if (this->_collection)
+                    ((const ConcurrentDictionary*)this->_collection)->EndRead();
+                this->_collection = v._collection;
+                this->_index = v._index;
+                v._collection = nullptr;
+            }
+            return *this;
+        }
+
+        ConstIterator& operator++()
+        {
+            this->Next();
+            return *this;
+        }
+
+        // Returns an advanced copy of this iterator (this iterator is not modified).
+        ConstIterator operator++(int) const
+        {
+            ConstIterator i = *this;
+            i.Next();
+            return i;
+        }
+
+        ConstIterator& operator--()
+        {
+            this->Prev();
+            return *this;
+        }
+
+        // Returns a moved-back copy of this iterator (this iterator is not modified).
+        ConstIterator operator--(int) const
+        {
+            ConstIterator i = *this;
+            i.Prev();
+            return i;
+        }
+    };
+
+    ConstIterator begin() const
+    {
+        ConstIterator i(this, -1);
+        ++i;
+        return i;
+    }
+
+    FORCE_INLINE ConstIterator end() const
+    {
+        return ConstIterator(this, Base::_size);
+    }
+
+private:
+    // Enters the write access: announces the writer (so new readers start waiting), waits for active readers to finish and takes the writers mutex.
+    void BeginWrite()
+    {
+        Platform::InterlockedIncrement(&_threadsWriting);
+
+        // Wait for all reads to end
+    RETRY:
+        while (Platform::AtomicRead(&_threadsReading))
+            Platform::Yield();
+
+        // Thread-safe writing
+        _locker.Lock();
+        if (Platform::AtomicRead(&_threadsReading))
+        {
+            // Other reader entered during mutex locking so give them a chance to transition into active-waiting
+            _locker.Unlock();
+            goto RETRY;
+        }
+    }
+
+    // Leaves the write access: releases the writers mutex and withdraws the writer announcement.
+    void EndWrite()
+    {
+        _locker.Unlock();
+        Platform::InterlockedDecrement(&_threadsWriting);
+    }
+
+    // Enters the read access: registers the reader, backing off and waiting whenever any writer is active or pending.
+    void BeginRead() const
+    {
+    RETRY:
+        Platform::InterlockedIncrement(&_threadsReading);
+
+        // Check if any thread is writing (or is about to write)
+        if (Platform::AtomicRead(&_threadsWriting) != 0)
+        {
+            // Wait for all writes to end
+            Platform::InterlockedDecrement(&_threadsReading);
+            while (Platform::AtomicRead(&_threadsWriting))
+                Platform::Yield();
+
+            // Try again
+            goto RETRY;
+        }
+    }
+
+    // Leaves the read access (must match a single BeginRead call).
+    void EndRead() const
+    {
+        Platform::InterlockedDecrement(&_threadsReading);
+    }
+
+private:
+    // Utility for methods that read-write state. Holds the write access for the scope lifetime.
+    struct Writer
+    {
+        ConcurrentDictionary* _collection;
+
+        Writer(ConcurrentDictionary* collection)
+            : _collection(collection)
+        {
+            _collection->BeginWrite();
+        }
+
+        ~Writer()
+        {
+            _collection->EndWrite();
+        }
+    };
+
+    // Utility for methods that read-only state. Holds the read access for the scope lifetime.
+    struct Reader
+    {
+        const ConcurrentDictionary* _collection;
+
+        Reader(const ConcurrentDictionary* collection)
+            : _collection(collection)
+        {
+            _collection->BeginRead();
+        }
+
+        ~Reader()
+        {
+            _collection->EndRead();
+        }
+    };
+};
--- a/Source/Engine/Threading/JobSystem.cpp
+++ b/Source/Engine/Threading/JobSystem.cpp
@@ -8,7 +8,6 @@
 #include "Engine/Core/Types/Span.h"
 #include "Engine/Core/Types/Pair.h"
 #include "Engine/Core/Memory/SimpleHeapAllocation.h"
-#include "Engine/Core/Collections/Dictionary.h"
 #include "Engine/Core/Collections/RingBuffer.h"
 #include "Engine/Engine/EngineService.h"
 #include "Engine/Profiler/ProfilerCPU.h"
@@ -22,14 +21,6 @@

 #if JOB_SYSTEM_ENABLED

-// Local allocator for job system memory that uses internal pooling and assumes that JobsLocker is taken (write access owned by the calling thread).
-class JobSystemAllocation : public SimpleHeapAllocation<JobSystemAllocation>
-{
-public:
-    static void* Allocate(uintptr size);
-    static void Free(void* ptr, uintptr size);
-};
-
 class JobSystemService : public EngineService
 {
 public:
@@ -43,30 +34,25 @@ public:
    void Dispose() override;
 };

-struct JobData
+// Holds a single job dispatch data
+struct alignas(int64) JobContext
 {
-    int32 Index;
-    int64 JobKey;
-};
-
-template<>
-struct TIsPODType<JobData>
-{
-    enum { Value = true };
-};
-
-struct JobContext
-{
-    volatile int64 JobsLeft;
-    int32 DependenciesLeft;
+    // The next index of the job to process updated when picking a job by the thread.
+    volatile int64 JobIndex = 0;
+    // The number of jobs left to process updated after job completion by the thread.
+    volatile int64 JobsLeft = 0;
+    // The unique label of this job used to identify it. Set to -1 when job is done.
+    volatile int64 JobLabel = 0;
+    // Utility atomic counter used to indicate that any job is waiting for this one to finish. Then Dependants can be accessed within thread-safe JobsLocker.
+    volatile int64 DependantsCount = 0;
+    // The number of dependency jobs left to be finished before starting this job.
+    volatile int64 DependenciesLeft = 0;
+    // The total number of jobs to process (in this context).
+    int32 JobsCount = 0;
+    // The job function to execute.
    Function<void(int32)> Job;
-    Array<int64, JobSystemAllocation> Dependants;
-};
-
-template<>
-struct TIsPODType<JobContext>
-{
-    enum { Value = false };
+    // List of dependant jobs to signal when this job is done.
+    Array<int64> Dependants;
 };

 class JobSystemThread : public IRunnable
@@ -92,50 +78,36 @@ public:
 namespace
 {
    JobSystemService JobSystemInstance;
-    Array<Pair<void*, uintptr>> MemPool;
    Thread* Threads[PLATFORM_THREADS_LIMIT / 2] = {};
    int32 ThreadsCount = 0;
    bool JobStartingOnDispatch = true;
    volatile int64 ExitFlag = 0;
    volatile int64 JobLabel = 0;
-    Dictionary<int64, JobContext, JobSystemAllocation> JobContexts;
+    volatile int64 JobEndLabel = 0;
+    volatile int64 JobStartLabel = 0;
+    volatile int64 JobContextsCount = 0;
+    uint32 JobContextsSize = 0;
+    uint32 JobContextsMask = 0;
+    JobContext* JobContexts = nullptr;
    ConditionVariable JobsSignal;
    CriticalSection JobsMutex;
    ConditionVariable WaitSignal;
    CriticalSection WaitMutex;
    CriticalSection JobsLocker;
-    RingBuffer<JobData> Jobs;
-}
-
-void* JobSystemAllocation::Allocate(uintptr size)
-{
-    void* result = nullptr;
-    for (int32 i = 0; i < MemPool.Count(); i++)
-    {
-        if (MemPool.Get()[i].Second == size)
-        {
-            result = MemPool.Get()[i].First;
-            MemPool.RemoveAt(i);
-            break;
-        }
-    }
-    if (!result)
-    {
-        PROFILE_MEM(EngineThreading);
-        result = Platform::Allocate(size, 16);
-    }
-    return result;
-}
-
-void JobSystemAllocation::Free(void* ptr, uintptr size)
-{
-    PROFILE_MEM(EngineThreading);
-    MemPool.Add({ ptr, size });
+#define GET_CONTEXT_INDEX(label) (uint32)((label) & (int64)JobContextsMask)
 }

 bool JobSystemService::Init()
 {
    PROFILE_MEM(EngineThreading);
+
+    // Initialize job context storage (fixed-size ring buffer for active jobs tracking)
+    JobContextsSize = 256;
+    JobContextsMask = JobContextsSize - 1;
+    JobContexts = (JobContext*)Platform::Allocate(JobContextsSize * sizeof(JobContext), alignof(JobContext));
+    Memory::ConstructItems(JobContexts, (int32)JobContextsSize);
+
+    // Spawn threads
    ThreadsCount = Math::Min<int32>(Platform::GetCPUInfo().LogicalProcessorCount, ARRAY_COUNT(Threads));
    for (int32 i = 0; i < ThreadsCount; i++)
    {
@@ -146,6 +118,7 @@ bool JobSystemService::Init()
            return true;
        Threads[i] = thread;
    }
+
    return false;
 }

@@ -171,35 +144,67 @@ void JobSystemService::Dispose()
        }
    }

-    JobContexts.SetCapacity(0);
-    Jobs.Release();
-    for (auto& e : MemPool)
-        Platform::Free(e.First);
-    MemPool.Clear();
+    Memory::DestructItems(JobContexts, (int32)JobContextsSize);
+    Platform::Free(JobContexts);
+    JobContexts = nullptr;
 }

 int32 JobSystemThread::Run()
 {
+    // Pin thread to the physical core
    Platform::SetThreadAffinityMask(1ull << Index);

-    JobData data;
-    Function<void(int32)> job;
    bool attachCSharpThread = true;
    MONO_THREAD_INFO_TYPE* monoThreadInfo = nullptr;
    while (Platform::AtomicRead(&ExitFlag) == 0)
    {
        // Try to get a job
-        JobsLocker.Lock();
-        if (Jobs.Count() != 0)
+        int32 jobIndex;
+        JobContext* jobContext = nullptr;
        {
-            data = Jobs.PeekFront();
-            Jobs.PopFront();
-            const JobContext& context = ((const Dictionary<int64, JobContext>&)JobContexts).At(data.JobKey);
-            job = context.Job;
-        }
-        JobsLocker.Unlock();
+            int64 jobOffset = 0;
+        RETRY:
+            int64 jobStartLabel = Platform::AtomicRead(&JobStartLabel) + jobOffset;
+            int64 jobEndLabel = Platform::AtomicRead(&JobEndLabel);
+            if (jobStartLabel <= jobEndLabel && jobEndLabel > 0)
+            {
+                jobContext = &JobContexts[GET_CONTEXT_INDEX(jobStartLabel)];
+                if (Platform::AtomicRead(&jobContext->DependenciesLeft) > 0)
+                {
+                    // This job still waits for dependency so skip it for now and try the next one
+                    jobOffset++;
+                    jobContext = nullptr;
+                    goto RETRY;
+                }

-        if (job.IsBinded())
+                // Move forward with index for a job
+                jobIndex = (int32)(Platform::InterlockedIncrement(&jobContext->JobIndex) - 1);
+                if (jobIndex < jobContext->JobsCount)
+                {
+                    // Index is valid
+                }
+                else if (jobStartLabel < jobEndLabel && jobOffset == 0)
+                {
+                    // No more jobs inside this context, move to the next one
+                    Platform::InterlockedCompareExchange(&JobStartLabel, jobStartLabel + 1, jobStartLabel);
+                    jobContext = nullptr;
+                    goto RETRY;
+                }
+                else
+                {
+                    // No more jobs
+                    jobContext = nullptr;
+                    if (jobStartLabel < jobEndLabel)
+                    {
+                        // Try with a different one before going to sleep
+                        jobOffset++;
+                        goto RETRY;
+                    }
+                }
+            }
+        }
+
+        if (jobContext)
        {
 #if USE_CSHARP
            // Ensure to have C# thread attached to this thead (late init due to MCore being initialized after Job System)
@@ -212,37 +217,39 @@ int32 JobSystemThread::Run()
 #endif

            // Run job
-            job(data.Index);
+            jobContext->Job(jobIndex);

            // Move forward with the job queue
-            bool notifyWaiting = false;
-            JobsLocker.Lock();
-            JobContext& context = JobContexts.At(data.JobKey);
-            if (Platform::InterlockedDecrement(&context.JobsLeft) <= 0)
+            if (Platform::InterlockedDecrement(&jobContext->JobsLeft) <= 0)
            {
-                // Update any dependant jobs
-                for (int64 dependant : context.Dependants)
+                // Mark job as done before processing dependants
+                Platform::AtomicStore(&jobContext->JobLabel, -1);
+
+                // Check if any other job waits on this one
+                if (Platform::AtomicRead(&jobContext->DependantsCount) != 0)
                {
-                    JobContext& dependantContext = JobContexts.At(dependant);
-                    if (--dependantContext.DependenciesLeft <= 0)
+                    // Update dependant jobs
+                    JobsLocker.Lock();
+                    for (int64 dependant : jobContext->Dependants)
                    {
-                        // Dispatch dependency when it's ready
-                        JobData dependantData;
-                        dependantData.JobKey = dependant;
-                        for (dependantData.Index = 0; dependantData.Index < dependantContext.JobsLeft; dependantData.Index++)
-                            Jobs.PushBack(dependantData);
+                        JobContext& dependantContext = JobContexts[GET_CONTEXT_INDEX(dependant)];
+                        if (dependantContext.JobLabel == dependant)
+                            Platform::InterlockedDecrement(&dependantContext.DependenciesLeft);
                    }
+                    JobsLocker.Unlock();
                }

-                // Remove completed context
-                JobContexts.Remove(data.JobKey);
-                notifyWaiting = true;
-            }
-            JobsLocker.Unlock();
-            if (notifyWaiting)
-                WaitSignal.NotifyAll();
+                // Cleanup completed context
+                jobContext->Job.Unbind();
+                jobContext->Dependants.Clear();
+                Platform::AtomicStore(&jobContext->DependantsCount, 0);
+                Platform::AtomicStore(&jobContext->DependenciesLeft, -999); // Mark to indicate deleted context
+                Platform::AtomicStore(&jobContext->JobLabel, -1);
+                Platform::InterlockedDecrement(&JobContextsCount);

-            job.Unbind();
+                // Wakeup any thread waiting for the jobs to complete
+                WaitSignal.NotifyAll();
+            }
        }
        else
        {
@@ -266,8 +273,8 @@ void JobSystem::Execute(const Function<void(int32)>& job, int32 jobCount)
    if (jobCount > 1)
    {
        // Async
-        const int64 jobWaitHandle = Dispatch(job, jobCount);
-        Wait(jobWaitHandle);
+        const int64 label = Dispatch(job, jobCount);
+        Wait(label);
    }
    else
 #endif
@@ -284,21 +291,31 @@ int64 JobSystem::Dispatch(const Function<void(int32)>& job, int32 jobCount)
        return 0;
    PROFILE_CPU();
 #if JOB_SYSTEM_ENABLED
-    const auto label = Platform::InterlockedAdd(&JobLabel, (int64)jobCount) + jobCount;
+    while (Platform::InterlockedIncrement(&JobContextsCount) >= JobContextsSize)
+    {
+        // Too many jobs in flight, wait for some to complete to free up contexts
+        PROFILE_CPU_NAMED("JOB SYSTEM OVERFLOW");
+        ZoneColor(TracyWaitZoneColor);
+        Platform::InterlockedDecrement(&JobContextsCount);
+        Platform::Sleep(1);
+    }

-    JobData data;
-    data.JobKey = label;
+    // Get a new label
+    const int64 label = Platform::InterlockedIncrement(&JobLabel);

-    JobContext context;
+    // Build job
+    JobContext& context = JobContexts[GET_CONTEXT_INDEX(label)];
    context.Job = job;
+    context.JobIndex = 0;
    context.JobsLeft = jobCount;
+    context.JobLabel = label;
+    context.DependantsCount = 0;
    context.DependenciesLeft = 0;
+    context.JobsCount = jobCount;
+    context.Dependants.Clear();

-    JobsLocker.Lock();
-    JobContexts.Add(label, MoveTemp(context));
-    for (data.Index = 0; data.Index < jobCount; data.Index++)
-        Jobs.PushBack(data);
-    JobsLocker.Unlock();
+    // Move the job queue forward
+    Platform::InterlockedIncrement(&JobEndLabel);

    if (JobStartingOnDispatch)
    {
@@ -321,34 +338,47 @@ int64 JobSystem::Dispatch(const Function<void(int32)>& job, Span<int64> dependen
    if (jobCount <= 0)
        return 0;
    PROFILE_CPU();
+    PROFILE_MEM(EngineThreading);
 #if JOB_SYSTEM_ENABLED
-    const auto label = Platform::InterlockedAdd(&JobLabel, (int64)jobCount) + jobCount;
+    while (Platform::InterlockedIncrement(&JobContextsCount) >= JobContextsSize)
+    {
+        // Too many jobs in flight, wait for some to complete to free up contexts
+        PROFILE_CPU_NAMED("JOB SYSTEM OVERFLOW");
+        ZoneColor(TracyWaitZoneColor);
+        Platform::InterlockedDecrement(&JobContextsCount);
+        Platform::Sleep(1);
+    }

-    JobData data;
-    data.JobKey = label;
+    // Get a new label
+    const int64 label = Platform::InterlockedIncrement(&JobLabel);

-    JobContext context;
+    // Build job
+    JobContext& context = JobContexts[GET_CONTEXT_INDEX(label)];
    context.Job = job;
+    context.JobIndex = 0;
    context.JobsLeft = jobCount;
+    context.JobLabel = label;
+    context.DependantsCount = 0;
    context.DependenciesLeft = 0;
-
-    JobsLocker.Lock();
-    for (int64 dependency : dependencies)
+    context.JobsCount = jobCount;
+    context.Dependants.Clear();
    {
-        if (JobContext* dependencyContext = JobContexts.TryGet(dependency))
+        JobsLocker.Lock();
+        for (int64 dependency : dependencies)
        {
-            context.DependenciesLeft++;
-            dependencyContext->Dependants.Add(label);
+            JobContext& dependencyContext = JobContexts[GET_CONTEXT_INDEX(dependency)];
+            if (Platform::AtomicRead(&dependencyContext.JobLabel) == dependency)
+            {
+                Platform::InterlockedIncrement(&dependencyContext.DependantsCount);
+                dependencyContext.Dependants.Add(label);
+                context.DependenciesLeft++;
+            }
        }
+        JobsLocker.Unlock();
    }
-    JobContexts.Add(label, MoveTemp(context));
-    if (context.DependenciesLeft == 0)
-    {
-        // No dependencies left to complete so dispatch now
-        for (data.Index = 0; data.Index < jobCount; data.Index++)
-            Jobs.PushBack(data);
-    }
-    JobsLocker.Unlock();
+
+    // Move the job queue forward
+    Platform::InterlockedIncrement(&JobEndLabel);

    if (context.DependenciesLeft == 0 && JobStartingOnDispatch)
    {
@@ -369,19 +399,17 @@ int64 JobSystem::Dispatch(const Function<void(int32)>& job, Span<int64> dependen
 void JobSystem::Wait()
 {
 #if JOB_SYSTEM_ENABLED
-    JobsLocker.Lock();
-    int32 numJobs = JobContexts.Count();
-    JobsLocker.Unlock();
+    PROFILE_CPU();
+    ZoneColor(TracyWaitZoneColor);

+    int64 numJobs = Platform::AtomicRead(&JobContextsCount);
    while (numJobs > 0)
    {
        WaitMutex.Lock();
        WaitSignal.Wait(WaitMutex, 1);
        WaitMutex.Unlock();

-        JobsLocker.Lock();
-        numJobs = JobContexts.Count();
-        JobsLocker.Unlock();
+        numJobs = Platform::AtomicRead(&JobContextsCount);
    }
 #endif
 }
@@ -394,12 +422,11 @@ void JobSystem::Wait(int64 label)

    while (Platform::AtomicRead(&ExitFlag) == 0)
    {
-        JobsLocker.Lock();
-        const JobContext* context = JobContexts.TryGet(label);
-        JobsLocker.Unlock();
+        const JobContext& context = JobContexts[GET_CONTEXT_INDEX(label)];
+        const bool finished = Platform::AtomicRead(&context.JobLabel) != label || Platform::AtomicRead(&context.JobsLeft) <= 0;

        // Skip if context has been already executed (last job removes it)
-        if (!context)
+        if (finished)
            break;

        // Wait on signal until input label is not yet done
@@ -417,15 +444,10 @@ void JobSystem::SetJobStartingOnDispatch(bool value)
 {
 #if JOB_SYSTEM_ENABLED
    JobStartingOnDispatch = value;
-    if (value)
+    if (value && (Platform::AtomicRead(&JobEndLabel) - Platform::AtomicRead(&JobStartLabel)) > 0)
    {
-        JobsLocker.Lock();
-        const int32 count = Jobs.Count();
-        JobsLocker.Unlock();
-        if (count == 1)
-            JobsSignal.NotifyOne();
-        else if (count != 0)
-            JobsSignal.NotifyAll();
+        // Wake up threads to start processing jobs that may already be in the queue
+        JobsSignal.NotifyAll();
    }
 #endif
 }
--- a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Material.cpp
+++ b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Material.cpp
@@ -384,7 +384,7 @@ void MaterialGenerator::ProcessGroupMaterial(Box* box, Node* node, Value& value)
        // Apply hardness, use 0.991 as max since any value above will result in harsh aliasing
        auto x2 = writeLocal(ValueType::Float, String::Format(TEXT("saturate((1 - {0}) * (1 / (1 - clamp({1}, 0, 0.991f))))"), x1.Value, hardness.Value), node);

-        value = writeLocal(ValueType::Float, String::Format(TEXT("{0} ? (1 - {1}) : {1}"), invert.Value, x2.Value), node);
+        value = writeLocal(ValueType::Float, String::Format(TEXT("select({0}, (1 - {1}), {1})"), invert.Value, x2.Value), node);
        break;
    }
    // Tiling & Offset
@@ -459,7 +459,7 @@ void MaterialGenerator::ProcessGroupMaterial(Box* box, Node* node, Value& value)
        auto x = writeLocal(ValueType::Float, String::Format(TEXT("56100000.0f * pow({0}, -1) + 148.0f"), temperature.Value), node);

        // Value Y
-        auto y = writeLocal(ValueType::Float, String::Format(TEXT("{0} > 6500.0f ? 35200000.0f * pow({0}, -1) + 184.0f : 100.04f * log({0}) - 623.6f"), temperature.Value), node);
+        auto y = writeLocal(ValueType::Float, String::Format(TEXT("select({0} > 6500.0f, 35200000.0f * pow({0}, -1) + 184.0f, 100.04f * log({0}) - 623.6f)"), temperature.Value), node);

        // Value Z
        auto z = writeLocal(ValueType::Float, String::Format(TEXT("194.18f * log({0}) - 1448.6f"), temperature.Value), node);
@@ -467,7 +467,7 @@ void MaterialGenerator::ProcessGroupMaterial(Box* box, Node* node, Value& value)
        // Final color
        auto color = writeLocal(ValueType::Float3, String::Format(TEXT("float3({0}, {1}, {2})"), x.Value, y.Value, z.Value), node);
        color = writeLocal(ValueType::Float3, String::Format(TEXT("clamp({0}, 0.0f, 255.0f) / 255.0f"), color.Value), node);
-        value = writeLocal(ValueType::Float3, String::Format(TEXT("{1} < 1000.0f ? {0} * {1}/1000.0f : {0}"), color.Value, temperature.Value), node);
+        value = writeLocal(ValueType::Float3, String::Format(TEXT("select({1} < 1000.0f, {0} * {1}/1000.0f, {0})"), color.Value, temperature.Value), node);
        break;
    }
    // HSVToRGB
@@ -490,8 +490,8 @@ void MaterialGenerator::ProcessGroupMaterial(Box* box, Node* node, Value& value)
        const auto rgb = tryGetValue(node->GetBox(0), node->Values[0]).AsFloat3();
        const auto epsilon = writeLocal(ValueType::Float, TEXT("1e-10"), node);

-        auto p = writeLocal(ValueType::Float4, String::Format(TEXT("({0}.g < {0}.b) ? float4({0}.bg, -1.0f, 2.0f/3.0f) : float4({0}.gb, 0.0f, -1.0f/3.0f)"), rgb.Value), node);
-        auto q = writeLocal(ValueType::Float4, String::Format(TEXT("({0}.r < {1}.x) ? float4({1}.xyw, {0}.r) : float4({0}.r, {1}.yzx)"), rgb.Value, p.Value), node);
+        auto p = writeLocal(ValueType::Float4, String::Format(TEXT("select(({0}.g < {0}.b), float4({0}.bg, -1.0f, 2.0f/3.0f), float4({0}.gb, 0.0f, -1.0f/3.0f))"), rgb.Value), node);
+        auto q = writeLocal(ValueType::Float4, String::Format(TEXT("select(({0}.r < {1}.x), float4({1}.xyw, {0}.r), float4({0}.r, {1}.yzx))"), rgb.Value, p.Value), node);
        auto c = writeLocal(ValueType::Float, String::Format(TEXT("{0}.x - min({0}.w, {0}.y)"), q.Value), node);
        auto h = writeLocal(ValueType::Float, String::Format(TEXT("abs(({0}.w - {0}.y) / (6 * {1} + {2}) + {0}.z)"), q.Value, c.Value, epsilon.Value), node);

@@ -721,13 +721,13 @@ void MaterialGenerator::ProcessGroupMaterial(Box* box, Node* node, Value& value)
            blendFormula = TEXT("1.0 - (1.0 - base) * (1.0 - blend)");
            break;
        case 5: // Overlay
-            blendFormula = TEXT("base <= 0.5 ? 2.0 * base * blend : 1.0 - 2.0 * (1.0 - base) * (1.0 - blend)");
+            blendFormula = TEXT("select(base <= 0.5, 2.0 * base * blend, 1.0 - 2.0 * (1.0 - base) * (1.0 - blend))");
            break;
        case 6: // Linear Burn
            blendFormula = TEXT("base + blend - 1.0");
            break;
        case 7: // Linear Light
-            blendFormula = TEXT("blend < 0.5 ? max(base + (2.0 * blend) - 1.0, 0.0) : min(base + 2.0 * (blend - 0.5), 1.0)");
+            blendFormula = TEXT("select(blend < 0.5, max(base + (2.0 * blend) - 1.0, 0.0), min(base + 2.0 * (blend - 0.5), 1.0))");
            break;
        case 8: // Darken
            blendFormula = TEXT("min(base, blend)");
@@ -745,10 +745,10 @@ void MaterialGenerator::ProcessGroupMaterial(Box* box, Node* node, Value& value)
            blendFormula = TEXT("base / (blend + 0.000001)");
            break;
        case 13: // Hard Light
-            blendFormula = TEXT("blend <= 0.5 ? 2.0 * base * blend : 1.0 - 2.0 * (1.0 - base) * (1.0 - blend)");
+            blendFormula = TEXT("select(blend <= 0.5, 2.0 * base * blend, 1.0 - 2.0 * (1.0 - base) * (1.0 - blend))");
            break;
        case 14: // Pin Light
-            blendFormula = TEXT("blend <= 0.5 ? min(base, 2.0 * blend) : max(base, 2.0 * (blend - 0.5))");
+            blendFormula = TEXT("select(blend <= 0.5, min(base, 2.0 * blend), max(base, 2.0 * (blend - 0.5)))");
            break;
        case 15: // Hard Mix
            blendFormula = TEXT("step(1.0 - base, blend)");