diff --git a/Flax.flaxproj b/Flax.flaxproj index 74ab82f55..c9c27281b 100644 --- a/Flax.flaxproj +++ b/Flax.flaxproj @@ -4,10 +4,10 @@ "Major": 1, "Minor": 11, "Revision": 0, - "Build": 6806 + "Build": 6807 }, "Company": "Flax", - "Copyright": "Copyright (c) 2012-2025 Wojciech Figat. All rights reserved.", + "Copyright": "Copyright (c) 2012-2026 Wojciech Figat. All rights reserved.", "GameTarget": "FlaxGame", "EditorTarget": "FlaxEditor", "Configuration": { diff --git a/Source/Editor/Windows/Assets/AnimationGraphWindow.cs b/Source/Editor/Windows/Assets/AnimationGraphWindow.cs index 7e809d968..12142219c 100644 --- a/Source/Editor/Windows/Assets/AnimationGraphWindow.cs +++ b/Source/Editor/Windows/Assets/AnimationGraphWindow.cs @@ -99,7 +99,14 @@ namespace FlaxEditor.Windows.Assets Window = window; var surfaceParam = window.Surface.GetParameter(BaseModelId); if (surfaceParam != null) - BaseModel = FlaxEngine.Content.LoadAsync((Guid)surfaceParam.Value); + { + if (surfaceParam.Value is Guid asGuid) + BaseModel = FlaxEngine.Content.LoadAsync(asGuid); + else if (surfaceParam.Value is SkinnedModel asModel) + BaseModel = asModel; + else + BaseModel = null; + } else BaseModel = window.PreviewActor.GetParameterValue(BaseModelId) as SkinnedModel; } diff --git a/Source/Editor/Windows/Assets/SkinnedModelWindow.cs b/Source/Editor/Windows/Assets/SkinnedModelWindow.cs index 392cc896a..75fa87dfe 100644 --- a/Source/Editor/Windows/Assets/SkinnedModelWindow.cs +++ b/Source/Editor/Windows/Assets/SkinnedModelWindow.cs @@ -70,6 +70,13 @@ namespace FlaxEditor.Windows.Assets return; var nodes = proxy.Asset.Nodes; var bones = proxy.Asset.Bones; + var blendShapes = proxy.Asset.BlendShapes; + + // Info + { + var group = layout.Group("Info"); + group.Label($"Nodes: {nodes.Length}\nBones: {bones.Length}\nBlend Shapes: {blendShapes.Length}").AddCopyContextMenu().Label.Height *= 2.5f; + } // Skeleton Bones { @@ -109,7 +116,6 @@ namespace FlaxEditor.Windows.Assets } // Blend Shapes - var 
blendShapes = proxy.Asset.BlendShapes; if (blendShapes.Length != 0) { var group = layout.Group("Blend Shapes"); diff --git a/Source/Engine/Animations/Graph/AnimGraph.cpp b/Source/Engine/Animations/Graph/AnimGraph.cpp index e99f53b8f..3c2630b51 100644 --- a/Source/Engine/Animations/Graph/AnimGraph.cpp +++ b/Source/Engine/Animations/Graph/AnimGraph.cpp @@ -336,11 +336,13 @@ void AnimGraphExecutor::Update(AnimGraphInstanceData& data, float dt) SkeletonData* animResultSkeleton = &skeleton; // Retarget animation when using output pose from other skeleton - AnimGraphImpulse retargetNodes; if (_graph.BaseModel != data.NodesSkeleton) { ANIM_GRAPH_PROFILE_EVENT("Retarget"); auto& targetSkeleton = data.NodesSkeleton->Skeleton; + if (context.PoseCacheSize == context.PoseCache.Count()) + context.PoseCache.AddOne(); + auto& retargetNodes = context.PoseCache[context.PoseCacheSize++]; retargetNodes = *animResult; retargetNodes.Nodes.Resize(targetSkeleton.Nodes.Count()); Transform* targetNodes = retargetNodes.Nodes.Get(); diff --git a/Source/Engine/Animations/Graph/AnimGroup.Animation.cpp b/Source/Engine/Animations/Graph/AnimGroup.Animation.cpp index 08767728a..7eb8d32d6 100644 --- a/Source/Engine/Animations/Graph/AnimGroup.Animation.cpp +++ b/Source/Engine/Animations/Graph/AnimGroup.Animation.cpp @@ -109,86 +109,84 @@ namespace nodes->RootMotion.Orientation.Normalize(); } } - - Matrix ComputeWorldMatrixRecursive(const SkeletonData& skeleton, int32 index, Matrix localMatrix) - { - const auto& node = skeleton.Nodes[index]; - index = node.ParentIndex; - while (index != -1) - { - const auto& parent = skeleton.Nodes[index]; - localMatrix *= parent.LocalTransform.GetWorld(); - index = parent.ParentIndex; - } - return localMatrix; - } - - Matrix ComputeInverseParentMatrixRecursive(const SkeletonData& skeleton, int32 index) - { - Matrix inverseParentMatrix = Matrix::Identity; - const auto& node = skeleton.Nodes[index]; - if (node.ParentIndex != -1) - { - inverseParentMatrix = 
ComputeWorldMatrixRecursive(skeleton, index, inverseParentMatrix); - inverseParentMatrix = Matrix::Invert(inverseParentMatrix); - } - return inverseParentMatrix; - } } -void RetargetSkeletonNode(const SkeletonData& sourceSkeleton, const SkeletonData& targetSkeleton, const SkinnedModel::SkeletonMapping& sourceMapping, Transform& node, int32 targetIndex) +// Utility for retargeting animation poses between skeletons. +struct Retargeting { - // sourceSkeleton - skeleton of Anim Graph (Base Locomotion pack) - // targetSkeleton - visual mesh skeleton (City Characters pack) - // target - anim graph input/output transformation of that node - const auto& targetNode = targetSkeleton.Nodes[targetIndex]; - const int32 sourceIndex = sourceMapping.NodesMapping[targetIndex]; - if (sourceIndex == -1) +private: + const Matrix* _sourcePosePtr, * _targetPosePtr; + const SkeletonData* _sourceSkeleton, *_targetSkeleton; + const SkinnedModel::SkeletonMapping* _sourceMapping; + +public: + void Init(const SkeletonData& sourceSkeleton, const SkeletonData& targetSkeleton, const SkinnedModel::SkeletonMapping& sourceMapping) { - // Use T-pose - node = targetNode.LocalTransform; - return; + ASSERT_LOW_LAYER(targetSkeleton.Nodes.Count() == sourceMapping.NodesMapping.Length()); + + // Cache world-space poses for source and target skeletons to avoid redundant calculations during retargeting + _sourcePosePtr = sourceSkeleton.GetNodesPose().Get(); + _targetPosePtr = targetSkeleton.GetNodesPose().Get(); + + _sourceSkeleton = &sourceSkeleton; + _targetSkeleton = &targetSkeleton; + _sourceMapping = &sourceMapping; } - const auto& sourceNode = sourceSkeleton.Nodes[sourceIndex]; - // [Reference: https://wickedengine.net/2022/09/animation-retargeting/comment-page-1/] - - // Calculate T-Pose of source node, target node and target parent node - Matrix bindMatrix = ComputeWorldMatrixRecursive(sourceSkeleton, sourceIndex, sourceNode.LocalTransform.GetWorld()); - Matrix inverseBindMatrix = 
Matrix::Invert(bindMatrix); - Matrix targetMatrix = ComputeWorldMatrixRecursive(targetSkeleton, targetIndex, targetNode.LocalTransform.GetWorld()); - Matrix inverseParentMatrix = ComputeInverseParentMatrixRecursive(targetSkeleton, targetIndex); - - // Target node animation is world-space difference of the animated source node inside the target's parent node world-space - Matrix localMatrix = inverseBindMatrix * ComputeWorldMatrixRecursive(sourceSkeleton, sourceIndex, node.GetWorld()); - localMatrix = targetMatrix * localMatrix * inverseParentMatrix; - - // Extract local node transformation - localMatrix.Decompose(node); -} - -void RetargetSkeletonPose(const SkeletonData& sourceSkeleton, const SkeletonData& targetSkeleton, const SkinnedModel::SkeletonMapping& mapping, const Transform* sourceNodes, Transform* targetNodes) -{ - // TODO: cache source and target skeletons world-space poses for faster retargeting (use some pooled memory) - ASSERT_LOW_LAYER(targetSkeleton.Nodes.Count() == mapping.NodesMapping.Length()); - for (int32 targetIndex = 0; targetIndex < targetSkeleton.Nodes.Count(); targetIndex++) + void RetargetNode(const Transform& source, Transform& target, int32 sourceIndex, int32 targetIndex) { - auto& targetNode = targetSkeleton.Nodes.Get()[targetIndex]; - const int32 sourceIndex = mapping.NodesMapping.Get()[targetIndex]; - Transform node; + // sourceSkeleton - skeleton of Anim Graph + // targetSkeleton - visual mesh skeleton + // target - anim graph input/output transformation of that node + const SkeletonNode& targetNode = _targetSkeleton->Nodes.Get()[targetIndex]; if (sourceIndex == -1) { // Use T-pose - node = targetNode.LocalTransform; + target = targetNode.LocalTransform; } else { - // Retarget - node = sourceNodes[sourceIndex]; - RetargetSkeletonNode(sourceSkeleton, targetSkeleton, mapping, node, targetIndex); + // [Reference: https://wickedengine.net/2022/09/animation-retargeting/comment-page-1/] + + // Calculate T-Pose of source node, target node 
and target parent node + const Matrix* sourcePosePtr = _sourcePosePtr; + const Matrix* targetPosePtr = _targetPosePtr; + const Matrix& bindMatrix = sourcePosePtr[sourceIndex]; + const Matrix& targetMatrix = targetPosePtr[targetIndex]; + Matrix inverseParentMatrix; + if (targetNode.ParentIndex != -1) + Matrix::Invert(targetPosePtr[targetNode.ParentIndex], inverseParentMatrix); + else + inverseParentMatrix = Matrix::Identity; + + // Target node animation is world-space difference of the animated source node inside the target's parent node world-space + const SkeletonNode& sourceNode = _sourceSkeleton->Nodes.Get()[sourceIndex]; + Matrix localMatrix = source.GetWorld(); + if (sourceNode.ParentIndex != -1) + localMatrix = localMatrix * sourcePosePtr[sourceNode.ParentIndex]; + localMatrix = Matrix::Invert(bindMatrix) * localMatrix; + localMatrix = targetMatrix * localMatrix * inverseParentMatrix; + + // Extract local node transformation + localMatrix.Decompose(target); } - targetNodes[targetIndex] = node; } + + FORCE_INLINE void RetargetPose(const Transform* sourceNodes, Transform* targetNodes) + { + for (int32 targetIndex = 0; targetIndex < _targetSkeleton->Nodes.Count(); targetIndex++) + { + const int32 sourceIndex = _sourceMapping->NodesMapping.Get()[targetIndex]; + RetargetNode(sourceNodes[sourceIndex], targetNodes[targetIndex], sourceIndex, targetIndex); + } + } +}; + +void RetargetSkeletonPose(const SkeletonData& sourceSkeleton, const SkeletonData& targetSkeleton, const SkinnedModel::SkeletonMapping& mapping, const Transform* sourceNodes, Transform* targetNodes) +{ + Retargeting retargeting; + retargeting.Init(sourceSkeleton, targetSkeleton, mapping); + retargeting.RetargetPose(sourceNodes, targetNodes); } AnimGraphTraceEvent& AnimGraphContext::AddTraceEvent(const AnimGraphNode* node) @@ -431,9 +429,13 @@ void AnimGraphExecutor::ProcessAnimation(AnimGraphImpulse* nodes, AnimGraphNode* const bool weighted = weight < 1.0f; const bool retarget = mapping.SourceSkeleton 
&& mapping.SourceSkeleton != mapping.TargetSkeleton; const auto emptyNodes = GetEmptyNodes(); + Retargeting retargeting; SkinnedModel::SkeletonMapping sourceMapping; if (retarget) + { sourceMapping = _graph.BaseModel->GetSkeletonMapping(mapping.SourceSkeleton); + retargeting.Init(mapping.SourceSkeleton->Skeleton, mapping.TargetSkeleton->Skeleton, mapping); + } for (int32 nodeIndex = 0; nodeIndex < nodes->Nodes.Count(); nodeIndex++) { const int32 nodeToChannel = mapping.NodesMapping[nodeIndex]; @@ -447,7 +449,8 @@ void AnimGraphExecutor::ProcessAnimation(AnimGraphImpulse* nodes, AnimGraphNode* // Optionally retarget animation into the skeleton used by the Anim Graph if (retarget) { - RetargetSkeletonNode(mapping.SourceSkeleton->Skeleton, mapping.TargetSkeleton->Skeleton, sourceMapping, srcNode, nodeIndex); + const int32 sourceIndex = sourceMapping.NodesMapping[nodeIndex]; + retargeting.RetargetNode(srcNode, srcNode, sourceIndex, nodeIndex); } // Mark node as used diff --git a/Source/Engine/Content/Assets/SkinnedModel.cpp b/Source/Engine/Content/Assets/SkinnedModel.cpp index ed41c4aab..c0355ea0e 100644 --- a/Source/Engine/Content/Assets/SkinnedModel.cpp +++ b/Source/Engine/Content/Assets/SkinnedModel.cpp @@ -61,16 +61,24 @@ Array SkinnedModel::GetBlendShapes() SkinnedModel::SkeletonMapping SkinnedModel::GetSkeletonMapping(Asset* source, bool autoRetarget) { + // Fast-path to use cached mapping SkeletonMapping mapping; mapping.TargetSkeleton = this; + SkeletonMappingData mappingData; + if (_skeletonMappingCache.TryGet(source, mappingData)) + { + mapping.SourceSkeleton = mappingData.SourceSkeleton; + mapping.NodesMapping = mappingData.NodesMapping; + return mapping; + } + mapping.SourceSkeleton = nullptr; + if (WaitForLoaded() || !source || source->WaitForLoaded()) return mapping; + PROFILE_CPU(); ScopeLock lock(Locker); - SkeletonMappingData mappingData; if (!_skeletonMappingCache.TryGet(source, mappingData)) { - PROFILE_CPU(); - // Initialize the mapping 
SkeletonRetarget* retarget = nullptr; const Guid sourceId = source->GetID(); @@ -370,6 +378,7 @@ bool SkinnedModel::SetupSkeleton(const Array& nodes) model->Skeleton.Bones[i].LocalTransform = node.LocalTransform; model->Skeleton.Bones[i].NodeIndex = i; } + model->Skeleton.Dirty(); ClearSkeletonMapping(); // Calculate offset matrix (inverse bind pose transform) for every bone manually @@ -427,6 +436,7 @@ bool SkinnedModel::SetupSkeleton(const Array& nodes, const ArraySkeleton.Nodes = nodes; model->Skeleton.Bones = bones; + model->Skeleton.Dirty(); ClearSkeletonMapping(); // Calculate offset matrix (inverse bind pose transform) for every bone manually @@ -823,13 +833,13 @@ bool SkinnedModel::SaveMesh(WriteStream& stream, const ModelData& modelData, int void SkinnedModel::ClearSkeletonMapping() { - for (auto& e : _skeletonMappingCache) + for (const auto& e : _skeletonMappingCache) { e.Key->OnUnloaded.Unbind(this); #if USE_EDITOR e.Key->OnReloading.Unbind(this); #endif - Allocator::Free(e.Value.NodesMapping.Get()); + Allocator::Free((void*)e.Value.NodesMapping.Get()); } _skeletonMappingCache.Clear(); } @@ -837,8 +847,9 @@ void SkinnedModel::ClearSkeletonMapping() void SkinnedModel::OnSkeletonMappingSourceAssetUnloaded(Asset* obj) { ScopeLock lock(Locker); - auto i = _skeletonMappingCache.Find(obj); - ASSERT(i != _skeletonMappingCache.End()); + SkeletonMappingData mappingData; + bool found = _skeletonMappingCache.TryGet(obj, mappingData); + ASSERT(found); // Unlink event obj->OnUnloaded.Unbind(this); @@ -847,8 +858,8 @@ void SkinnedModel::OnSkeletonMappingSourceAssetUnloaded(Asset* obj) #endif // Clear cache - Allocator::Free(i->Value.NodesMapping.Get()); - _skeletonMappingCache.Remove(i); + Allocator::Free(mappingData.NodesMapping.Get()); + _skeletonMappingCache.Remove(obj); } uint64 SkinnedModel::GetMemoryUsage() const diff --git a/Source/Engine/Content/Assets/SkinnedModel.h b/Source/Engine/Content/Assets/SkinnedModel.h index 894a080c4..111d4d6cb 100644 --- 
a/Source/Engine/Content/Assets/SkinnedModel.h +++ b/Source/Engine/Content/Assets/SkinnedModel.h @@ -3,7 +3,7 @@ #pragma once #include "ModelBase.h" -#include "Engine/Core/Collections/Dictionary.h" +#include "Engine/Threading/ConcurrentDictionary.h" #include "Engine/Graphics/Models/SkinnedMesh.h" #include "Engine/Graphics/Models/SkeletonData.h" @@ -101,9 +101,9 @@ public: struct FLAXENGINE_API SkeletonMapping { // Target skeleton. - AssetReference TargetSkeleton; + SkinnedModel* TargetSkeleton; // Source skeleton. - AssetReference SourceSkeleton; + SkinnedModel* SourceSkeleton; // The node-to-node mapping for the fast animation sampling for the skinned model skeleton nodes. Each item is index of the source skeleton node into target skeleton node. Span NodesMapping; }; @@ -115,7 +115,7 @@ private: Span NodesMapping; }; - Dictionary _skeletonMappingCache; + ConcurrentDictionary _skeletonMappingCache; public: /// diff --git a/Source/Engine/Content/Assets/VisualScript.cpp b/Source/Engine/Content/Assets/VisualScript.cpp index 329696dea..a7e132bdc 100644 --- a/Source/Engine/Content/Assets/VisualScript.cpp +++ b/Source/Engine/Content/Assets/VisualScript.cpp @@ -1700,6 +1700,8 @@ void VisualScript::CacheScriptingType() VisualScriptingBinaryModule::VisualScriptingBinaryModule() : _name("Visual Scripting") { + // Visual Scripts can be unloaded and loaded again even in game + CanReload = true; } ScriptingObject* VisualScriptingBinaryModule::VisualScriptObjectSpawn(const ScriptingObjectSpawnParams& params) diff --git a/Source/Engine/Core/Collections/Dictionary.h b/Source/Engine/Core/Collections/Dictionary.h index e2f5f0ed6..e18a5b999 100644 --- a/Source/Engine/Core/Collections/Dictionary.h +++ b/Source/Engine/Core/Collections/Dictionary.h @@ -4,6 +4,9 @@ #include "HashSetBase.h" +template +class ConcurrentDictionary; + /// /// Describes single portion of space for the key and value pair in a hash map. 
/// @@ -13,6 +16,7 @@ struct DictionaryBucket friend Memory; friend HashSetBase; friend Dictionary; + friend ConcurrentDictionary; /// The key. KeyType Key; diff --git a/Source/Engine/Core/Types/Variant.cpp b/Source/Engine/Core/Types/Variant.cpp index 4ab8552d3..dcabe8e48 100644 --- a/Source/Engine/Core/Types/Variant.cpp +++ b/Source/Engine/Core/Types/Variant.cpp @@ -18,8 +18,10 @@ #include "Engine/Core/Math/Ray.h" #include "Engine/Core/Math/Rectangle.h" #include "Engine/Core/Math/Transform.h" +#include "Engine/Scripting/BinaryModule.h" #include "Engine/Scripting/Scripting.h" #include "Engine/Scripting/ScriptingObject.h" +#include "Engine/Scripting/ManagedCLR/MAssembly.h" #include "Engine/Scripting/ManagedCLR/MClass.h" #include "Engine/Scripting/ManagedCLR/MCore.h" #include "Engine/Scripting/ManagedCLR/MUtils.h" @@ -88,6 +90,7 @@ static_assert((int32)VariantType::Types::MAX == ARRAY_COUNT(InBuiltTypesTypeName VariantType::VariantType(Types type, const StringView& typeName) { Type = type; + StaticName = 0; TypeName = nullptr; const int32 length = typeName.Length(); if (length) @@ -98,32 +101,41 @@ VariantType::VariantType(Types type, const StringView& typeName) } } -VariantType::VariantType(Types type, const StringAnsiView& typeName) +VariantType::VariantType(Types type, const StringAnsiView& typeName, bool staticName) { Type = type; - TypeName = nullptr; - int32 length = typeName.Length(); - if (length) + StaticName = staticName && (typeName.HasChars() && typeName[typeName.Length()] == 0); // Require string to be null-terminated (not fully safe check) + if (staticName) { - TypeName = static_cast(Allocator::Allocate(length + 1)); - Platform::MemoryCopy(TypeName, typeName.Get(), length); - TypeName[length] = 0; + TypeName = (char*)typeName.Get(); } + else + { + TypeName = nullptr; + int32 length = typeName.Length(); + if (length) + { + TypeName = static_cast(Allocator::Allocate(length + 1)); + Platform::MemoryCopy(TypeName, typeName.Get(), length); + TypeName[length] 
= 0; + } + } +} + +VariantType::VariantType(Types type, const ScriptingType& sType) + : VariantType(type) +{ + SetTypeName(sType); } VariantType::VariantType(Types type, const MClass* klass) { Type = type; + StaticName = false; TypeName = nullptr; #if USE_CSHARP if (klass) - { - const StringAnsiView typeName = klass->GetFullName(); - const int32 length = typeName.Length(); - TypeName = static_cast(Allocator::Allocate(length + 1)); - Platform::MemoryCopy(TypeName, typeName.Get(), length); - TypeName[length] = 0; - } + SetTypeName(*klass); #endif } @@ -190,9 +202,9 @@ VariantType::VariantType(const StringAnsiView& typeName) if (const auto mclass = Scripting::FindClass(typeName)) { if (mclass->IsEnum()) - new(this) VariantType(Enum, typeName); + new(this) VariantType(Enum, mclass); else - new(this) VariantType(ManagedObject, typeName); + new(this) VariantType(ManagedObject, mclass); return; } #endif @@ -204,36 +216,48 @@ VariantType::VariantType(const StringAnsiView& typeName) VariantType::VariantType(const VariantType& other) { Type = other.Type; - TypeName = nullptr; - const int32 length = StringUtils::Length(other.TypeName); - if (length) + StaticName = other.StaticName; + if (StaticName) { - TypeName = static_cast(Allocator::Allocate(length + 1)); - Platform::MemoryCopy(TypeName, other.TypeName, length); - TypeName[length] = 0; + TypeName = other.TypeName; + } + else + { + TypeName = nullptr; + const int32 length = StringUtils::Length(other.TypeName); + if (length) + { + TypeName = static_cast(Allocator::Allocate(length + 1)); + Platform::MemoryCopy(TypeName, other.TypeName, length); + TypeName[length] = 0; + } } } VariantType::VariantType(VariantType&& other) noexcept { Type = other.Type; + StaticName = other.StaticName; TypeName = other.TypeName; other.Type = Null; other.TypeName = nullptr; + other.StaticName = 0; } VariantType& VariantType::operator=(const Types& type) { Type = type; - Allocator::Free(TypeName); + if (StaticName) + Allocator::Free(TypeName); 
TypeName = nullptr; + StaticName = 0; return *this; } VariantType& VariantType::operator=(VariantType&& other) { ASSERT(this != &other); - Swap(Type, other.Type); + Swap(Packed, other.Packed); Swap(TypeName, other.TypeName); return *this; } @@ -242,14 +266,23 @@ VariantType& VariantType::operator=(const VariantType& other) { ASSERT(this != &other); Type = other.Type; - Allocator::Free(TypeName); - TypeName = nullptr; - const int32 length = StringUtils::Length(other.TypeName); - if (length) + if (StaticName) + Allocator::Free(TypeName); + StaticName = other.StaticName; + if (StaticName) { - TypeName = static_cast(Allocator::Allocate(length + 1)); - Platform::MemoryCopy(TypeName, other.TypeName, length); - TypeName[length] = 0; + TypeName = other.TypeName; + } + else + { + TypeName = nullptr; + const int32 length = StringUtils::Length(other.TypeName); + if (length) + { + TypeName = static_cast(Allocator::Allocate(length + 1)); + Platform::MemoryCopy(TypeName, other.TypeName, length); + TypeName[length] = 0; + } } return *this; } @@ -283,24 +316,45 @@ void VariantType::SetTypeName(const StringView& typeName) { if (StringUtils::Length(TypeName) != typeName.Length()) { - Allocator::Free(TypeName); + if (StaticName) + Allocator::Free(TypeName); + StaticName = 0; TypeName = static_cast(Allocator::Allocate(typeName.Length() + 1)); TypeName[typeName.Length()] = 0; } StringUtils::ConvertUTF162ANSI(typeName.Get(), TypeName, typeName.Length()); } -void VariantType::SetTypeName(const StringAnsiView& typeName) +void VariantType::SetTypeName(const StringAnsiView& typeName, bool staticName) { - if (StringUtils::Length(TypeName) != typeName.Length()) + if (StringUtils::Length(TypeName) != typeName.Length() || StaticName != staticName) { - Allocator::Free(TypeName); + if (StaticName) + Allocator::Free(TypeName); + StaticName = staticName; + if (staticName) + { + TypeName = (char*)typeName.Get(); + return; + } TypeName = static_cast(Allocator::Allocate(typeName.Length() + 1)); 
TypeName[typeName.Length()] = 0; } Platform::MemoryCopy(TypeName, typeName.Get(), typeName.Length()); } +void VariantType::SetTypeName(const ScriptingType& type) +{ + SetTypeName(type.Fullname, type.Module->CanReload); +} + +void VariantType::SetTypeName(const MClass& klass) +{ +#if USE_CSHARP + SetTypeName(klass.GetFullName(), klass.GetAssembly()->CanReload()); +#endif +} + const char* VariantType::GetTypeName() const { if (TypeName) @@ -322,6 +376,29 @@ VariantType VariantType::GetElementType() const return VariantType(); } +void VariantType::Inline() +{ + // Check if the typename comes from static assembly which can be used to inline name instead of dynamic memory allocation + StringAnsiView typeName(TypeName); + auto& modules = BinaryModule::GetModules(); + for (auto module : modules) + { + int32 typeIndex; + if (!module->CanReload && module->FindScriptingType(typeName, typeIndex)) + { + ScriptingTypeHandle typeHandle(module, typeIndex); + SetTypeName(typeHandle.GetType().Fullname, true); + return; + } + } + +#if USE_CSHARP + // Try with C#-only types + if (const auto mclass = Scripting::FindClass(TypeName)) + SetTypeName(*mclass); +#endif +} + ::String VariantType::ToString() const { ::String result; @@ -632,8 +709,7 @@ Variant::Variant(ScriptingObject* v) AsObject = v; if (v) { - // TODO: optimize VariantType to support statically linked typename of ScriptingType (via 1 bit flag within Types enum, only in game as editor might hot-reload types) - Type.SetTypeName(v->GetType().Fullname); + Type.SetTypeName(v->GetType()); v->Deleted.Bind(this); } } @@ -644,9 +720,8 @@ Variant::Variant(Asset* v) AsAsset = v; if (v) { - // TODO: optimize VariantType to support statically linked typename of ScriptingType (via 1 bit flag within Types enum, only in game as editor might hot-reload types) - Type.SetTypeName(v->GetType().Fullname); v->AddReference(); + Type.SetTypeName(v->GetType()); v->OnUnloaded.Bind(this); } } @@ -3007,16 +3082,16 @@ Variant Variant::NewValue(const 
StringAnsiView& typeName) switch (type.Type) { case ScriptingTypes::Script: - v.SetType(VariantType(VariantType::Object, typeName)); + v.SetType(VariantType(VariantType::Object, type)); v.AsObject = type.Script.Spawn(ScriptingObjectSpawnParams(Guid::New(), typeHandle)); if (v.AsObject) v.AsObject->Deleted.Bind(&v); break; case ScriptingTypes::Structure: - v.SetType(VariantType(VariantType::Structure, typeName)); + v.SetType(VariantType(VariantType::Structure, type)); break; case ScriptingTypes::Enum: - v.SetType(VariantType(VariantType::Enum, typeName)); + v.SetType(VariantType(VariantType::Enum, type)); v.AsEnum = 0; break; default: @@ -3030,16 +3105,16 @@ Variant Variant::NewValue(const StringAnsiView& typeName) // Fallback to C#-only types if (mclass->IsEnum()) { - v.SetType(VariantType(VariantType::Enum, typeName)); + v.SetType(VariantType(VariantType::Enum, mclass)); v.AsEnum = 0; } else if (mclass->IsValueType()) { - v.SetType(VariantType(VariantType::Structure, typeName)); + v.SetType(VariantType(VariantType::Structure, mclass)); } else { - v.SetType(VariantType(VariantType::ManagedObject, typeName)); + v.SetType(VariantType(VariantType::ManagedObject, mclass)); MObject* instance = mclass->CreateInstance(); if (instance) { diff --git a/Source/Engine/Core/Types/Variant.h b/Source/Engine/Core/Types/Variant.h index 4fd6ab2eb..5c057bc65 100644 --- a/Source/Engine/Core/Types/Variant.h +++ b/Source/Engine/Core/Types/Variant.h @@ -17,7 +17,7 @@ struct ScriptingTypeHandle; /// API_STRUCT(InBuild) struct FLAXENGINE_API VariantType { - enum Types + enum Types : uint8 { Null = 0, Void, @@ -80,10 +80,22 @@ API_STRUCT(InBuild) struct FLAXENGINE_API VariantType }; public: - /// - /// The type of the variant. - /// - Types Type; + union + { + struct + { + /// + /// The type of the variant. + /// + Types Type; + + /// + /// Internal flag used to indicate that pointer to TypeName has been linked from a static/external memory that is stable (eg. ScriptingType or MClass). 
Allows avoiding dynamic memory allocation. + /// + uint8 StaticName : 1; + }; + uint16 Packed; + }; /// /// The optional additional full name of the scripting type. Used for Asset, Object, Enum, Structure types to describe type precisely. @@ -94,17 +106,20 @@ public: FORCE_INLINE VariantType() { Type = Null; + StaticName = 0; TypeName = nullptr; } FORCE_INLINE explicit VariantType(Types type) { Type = type; + StaticName = 0; TypeName = nullptr; } explicit VariantType(Types type, const StringView& typeName); - explicit VariantType(Types type, const StringAnsiView& typeName); + explicit VariantType(Types type, const StringAnsiView& typeName, bool staticName = false); + explicit VariantType(Types type, const ScriptingType& sType); explicit VariantType(Types type, const MClass* klass); explicit VariantType(const StringAnsiView& typeName); VariantType(const VariantType& other); @@ -112,7 +127,8 @@ public: FORCE_INLINE ~VariantType() { - Allocator::Free(TypeName); + if (!StaticName) + Allocator::Free(TypeName); } public: @@ -130,9 +146,13 @@ public: public: void SetTypeName(const StringView& typeName); - void SetTypeName(const StringAnsiView& typeName); + void SetTypeName(const StringAnsiView& typeName, bool staticName = false); + void SetTypeName(const ScriptingType& type); + void SetTypeName(const MClass& klass); const char* GetTypeName() const; VariantType GetElementType() const; + // Drops custom type name into the name allocated by the scripting module to reduce memory allocations when referencing types. 
+ void Inline(); ::String ToString() const; }; diff --git a/Source/Engine/Foliage/Foliage.cpp b/Source/Engine/Foliage/Foliage.cpp index f8b9c7b0f..116866848 100644 --- a/Source/Engine/Foliage/Foliage.cpp +++ b/Source/Engine/Foliage/Foliage.cpp @@ -7,17 +7,17 @@ #include "Engine/Core/Random.h" #include "Engine/Engine/Engine.h" #include "Engine/Graphics/RenderTask.h" +#include "Engine/Graphics/GPUDevice.h" #include "Engine/Content/Deprecated.h" #if !FOLIAGE_USE_SINGLE_QUAD_TREE #include "Engine/Threading/JobSystem.h" #if FOLIAGE_USE_DRAW_CALLS_BATCHING #include "Engine/Graphics/RenderTools.h" -#include "Engine/Graphics/GPUDevice.h" -#include "Engine/Renderer/RenderList.h" #endif #endif #include "Engine/Level/SceneQuery.h" #include "Engine/Profiler/ProfilerCPU.h" +#include "Engine/Renderer/RenderList.h" #include "Engine/Renderer/GlobalSignDistanceFieldPass.h" #include "Engine/Renderer/GI/GlobalSurfaceAtlasPass.h" #include "Engine/Serialization/Serialization.h" @@ -41,8 +41,7 @@ Foliage::Foliage(const SpawnParams& params) void Foliage::AddToCluster(ChunkedArray& clusters, FoliageCluster* cluster, FoliageInstance& instance) { - ASSERT(instance.Bounds.Radius > ZeroTolerance); - ASSERT(cluster->Bounds.Intersects(instance.Bounds)); + ASSERT_LOW_LAYER(instance.Bounds.Radius > ZeroTolerance); // Minor clusters don't use bounds intersection but try to find the first free cluster instead if (cluster->IsMinor) @@ -63,6 +62,7 @@ void Foliage::AddToCluster(ChunkedArrayBounds.Intersects(instance.Bounds)); while (cluster->Children[0]) { #define CHECK_CHILD(idx) \ @@ -193,6 +193,8 @@ void Foliage::DrawCluster(RenderContext& renderContext, FoliageCluster* cluster, // Draw visible instances const auto frame = Engine::FrameCount; const auto model = type.Model.Get(); + const auto transitionLOD = renderContext.View.Pass != DrawPass::Depth; // Let the main view pass update LOD transitions + // TODO: move DrawState to be stored per-view (so shadows can fade objects on their own) for (int32 
i = 0; i < cluster->Instances.Count(); i++) { auto& instance = *cluster->Instances.Get()[i]; @@ -210,20 +212,29 @@ void Foliage::DrawCluster(RenderContext& renderContext, FoliageCluster* cluster, // Handling model fade-out transition if (modelFrame == frame && instance.DrawState.PrevLOD != -1) { - // Check if start transition - if (instance.DrawState.LODTransition == 255) + if (transitionLOD) { - instance.DrawState.LODTransition = 0; - } + // Check if start transition + if (instance.DrawState.LODTransition == 255) + { + instance.DrawState.LODTransition = 0; + } - RenderTools::UpdateModelLODTransition(instance.DrawState.LODTransition); + RenderTools::UpdateModelLODTransition(instance.DrawState.LODTransition); - // Check if end transition - if (instance.DrawState.LODTransition == 255) - { - instance.DrawState.PrevLOD = lodIndex; + // Check if end transition + if (instance.DrawState.LODTransition == 255) + { + instance.DrawState.PrevLOD = lodIndex; + } + else + { + const auto prevLOD = model->ClampLODIndex(instance.DrawState.PrevLOD); + const float normalizedProgress = static_cast(instance.DrawState.LODTransition) * (1.0f / 255.0f); + DrawInstance(renderContext, instance, type, model, prevLOD, normalizedProgress, drawCallsLists, result); + } } - else + else if (instance.DrawState.LODTransition < 255) { const auto prevLOD = model->ClampLODIndex(instance.DrawState.PrevLOD); const float normalizedProgress = static_cast(instance.DrawState.LODTransition) * (1.0f / 255.0f); @@ -236,29 +247,32 @@ void Foliage::DrawCluster(RenderContext& renderContext, FoliageCluster* cluster, lodIndex += renderContext.View.ModelLODBias; lodIndex = model->ClampLODIndex(lodIndex); - // Check if it's the new frame and could update the drawing state (note: model instance could be rendered many times per frame to different viewports) - if (modelFrame == frame) + if (transitionLOD) { - // Check if start transition - if (instance.DrawState.PrevLOD != lodIndex && instance.DrawState.LODTransition == 
255) + // Check if it's the new frame and could update the drawing state (note: model instance could be rendered many times per frame to different viewports) + if (modelFrame == frame) { + // Check if start transition + if (instance.DrawState.PrevLOD != lodIndex && instance.DrawState.LODTransition == 255) + { + instance.DrawState.LODTransition = 0; + } + + RenderTools::UpdateModelLODTransition(instance.DrawState.LODTransition); + + // Check if end transition + if (instance.DrawState.LODTransition == 255) + { + instance.DrawState.PrevLOD = lodIndex; + } + } + // Check if there was a gap between frames in drawing this model instance + else if (modelFrame < frame || instance.DrawState.PrevLOD == -1) + { + // Reset state + instance.DrawState.PrevLOD = lodIndex; instance.DrawState.LODTransition = 0; } - - RenderTools::UpdateModelLODTransition(instance.DrawState.LODTransition); - - // Check if end transition - if (instance.DrawState.LODTransition == 255) - { - instance.DrawState.PrevLOD = lodIndex; - } - } - // Check if there was a gap between frames in drawing this model instance - else if (modelFrame < frame || instance.DrawState.PrevLOD == -1) - { - // Reset state - instance.DrawState.PrevLOD = lodIndex; - instance.DrawState.LODTransition = 255; } // Draw @@ -281,7 +295,8 @@ void Foliage::DrawCluster(RenderContext& renderContext, FoliageCluster* cluster, //DebugDraw::DrawSphere(instance.Bounds, Color::YellowGreen); - instance.DrawState.PrevFrame = frame; + if (transitionLOD) + instance.DrawState.PrevFrame = frame; } } } @@ -350,7 +365,7 @@ void Foliage::DrawCluster(RenderContext& renderContext, FoliageCluster* cluster, draw.DrawState = &instance.DrawState; draw.Bounds = sphere; draw.PerInstanceRandom = instance.Random; - draw.DrawModes = type._drawModes; + draw.DrawModes = type.DrawModes; draw.SetStencilValue(_layer); type.Model->Draw(renderContext, draw); diff --git a/Source/Engine/Foliage/FoliageCluster.cpp b/Source/Engine/Foliage/FoliageCluster.cpp index 
fd4c0f753..107bf265a 100644 --- a/Source/Engine/Foliage/FoliageCluster.cpp +++ b/Source/Engine/Foliage/FoliageCluster.cpp @@ -21,26 +21,7 @@ void FoliageCluster::Init(const BoundingBox& bounds) void FoliageCluster::UpdateTotalBoundsAndCullDistance() { - if (Children[0]) - { - ASSERT(Instances.IsEmpty()); - - Children[0]->UpdateTotalBoundsAndCullDistance(); - Children[1]->UpdateTotalBoundsAndCullDistance(); - Children[2]->UpdateTotalBoundsAndCullDistance(); - Children[3]->UpdateTotalBoundsAndCullDistance(); - - TotalBounds = Children[0]->TotalBounds; - BoundingBox::Merge(TotalBounds, Children[1]->TotalBounds, TotalBounds); - BoundingBox::Merge(TotalBounds, Children[2]->TotalBounds, TotalBounds); - BoundingBox::Merge(TotalBounds, Children[3]->TotalBounds, TotalBounds); - - MaxCullDistance = Children[0]->MaxCullDistance; - MaxCullDistance = Math::Max(MaxCullDistance, Children[1]->MaxCullDistance); - MaxCullDistance = Math::Max(MaxCullDistance, Children[2]->MaxCullDistance); - MaxCullDistance = Math::Max(MaxCullDistance, Children[3]->MaxCullDistance); - } - else if (Instances.HasItems()) + if (Instances.HasItems()) { BoundingBox box; BoundingBox::FromSphere(Instances[0]->Bounds, TotalBounds); @@ -58,6 +39,30 @@ void FoliageCluster::UpdateTotalBoundsAndCullDistance() MaxCullDistance = 0; } + if (Children[0]) + { + Children[0]->UpdateTotalBoundsAndCullDistance(); + Children[1]->UpdateTotalBoundsAndCullDistance(); + Children[2]->UpdateTotalBoundsAndCullDistance(); + Children[3]->UpdateTotalBoundsAndCullDistance(); + + if (Instances.HasItems()) + BoundingBox::Merge(TotalBounds, Children[0]->TotalBounds, TotalBounds); + else + TotalBounds = Children[0]->TotalBounds; + BoundingBox::Merge(TotalBounds, Children[1]->TotalBounds, TotalBounds); + BoundingBox::Merge(TotalBounds, Children[2]->TotalBounds, TotalBounds); + BoundingBox::Merge(TotalBounds, Children[3]->TotalBounds, TotalBounds); + + if (Instances.HasItems()) + MaxCullDistance = Math::Max(MaxCullDistance, 
Children[0]->MaxCullDistance); + else + MaxCullDistance = Children[0]->MaxCullDistance; + MaxCullDistance = Math::Max(MaxCullDistance, Children[1]->MaxCullDistance); + MaxCullDistance = Math::Max(MaxCullDistance, Children[2]->MaxCullDistance); + MaxCullDistance = Math::Max(MaxCullDistance, Children[3]->MaxCullDistance); + } + BoundingSphere::FromBox(TotalBounds, TotalBoundsSphere); } diff --git a/Source/Engine/Graphics/Models/SkeletonData.h b/Source/Engine/Graphics/Models/SkeletonData.h index 0b6c7d4d7..79e0be512 100644 --- a/Source/Engine/Graphics/Models/SkeletonData.h +++ b/Source/Engine/Graphics/Models/SkeletonData.h @@ -73,6 +73,10 @@ struct TIsPODType /// class FLAXENGINE_API SkeletonData { +private: + mutable volatile int64 _dirty = 1; + mutable Array _cachedPose; + public: /// /// The nodes in this hierarchy. The root node is always at the index 0. @@ -114,6 +118,11 @@ public: int32 FindNode(const StringView& name) const; int32 FindBone(int32 nodeIndex) const; + // Gets the skeleton nodes transforms in mesh space (pose). Calculated from the local node transforms and hierarchy. Cached internally and updated when data is dirty. + const Array& GetNodesPose() const; + + // Marks data as dirty (modified) to update internal state and recalculate cached data if needed (eg. skeleton pose). 
+ void Dirty(); uint64 GetMemoryUsage() const; /// diff --git a/Source/Engine/Graphics/Models/SkinnedMesh.cpp b/Source/Engine/Graphics/Models/SkinnedMesh.cpp index 66b3e5701..0377003be 100644 --- a/Source/Engine/Graphics/Models/SkinnedMesh.cpp +++ b/Source/Engine/Graphics/Models/SkinnedMesh.cpp @@ -154,6 +154,8 @@ void SkeletonData::Swap(SkeletonData& other) { Nodes.Swap(other.Nodes); Bones.Swap(other.Bones); + Dirty(); + other.Dirty(); } Transform SkeletonData::GetNodeTransform(int32 nodeIndex) const @@ -171,6 +173,7 @@ Transform SkeletonData::GetNodeTransform(int32 nodeIndex) const void SkeletonData::SetNodeTransform(int32 nodeIndex, const Transform& value) { CHECK(Nodes.IsValidIndex(nodeIndex)); + Dirty(); const int32 parentIndex = Nodes[nodeIndex].ParentIndex; if (parentIndex == -1) { @@ -201,6 +204,39 @@ int32 SkeletonData::FindBone(int32 nodeIndex) const return -1; } +const Array& SkeletonData::GetNodesPose() const +{ + // Guard with a simple atomic flag to avoid locking if the pose is up to date + if (Platform::AtomicRead(&_dirty)) + { + ScopeLock lock(RenderContext::GPULocker); + if (Platform::AtomicRead(&_dirty)) + { + const SkeletonNode* nodes = Nodes.Get(); + const int32 nodesCount = Nodes.Count(); + _cachedPose.Resize(nodesCount); + Matrix* posePtr = _cachedPose.Get(); + for (int32 nodeIndex = 0; nodeIndex < nodesCount; nodeIndex++) + { + const SkeletonNode& node = nodes[nodeIndex]; + Matrix local; + Matrix::Transformation(node.LocalTransform.Scale, node.LocalTransform.Orientation, node.LocalTransform.Translation, local); + if (node.ParentIndex != -1) + Matrix::Multiply(local, posePtr[node.ParentIndex], posePtr[nodeIndex]); + else + posePtr[nodeIndex] = local; + } + Platform::AtomicStore(&_dirty, 0); + } + } + return _cachedPose; +} + +void SkeletonData::Dirty() +{ + Platform::AtomicStore(&_dirty, 1); +} + uint64 SkeletonData::GetMemoryUsage() const { uint64 result = Nodes.Capacity() * sizeof(SkeletonNode) + Bones.Capacity() * sizeof(SkeletonBone); diff 
--git a/Source/Engine/Graphics/Models/SkinnedMeshDrawData.cpp b/Source/Engine/Graphics/Models/SkinnedMeshDrawData.cpp index 8470facac..eb2ea0145 100644 --- a/Source/Engine/Graphics/Models/SkinnedMeshDrawData.cpp +++ b/Source/Engine/Graphics/Models/SkinnedMeshDrawData.cpp @@ -5,11 +5,6 @@ #include "Engine/Animations/Config.h" #include "Engine/Core/Log.h" #include "Engine/Core/Math/Matrix.h" -#include "Engine/Core/Math/Matrix3x4.h" - -SkinnedMeshDrawData::SkinnedMeshDrawData() -{ -} SkinnedMeshDrawData::~SkinnedMeshDrawData() { @@ -33,7 +28,7 @@ void SkinnedMeshDrawData::Setup(int32 bonesCount) BonesCount = bonesCount; _hasValidData = false; - _isDirty = false; + _isDirty = true; Data.Resize(BoneMatrices->GetSize()); SAFE_DELETE_GPU_RESOURCE(PrevBoneMatrices); } diff --git a/Source/Engine/Graphics/Models/SkinnedMeshDrawData.h b/Source/Engine/Graphics/Models/SkinnedMeshDrawData.h index 24d5ca230..dc780a26d 100644 --- a/Source/Engine/Graphics/Models/SkinnedMeshDrawData.h +++ b/Source/Engine/Graphics/Models/SkinnedMeshDrawData.h @@ -36,11 +36,6 @@ public: Array Data; public: - /// - /// Initializes a new instance of the class. - /// - SkinnedMeshDrawData(); - /// /// Finalizes an instance of the class. /// @@ -76,7 +71,7 @@ public: void OnDataChanged(bool dropHistory); /// - /// After bones Data has been send to the GPU buffer. + /// After bones Data has been sent to the GPU buffer. 
/// void OnFlush() { diff --git a/Source/Engine/Graphics/Shaders/GPUVertexLayout.cpp b/Source/Engine/Graphics/Shaders/GPUVertexLayout.cpp index 05c6d605a..e458ff1c1 100644 --- a/Source/Engine/Graphics/Shaders/GPUVertexLayout.cpp +++ b/Source/Engine/Graphics/Shaders/GPUVertexLayout.cpp @@ -8,6 +8,7 @@ #include "Engine/Graphics/GPUDevice.h" #include "Engine/Graphics/GPUBuffer.h" #include "Engine/Graphics/PixelFormatExtensions.h" +#include "Engine/Threading/ConcurrentDictionary.h" #if GPU_ENABLE_RESOURCE_NAMING #include "Engine/Scripting/Enums.h" #endif @@ -40,27 +41,37 @@ uint32 GetHash(const VertexBufferLayouts& key) namespace { - CriticalSection CacheLocker; - Dictionary LayoutCache; - Dictionary VertexBufferCache; + ConcurrentDictionary LayoutCache; + ConcurrentDictionary VertexBufferCache; - GPUVertexLayout* AddCache(const VertexBufferLayouts& key, int32 count) + GPUVertexLayout* GetCache(const VertexBufferLayouts& key, int32 count) { - GPUVertexLayout::Elements elements; - bool anyValid = false; - for (int32 slot = 0; slot < count; slot++) + GPUVertexLayout* result; + if (!VertexBufferCache.TryGet(key, result)) { - if (key.Layouts[slot]) + GPUVertexLayout::Elements elements; + bool anyValid = false; + for (int32 slot = 0; slot < count; slot++) { - anyValid = true; - int32 start = elements.Count(); - elements.Add(key.Layouts[slot]->GetElements()); - for (int32 j = start; j < elements.Count(); j++) - elements.Get()[j].Slot = (byte)slot; + if (key.Layouts[slot]) + { + anyValid = true; + int32 start = elements.Count(); + elements.Add(key.Layouts[slot]->GetElements()); + for (int32 j = start; j < elements.Count(); j++) + elements.Get()[j].Slot = (byte)slot; + } } + result = anyValid ? GPUVertexLayout::Get(elements, true) : nullptr; + if (!VertexBufferCache.Add(key, result)) + { + // Other thread added the value + Delete(result); + bool found = VertexBufferCache.TryGet(key, result); + ASSERT(found); + } + } - GPUVertexLayout* result = anyValid ? 
GPUVertexLayout::Get(elements, true) : nullptr; - VertexBufferCache.Add(key, result); return result; } } @@ -148,7 +159,6 @@ GPUVertexLayout* GPUVertexLayout::Get(const Elements& elements, bool explicitOff } // Lookup existing cache - CacheLocker.Lock(); GPUVertexLayout* result; if (!LayoutCache.TryGet(hash, result)) { @@ -160,12 +170,16 @@ GPUVertexLayout* GPUVertexLayout::Get(const Elements& elements, bool explicitOff LOG(Error, " {}", e.ToString()); #endif LOG(Error, "Failed to create vertex layout"); - CacheLocker.Unlock(); return nullptr; } - LayoutCache.Add(hash, result); + if (!LayoutCache.Add(hash, result)) + { + // Other thread added the value + Delete(result); + bool found = LayoutCache.TryGet(hash, result); + ASSERT(found); + } } - CacheLocker.Unlock(); return result; } @@ -185,13 +199,7 @@ GPUVertexLayout* GPUVertexLayout::Get(const Span& vertexBuffers) key.Layouts[i] = nullptr; // Lookup existing cache - CacheLocker.Lock(); - GPUVertexLayout* result; - if (!VertexBufferCache.TryGet(key, result)) - result = AddCache(key, vertexBuffers.Length()); - CacheLocker.Unlock(); - - return result; + return GetCache(key, vertexBuffers.Length()); } GPUVertexLayout* GPUVertexLayout::Get(const Span& layouts) @@ -209,13 +217,7 @@ GPUVertexLayout* GPUVertexLayout::Get(const Span& layouts) key.Layouts[i] = nullptr; // Lookup existing cache - CacheLocker.Lock(); - GPUVertexLayout* result; - if (!VertexBufferCache.TryGet(key, result)) - result = AddCache(key, layouts.Length()); - CacheLocker.Unlock(); - - return result; + return GetCache(key, layouts.Length()); } GPUVertexLayout* GPUVertexLayout::Merge(GPUVertexLayout* base, GPUVertexLayout* reference, bool removeUnused, bool addMissing, int32 missingSlotOverride, bool referenceOrder) diff --git a/Source/Engine/Level/Actor.cpp b/Source/Engine/Level/Actor.cpp index 7d07cd0e7..f52fab600 100644 --- a/Source/Engine/Level/Actor.cpp +++ b/Source/Engine/Level/Actor.cpp @@ -1685,7 +1685,7 @@ Quaternion Actor::LookingAt(const 
Vector3& worldPos) const { const Vector3 direction = worldPos - _transform.Translation; if (direction.LengthSquared() < ZeroTolerance) - return _parent->GetOrientation(); + return _parent ? _parent->GetOrientation() : Quaternion::Identity; const Float3 newForward = Vector3::Normalize(direction); const Float3 oldForward = _transform.Orientation * Vector3::Forward; @@ -1712,7 +1712,7 @@ Quaternion Actor::LookingAt(const Vector3& worldPos, const Vector3& worldUp) con { const Vector3 direction = worldPos - _transform.Translation; if (direction.LengthSquared() < ZeroTolerance) - return _parent->GetOrientation(); + return _parent ? _parent->GetOrientation() : Quaternion::Identity; const Float3 forward = Vector3::Normalize(direction); const Float3 up = Vector3::Normalize(worldUp); if (Math::IsOne(Float3::Dot(forward, up))) diff --git a/Source/Engine/Level/Actors/AnimatedModel.cpp b/Source/Engine/Level/Actors/AnimatedModel.cpp index f75174f72..11497e558 100644 --- a/Source/Engine/Level/Actors/AnimatedModel.cpp +++ b/Source/Engine/Level/Actors/AnimatedModel.cpp @@ -14,14 +14,84 @@ #include "Engine/Content/Deprecated.h" #include "Engine/Graphics/GPUContext.h" #include "Engine/Graphics/GPUDevice.h" +#include "Engine/Graphics/GPUPass.h" #include "Engine/Graphics/RenderTask.h" #include "Engine/Graphics/Models/MeshAccessor.h" #include "Engine/Graphics/Models/MeshDeformation.h" +#include "Engine/Renderer/RenderList.h" #include "Engine/Level/Scene/Scene.h" #include "Engine/Level/SceneObjectsFactory.h" -#include "Engine/Profiler/ProfilerMemory.h" +#include "Engine/Profiler/Profiler.h" #include "Engine/Serialization/Serialization.h" +// Implements efficient skinning data update within a shared GPUMemoryPass with manual resource transitions batched for all animated models. 
+class AnimatedModelRenderListExtension : public RenderList::IExtension +{ +public: + struct Item + { + GPUBuffer* BoneMatrices; + void* Data; + int32 Size; + }; + + RenderListBuffer Items; + + void PreDraw(GPUContext* context, RenderContextBatch& renderContextBatch) override + { + Items.Clear(); + } + + void PostDraw(GPUContext* context, RenderContextBatch& renderContextBatch) override + { + const int32 count = Items.Count(); + if (count == 0) + return; + PROFILE_GPU_CPU_NAMED("Update Bones"); + GPUMemoryPass pass(context); + Item* items = Items.Get(); + + // Special case for D3D11 backend that doesn't need transitions + if (context->GetDevice()->GetRendererType() <= RendererType::DirectX11) + { + for (int32 i = 0; i < count; i++) + { + Item& item = items[i]; + context->UpdateBuffer(item.BoneMatrices, item.Data, item.Size); + } + } + else + { + // Batch resource barriers for buffer update + for (int32 i = 0; i < count; i++) + pass.Transition(items[i].BoneMatrices, GPUResourceAccess::CopyWrite); + + // Update all buffers within Memory Pass (no barriers between) + for (int32 i = 0; i < count; i++) + { + Item& item = items[i]; + context->UpdateBuffer(item.BoneMatrices, item.Data, item.Size); + } + + // Batch resource barriers for reading in Vertex Shader + for (int32 i = 0; i < count; i++) + pass.Transition(items[i].BoneMatrices, GPUResourceAccess::ShaderReadGraphics); + } + +#if COMPILE_WITH_PROFILER + // Insert amount of kilobytes of data updated into profiler trace + uint32 dataSize = 0; + for (int32 i = 0; i < count; i++) + dataSize += items[i].Size; + ZoneValue(dataSize / 1024); +#endif + + Items.Clear(); + } +}; + +AnimatedModelRenderListExtension RenderListExtension; + AnimatedModel::AnimatedModel(const SpawnParams& params) : ModelInstanceActor(params) , _actualMode(AnimationUpdateMode::Never) @@ -1002,7 +1072,7 @@ void AnimatedModel::Draw(RenderContext& renderContext) if (renderContext.View.Pass == DrawPass::GlobalSDF) return; if (renderContext.View.Pass == 
DrawPass::GlobalSurfaceAtlas) - return; // No supported + return; // Not supported ACTOR_GET_WORLD_MATRIX(this, view, world); GEOMETRY_DRAW_STATE_EVENT_BEGIN(_drawState, world); @@ -1012,9 +1082,8 @@ void AnimatedModel::Draw(RenderContext& renderContext) // Flush skinning data with GPU if (_skinningData.IsDirty()) { - RenderContext::GPULocker.Lock(); - GPUDevice::Instance->GetMainContext()->UpdateBuffer(_skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count()); - RenderContext::GPULocker.Unlock(); + RenderListExtension.Items.Add({ _skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count() }); + _skinningData.OnFlush(); } SkinnedMesh::DrawInfo draw; @@ -1056,9 +1125,8 @@ void AnimatedModel::Draw(RenderContextBatch& renderContextBatch) // Flush skinning data with GPU if (_skinningData.IsDirty()) { - RenderContext::GPULocker.Lock(); - GPUDevice::Instance->GetMainContext()->UpdateBuffer(_skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count()); - RenderContext::GPULocker.Unlock(); + RenderListExtension.Items.Add({ _skinningData.BoneMatrices, _skinningData.Data.Get(), _skinningData.Data.Count() }); + _skinningData.OnFlush(); } SkinnedMesh::DrawInfo draw; diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index 951b657b2..7d3703ee0 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -677,11 +677,10 @@ void CleanupGPUParticlesSorting() SAFE_DELETE_GPU_RESOURCE(GPUIndirectArgsBuffer); } -void DrawEmittersGPU(RenderContextBatch& renderContextBatch) +void DrawEmittersGPU(GPUContext* context, RenderContextBatch& renderContextBatch) { PROFILE_GPU_CPU_NAMED("DrawEmittersGPU"); ScopeReadLock systemScope(Particles::SystemLocker); - GPUContext* context = GPUDevice::Instance->GetMainContext(); // Count draws and sorting passes needed for resources allocation uint32 indirectArgsSize = 0; @@ -1124,9 +1123,9 @@ void 
DrawEmitterGPU(RenderContextBatch& renderContextBatch, ParticleBuffer* buff if (GPUEmitterDraws.Count() == 0) { // The first emitter schedules the drawing of all batched draws - renderContextBatch.GetMainContext().List->AddDelayedDraw([](RenderContextBatch& renderContextBatch, int32 contextIndex) + renderContextBatch.GetMainContext().List->AddDelayedDraw([](GPUContext* context, RenderContextBatch& renderContextBatch, int32 renderContextIndex) { - DrawEmittersGPU(renderContextBatch); + DrawEmittersGPU(context, renderContextBatch); }); } GPUEmitterDraws.Add({ buffer, drawCall, drawModes, staticFlags, bounds, renderModulesIndices, indirectArgsSize, sortOrder, sorting }); diff --git a/Source/Engine/Platform/Base/FileSystemBase.cpp b/Source/Engine/Platform/Base/FileSystemBase.cpp index f414bbd01..13ae3481c 100644 --- a/Source/Engine/Platform/Base/FileSystemBase.cpp +++ b/Source/Engine/Platform/Base/FileSystemBase.cpp @@ -12,25 +12,25 @@ bool FileSystemBase::ShowOpenFileDialog(Window* parentWindow, const StringView& initialDirectory, const StringView& filter, bool multiSelect, const StringView& title, Array& filenames) { - // No supported + // Not supported return true; } bool FileSystemBase::ShowSaveFileDialog(Window* parentWindow, const StringView& initialDirectory, const StringView& filter, bool multiSelect, const StringView& title, Array& filenames) { - // No supported + // Not supported return true; } bool FileSystemBase::ShowBrowseFolderDialog(Window* parentWindow, const StringView& initialDirectory, const StringView& title, String& path) { - // No supported + // Not supported return true; } bool FileSystemBase::ShowFileExplorer(const StringView& path) { - // No supported + // Not supported return true; } diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index 544438bb5..ac643b4e8 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -15,6 +15,7 @@ #include 
"Engine/Profiler/Profiler.h" #include "Engine/Content/Assets/CubeTexture.h" #include "Engine/Core/Log.h" +#include "Engine/Core/Math/Half.h" #include "Engine/Graphics/Shaders/GPUVertexLayout.h" #include "Engine/Level/Scene/Lightmap.h" #include "Engine/Level/Actors/PostFxVolume.h" @@ -30,6 +31,13 @@ namespace Array FreeRenderList; Array> MemPool; CriticalSection MemPoolLocker; + + typedef Array> ExtensionsList; + ExtensionsList& GetExtensions() + { + static ExtensionsList list; + return list; + } } void ShaderObjectData::Store(const Matrix& worldMatrix, const Matrix& prevWorldMatrix, const Rectangle& lightmapUVsArea, const Float3& geometrySize, float perInstanceRandom, float worldDeterminantSign, float lodDitherFactor) @@ -235,6 +243,16 @@ void RenderList::CleanupCache() MemPoolLocker.Unlock(); } +RenderList::IExtension::IExtension() +{ + GetExtensions().Add(this); +} + +RenderList::IExtension::~IExtension() +{ + GetExtensions().Remove(this); +} + bool RenderList::BlendableSettings::operator<(const BlendableSettings& other) const { // Sort by higher priority @@ -257,18 +275,31 @@ void RenderList::AddSettingsBlend(IPostFxSettingsProvider* provider, float weigh void RenderList::AddDelayedDraw(DelayedDraw&& func) { - MemPoolLocker.Lock(); // TODO: convert _delayedDraws into RenderListBuffer with usage of arena Memory for fast alloc _delayedDraws.Add(MoveTemp(func)); - MemPoolLocker.Unlock(); } -void RenderList::DrainDelayedDraws(RenderContextBatch& renderContextBatch, int32 contextIndex) +void RenderList::DrainDelayedDraws(GPUContext* context, RenderContextBatch& renderContextBatch, int32 renderContextIndex) { - if (_delayedDraws.IsEmpty()) + if (_delayedDraws.Count() == 0) return; + PROFILE_CPU(); for (DelayedDraw& e : _delayedDraws) - e(renderContextBatch, contextIndex); - _delayedDraws.SetCapacity(0); + e(context, renderContextBatch, renderContextIndex); + _delayedDraws.Clear(); +} + +#define LOOP_EXTENSIONS() const auto& extensions = GetExtensions(); for (auto* e : 
extensions) + +void RenderList::PreDraw(GPUContext* context, RenderContextBatch& renderContextBatch) +{ + LOOP_EXTENSIONS() + e->PreDraw(context, renderContextBatch); +} + +void RenderList::PostDraw(GPUContext* context, RenderContextBatch& renderContextBatch) +{ + LOOP_EXTENSIONS() + e->PostDraw(context, renderContextBatch); } void RenderList::BlendSettings() @@ -494,7 +525,6 @@ RenderList::RenderList(const SpawnParams& params) , ObjectBuffer(0, PixelFormat::R32G32B32A32_Float, false, TEXT("Object Buffer")) , TempObjectBuffer(0, PixelFormat::R32G32B32A32_Float, false, TEXT("Object Buffer")) , _instanceBuffer(0, sizeof(ShaderObjectDrawInstanceData), TEXT("Instance Buffer"), GPUVertexLayout::Get({ { VertexElement::Types::Attribute0, 3, 0, 1, PixelFormat::R32_UInt } })) - , _delayedDraws(&Memory) { } @@ -826,6 +856,13 @@ FORCE_INLINE bool DrawsEqual(const DrawCall* a, const DrawCall* b) Platform::MemoryCompare(a->Geometry.VertexBuffers, b->Geometry.VertexBuffers, sizeof(a->Geometry.VertexBuffers) + sizeof(a->Geometry.VertexBuffersOffsets)) == 0; } +FORCE_INLINE Span GetVB(GPUBuffer* const* ptr, int32 maxSize) +{ + while (ptr[maxSize - 1] == nullptr && maxSize > 1) + maxSize--; + return ToSpan(ptr, maxSize); +} + void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsList& list, RenderList* drawCallsList, GPUTextureView* input) { if (list.IsEmpty()) @@ -954,7 +991,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL Platform::MemoryCopy(vb, activeDraw->Geometry.VertexBuffers, sizeof(DrawCall::Geometry.VertexBuffers)); Platform::MemoryCopy(vbOffsets, activeDraw->Geometry.VertexBuffersOffsets, sizeof(DrawCall::Geometry.VertexBuffersOffsets)); context->BindIB(activeDraw->Geometry.IndexBuffer); - context->BindVB(ToSpan(vb, ARRAY_COUNT(vb)), vbOffsets); + context->BindVB(GetVB(vb, ARRAY_COUNT(vb)), vbOffsets); context->DrawIndexedInstanced(activeDraw->Draw.IndicesCount, activeCount, instanceBufferOffset, 0, 
activeDraw->Draw.StartIndex); instanceBufferOffset += activeCount; @@ -971,7 +1008,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL // Single-draw call batch context->BindIB(drawCall.Geometry.IndexBuffer); - context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets); + context->BindVB(GetVB(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets); if (drawCall.InstanceCount == 0) { context->DrawIndexedInstancedIndirect(drawCall.Draw.IndirectArgsBuffer, drawCall.Draw.IndirectArgsOffset); @@ -994,7 +1031,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL Platform::MemoryCopy(vb, drawCall.Geometry.VertexBuffers, sizeof(DrawCall::Geometry.VertexBuffers)); Platform::MemoryCopy(vbOffsets, drawCall.Geometry.VertexBuffersOffsets, sizeof(DrawCall::Geometry.VertexBuffersOffsets)); context->BindIB(drawCall.Geometry.IndexBuffer); - context->BindVB(ToSpan(vb, vbMax + 1), vbOffsets); + context->BindVB(GetVB(vb, vbMax + 1), vbOffsets); if (drawCall.InstanceCount == 0) { @@ -1024,7 +1061,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL const DrawCall& drawCall = drawCallsData[perDraw.DrawObjectIndex]; context->BindIB(drawCall.Geometry.IndexBuffer); - context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets); + context->BindVB(GetVB(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets); if (drawCall.InstanceCount == 0) { @@ -1045,7 +1082,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL bindParams.DrawCall->Material->Bind(bindParams); context->BindIB(drawCall.Geometry.IndexBuffer); - context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets); + context->BindVB(GetVB(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets); for 
(int32 j = 0; j < batch.Instances.Count(); j++) { @@ -1069,7 +1106,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL drawCall.Material->Bind(bindParams); context->BindIB(drawCall.Geometry.IndexBuffer); - context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets); + context->BindVB(GetVB(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets); if (drawCall.InstanceCount == 0) { diff --git a/Source/Engine/Renderer/RenderList.h b/Source/Engine/Renderer/RenderList.h index 8eb3540e0..b4b7121de 100644 --- a/Source/Engine/Renderer/RenderList.h +++ b/Source/Engine/Renderer/RenderList.h @@ -4,7 +4,6 @@ #include "Engine/Core/Collections/Array.h" #include "Engine/Core/Memory/ArenaAllocation.h" -#include "Engine/Core/Math/Half.h" #include "Engine/Graphics/PostProcessSettings.h" #include "Engine/Graphics/DynamicBuffer.h" #include "Engine/Scripting/ScriptingObject.h" @@ -327,6 +326,21 @@ API_CLASS(Sealed) class FLAXENGINE_API RenderList : public ScriptingObject /// static void CleanupCache(); + /// + /// The rendering extension interface for custom drawing/effects linked to RenderList. Can be used during async scene drawing and further drawing/processing for more optimized rendering. + /// + class FLAXENGINE_API IExtension + { + public: + IExtension(); + virtual ~IExtension(); + + // Event called before collecting draw calls. Can be used for initialization. + virtual void PreDraw(GPUContext* context, RenderContextBatch& renderContextBatch) {} + // Event called after collecting draw calls. Can be used for cleanup or to perform additional drawing using collected draw calls data such as batched data processing. + virtual void PostDraw(GPUContext* context, RenderContextBatch& renderContextBatch) {} + }; + public: /// /// Memory storage with all draw-related data that lives during a single frame rendering time. Thread-safe to allocate memory during rendering jobs. 
@@ -460,13 +474,14 @@ public: /// DynamicTypedBuffer TempObjectBuffer; - typedef Function DelayedDraw; + typedef Function DelayedDraw; void AddDelayedDraw(DelayedDraw&& func); - void DrainDelayedDraws(RenderContextBatch& renderContextBatch, int32 contextIndex); + void DrainDelayedDraws(GPUContext* context, RenderContextBatch& renderContextBatch, int32 renderContextIndex); /// /// Adds custom callback (eg. lambda) to invoke after scene draw calls are collected on a main thread (some async draw tasks might be active). Allows for safe usage of GPUContext for draw preparations or to perform GPU-driven drawing. /// + /// Can be called in async during scene rendering (thread-safe internally). Lambda is allocated by concurrent arena allocator owned by the RenderList. template FORCE_INLINE void AddDelayedDraw(const T& lambda) { @@ -475,9 +490,13 @@ public: AddDelayedDraw(MoveTemp(func)); } + // IExtension implementation + void PreDraw(GPUContext* context, RenderContextBatch& renderContextBatch); + void PostDraw(GPUContext* context, RenderContextBatch& renderContextBatch); + private: DynamicVertexBuffer _instanceBuffer; - Array _delayedDraws; + RenderListBuffer _delayedDraws; public: /// diff --git a/Source/Engine/Renderer/Renderer.cpp b/Source/Engine/Renderer/Renderer.cpp index fd7d43c8b..96253934e 100644 --- a/Source/Engine/Renderer/Renderer.cpp +++ b/Source/Engine/Renderer/Renderer.cpp @@ -423,6 +423,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont if (setup.UseMotionVectors) view.Pass |= DrawPass::MotionVectors; renderContextBatch.GetMainContext() = renderContext; // Sync render context in batch with the current value + renderContext.List->PreDraw(context, renderContextBatch); bool drawShadows = !isGBufferDebug && EnumHasAnyFlags(view.Flags, ViewFlags::Shadows) && ShadowsPass::Instance()->IsReady(); switch (renderContext.View.Mode) @@ -461,7 +462,8 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont // 
Perform custom post-scene drawing (eg. GPU dispatches used by VFX) for (int32 i = 0; i < renderContextBatch.Contexts.Count(); i++) - renderContextBatch.Contexts[i].List->DrainDelayedDraws(renderContextBatch, i); + renderContextBatch.Contexts[i].List->DrainDelayedDraws(context, renderContextBatch, i); + renderContext.List->PostDraw(context, renderContextBatch); #if USE_EDITOR GBufferPass::Instance()->OverrideDrawCalls(renderContext); diff --git a/Source/Engine/Scripting/BinaryModule.cpp b/Source/Engine/Scripting/BinaryModule.cpp index 4d26e678b..bbcd7de57 100644 --- a/Source/Engine/Scripting/BinaryModule.cpp +++ b/Source/Engine/Scripting/BinaryModule.cpp @@ -683,6 +683,8 @@ BinaryModule* BinaryModule::GetModule(const StringAnsiView& name) BinaryModule::BinaryModule() { + CanReload = USE_EDITOR; + // Register GetModules().Add(this); } diff --git a/Source/Engine/Scripting/BinaryModule.h b/Source/Engine/Scripting/BinaryModule.h index 70aa60fff..1da35401b 100644 --- a/Source/Engine/Scripting/BinaryModule.h +++ b/Source/Engine/Scripting/BinaryModule.h @@ -91,6 +91,11 @@ public: /// Dictionary TypeNameToTypeIndex; + /// + /// Determines whether the module can be hot-reloaded at runtime. For example, in Editor after scripts recompilation. Some modules such as engine and class library modules are static. + /// + bool CanReload; + public: /// diff --git a/Source/Engine/Scripting/ManagedCLR/MAssembly.h b/Source/Engine/Scripting/ManagedCLR/MAssembly.h index 6c0aa9579..0a785c06a 100644 --- a/Source/Engine/Scripting/ManagedCLR/MAssembly.h +++ b/Source/Engine/Scripting/ManagedCLR/MAssembly.h @@ -34,6 +34,7 @@ private: int32 _isLoaded : 1; int32 _isLoading : 1; + int32 _canReload : 1; mutable int32 _hasCachedClasses : 1; mutable ClassesDictionary _classes; @@ -125,6 +126,14 @@ public: return _isLoaded != 0; } + /// + /// Returns true if assembly can be hot-reloaded at runtime. For example, in Editor after scripts recompilation. 
Some assemblies such as engine and class library modules are static. + /// + FORCE_INLINE bool CanReload() const + { + return USE_EDITOR && _canReload; + } + /// /// Gets the assembly name. /// diff --git a/Source/Engine/Scripting/ManagedCLR/MCore.cpp b/Source/Engine/Scripting/ManagedCLR/MCore.cpp index 350cc39d2..6fa499002 100644 --- a/Source/Engine/Scripting/ManagedCLR/MCore.cpp +++ b/Source/Engine/Scripting/ManagedCLR/MCore.cpp @@ -45,6 +45,7 @@ MAssembly::MAssembly(MDomain* domain, const StringAnsiView& name) : _domain(domain) , _isLoaded(false) , _isLoading(false) + , _canReload(true) , _hasCachedClasses(false) , _reloadCount(0) , _name(name) @@ -59,6 +60,7 @@ MAssembly::MAssembly(MDomain* domain, const StringAnsiView& name, const StringAn , _domain(domain) , _isLoaded(false) , _isLoading(false) + , _canReload(true) , _hasCachedClasses(false) , _reloadCount(0) , _name(name) diff --git a/Source/Engine/Scripting/Runtime/DotNet.cpp b/Source/Engine/Scripting/Runtime/DotNet.cpp index 1c8c2bcdd..4be0ce1a1 100644 --- a/Source/Engine/Scripting/Runtime/DotNet.cpp +++ b/Source/Engine/Scripting/Runtime/DotNet.cpp @@ -874,6 +874,7 @@ bool MAssembly::LoadCorlib() return true; } _hasCachedClasses = false; + _canReload = false; CachedAssemblyHandles.Add(_handle, this); // End diff --git a/Source/Engine/Scripting/Scripting.cpp b/Source/Engine/Scripting/Scripting.cpp index 4e17bb80a..aa7e26674 100644 --- a/Source/Engine/Scripting/Scripting.cpp +++ b/Source/Engine/Scripting/Scripting.cpp @@ -502,6 +502,7 @@ bool Scripting::LoadBinaryModules(const String& path, const String& projectFolde // C# if (managedPath.HasChars() && !((ManagedBinaryModule*)module)->Assembly->IsLoaded()) { + (((ManagedBinaryModule*)module)->Assembly)->_canReload = module->CanReload; if (((ManagedBinaryModule*)module)->Assembly->Load(managedPath, nativePath)) { LOG(Error, "Failed to load C# assembly '{0}' for binary module {1}.", managedPath, name); @@ -528,6 +529,7 @@ bool Scripting::Load() #if USE_CSHARP 
// Load C# core assembly ManagedBinaryModule* corlib = GetBinaryModuleCorlib(); + corlib->CanReload = false; if (corlib->Assembly->LoadCorlib()) { LOG(Error, "Failed to load corlib C# assembly."); @@ -581,6 +583,8 @@ bool Scripting::Load() LOG(Error, "Failed to load FlaxEngine C# assembly."); return true; } + flaxEngineModule->CanReload = false; + flaxEngineModule->Assembly->_canReload = false; onEngineLoaded(flaxEngineModule->Assembly); // Insert type aliases for vector types that don't exist in C++ but are just typedef (properly redirect them to actual types) diff --git a/Source/Engine/Serialization/Serialization.cpp b/Source/Engine/Serialization/Serialization.cpp index a3dfc6ffa..1eb6b0181 100644 --- a/Source/Engine/Serialization/Serialization.cpp +++ b/Source/Engine/Serialization/Serialization.cpp @@ -78,7 +78,10 @@ void Serialization::Deserialize(ISerializable::DeserializeStream& stream, Varian v.Type = VariantType::Null; const auto mTypeName = SERIALIZE_FIND_MEMBER(stream, "TypeName"); if (mTypeName != stream.MemberEnd() && mTypeName->value.IsString()) + { v.SetTypeName(StringAnsiView(mTypeName->value.GetStringAnsiView())); + v.Inline(); + } } else { diff --git a/Source/Engine/Serialization/Stream.cpp b/Source/Engine/Serialization/Stream.cpp index f95e9ef9b..4c9b94042 100644 --- a/Source/Engine/Serialization/Stream.cpp +++ b/Source/Engine/Serialization/Stream.cpp @@ -255,6 +255,7 @@ void ReadStream::Read(VariantType& data) ptr++; } *ptr = 0; + data.Inline(); } else if (typeNameLength > 0) { diff --git a/Source/Engine/Threading/ConcurrentDictionary.h b/Source/Engine/Threading/ConcurrentDictionary.h new file mode 100644 index 000000000..1b78a735e --- /dev/null +++ b/Source/Engine/Threading/ConcurrentDictionary.h @@ -0,0 +1,318 @@ +// Copyright (c) Wojciech Figat. All rights reserved. 
+ +#pragma once + +#include "Engine/Core/Collections/Dictionary.h" +#include "Engine/Platform/CriticalSection.h" + +/// +/// Template for an unordered dictionary of key-value pairs that supports concurrent reading and writing from multiple threads. +/// Implemented via reader-writer lock pattern, so multiple threads can read data at the same time, but only one thread can write data and it blocks all other threads (including readers) until the write operation is finished. +/// Optimized for frequent reads (readers only update atomic counters and never take the mutex). +/// +/// The type of the keys in the dictionary. +/// The type of the values in the dictionary. +/// The type of memory allocator. +template +class ConcurrentDictionary : Dictionary +{ + friend ConcurrentDictionary; +public: + typedef Dictionary Base; + typedef DictionaryBucket Bucket; + using AllocationData = typename AllocationType::template Data; + using AllocationTag = typename AllocationType::Tag; + +private: + // Number of readers currently registered on the collection (mutable so that const methods can register as readers). + mutable volatile int64 _threadsReading = 0; + // Number of writers that are writing or waiting to write; readers back off while it is non-zero. + volatile int64 _threadsWriting = 0; + // Mutex that serializes writers. + CriticalSection _locker; + +public: + /// + /// Initializes an empty ConcurrentDictionary without reserving any space. + /// + ConcurrentDictionary() + { + } + + /// + /// Initializes an empty ConcurrentDictionary without reserving any space. + /// + /// The custom allocation tag. + ConcurrentDictionary(AllocationTag tag) + : Base(tag) + { + } + + /// + /// Finalizes an instance of the ConcurrentDictionary class. + /// + ~ConcurrentDictionary() + { + // Clear takes the write lock, so destruction waits until all registered readers are gone + Clear(); + } + +public: + /// + /// Gets the amount of the elements in the collection. + /// + int32 Count() const + { + Reader reader(this); + return Base::_elementsCount; + } + + /// + /// Gets the amount of the elements that can be contained by the collection. + /// + int32 Capacity() const + { + Reader reader(this); + return Base::_size; + } + + /// + /// Tries to get element with given key. + /// + /// The key of the element. + /// The result value (left unmodified when the key is not found). + /// True if element of given key has been found, otherwise false.
+ template + bool TryGet(const KeyComparableType& key, ValueType& result) const + { + // Hold the read lock for the whole lookup so the value is copied out before any writer can modify the buckets + Reader reader(this); + typename Base::FindPositionResult pos; + Base::FindPosition(key, pos); + if (pos.ObjectIndex != -1) + result = Base::_allocation.Get()[pos.ObjectIndex].Value; + return pos.ObjectIndex != -1; + } + +public: + /// + /// Adds a pair of key and value to the collection. + /// + /// The key. + /// The value. + /// True if added element, otherwise false if it already exists (or other thread added it). + template + bool Add(const KeyComparableType& key, const ValueType& value) + { + // Exclusive access for the insertion + Writer writer(this); + // NOTE(review): OnAdd presumably returns null when the key is already present (hence the false result) - confirm against Dictionary::OnAdd + Bucket* bucket = Base::OnAdd(key, false, true); + if (bucket) + bucket->Occupy(key, value); + return bucket != nullptr; + } + + /// + /// Removes element with a specified key. + /// + /// The element key to remove. + /// True if item was removed from collection, otherwise false. + template + bool Remove(const KeyComparableType& key) + { + // Exclusive access for the removal + Writer writer(this); + return Base::Remove(key); + } + +public: + /// + /// Removes all elements from the collection. + /// + void Clear() + { + // Exclusive access while the base dictionary is cleared + Writer writer(this); + Base::Clear(); + } + +public: + /// + /// The read-only dictionary collection iterator. Holds a read lock on the collection for as long as it is bound to it.
+ /// + struct ConstIterator : Base::IteratorBase + { + friend ConcurrentDictionary; + public: + // Binds the iterator to the collection and takes a read lock on it that is held until the iterator is destroyed, reassigned or moved from. + ConstIterator(const ConcurrentDictionary* collection, const int32 index) + : Base::IteratorBase(collection, index) + { + if (collection) + collection->BeginRead(); + } + + // The copy takes its own read lock so that every iterator releases exactly the lock it owns. + ConstIterator(const ConstIterator& i) + : Base::IteratorBase(i._collection, i._index) + { + if (i._collection) + ((ConcurrentDictionary*)i._collection)->BeginRead(); + } + + // Moving transfers the read lock; the source is detached so its destructor releases nothing. + ConstIterator(ConstIterator&& i) noexcept + : Base::IteratorBase(i._collection, i._index) + { + i._collection = nullptr; + } + + ~ConstIterator() + { + if (this->_collection) + ((ConcurrentDictionary*)this->_collection)->EndRead(); + } + + public: + FORCE_INLINE bool operator!() const + { + return !(bool)*this; + } + + FORCE_INLINE bool operator==(const ConstIterator& v) const + { + return this->_index == v._index && this->_collection == v._collection; + } + + FORCE_INLINE bool operator!=(const ConstIterator& v) const + { + return this->_index != v._index || this->_collection != v._collection; + } + + ConstIterator& operator=(const ConstIterator& v) + { + if (this->_collection != v._collection) + { + // Rebinding to another collection: lock the new one and release the old one to keep the reader counters balanced + if (v._collection) + ((ConcurrentDictionary*)v._collection)->BeginRead(); + if (this->_collection) + ((ConcurrentDictionary*)this->_collection)->EndRead(); + this->_collection = v._collection; + } + this->_index = v._index; + return *this; + } + + ConstIterator& operator=(ConstIterator&& v) noexcept + { + if (this != &v) + { + // Release the read lock owned so far before taking over the one owned by the source + if (this->_collection) + ((ConcurrentDictionary*)this->_collection)->EndRead(); + this->_collection = v._collection; + this->_index = v._index; + v._collection = nullptr; + } + return *this; + } + + ConstIterator& operator++() + { + this->Next(); + return *this; + } + + ConstIterator operator++(int) const + { + ConstIterator i = *this; + i.Next(); + return i; + } + + ConstIterator& operator--() + { + this->Prev(); + return *this; + } + + ConstIterator operator--(int) const + { + ConstIterator i = *this; + i.Prev(); + return i; + } + }; + + ConstIterator begin() const + { + ConstIterator i(this, -1); + ++i; + return i; + } + + FORCE_INLINE ConstIterator end() const + { + return ConstIterator(this, Base::_size); + } + +private: + void BeginWrite() + { + Platform::InterlockedIncrement(&_threadsWriting); + + // Wait for all reads to end + RETRY:
+ while (Platform::AtomicRead(&_threadsReading)) + Platform::Yield(); + + // Thread-safe writing (only one writer at a time gets past this mutex) + _locker.Lock(); + if (Platform::AtomicRead(&_threadsReading)) + { + // Other reader entered during mutex locking so give them a chance to transition into active-waiting + _locker.Unlock(); + goto RETRY; + } + } + + // Releases the write lock: unlocks the mutex first, then decrements the writer counter that readers spin on. + void EndWrite() + { + _locker.Unlock(); + Platform::InterlockedDecrement(&_threadsWriting); + } + + // Acquires a read lock; spins (yielding) while any writer is active or waiting. Const so that read-only methods can call it. + // NOTE(review): looks non-reentrant - a thread that already holds a read lock and calls this again while a writer is queued would wait on that writer, which in turn waits on the first read lock; confirm callers never nest reads. + void BeginRead() const + { + RETRY: + // Register as a reader first so that a writer checking the counter notices this thread + Platform::InterlockedIncrement(&_threadsReading); + + // Check if any thread is writing (or is about to write) + if (Platform::AtomicRead(&_threadsWriting) != 0) + { + // Wait for all writes to end (unregister meanwhile so the writer is not blocked by this thread) + Platform::InterlockedDecrement(&_threadsReading); + while (Platform::AtomicRead(&_threadsWriting)) + Platform::Yield(); + + // Try again + goto RETRY; + } + } + + // Releases a read lock taken with BeginRead. + void EndRead() const + { + Platform::InterlockedDecrement(&_threadsReading); + } + +private: + // Scope guard for methods that modify state: takes the write lock on construction and releases it on destruction. + struct Writer + { + ConcurrentDictionary* _collection; + + Writer(ConcurrentDictionary* collection) + : _collection(collection) + { + _collection->BeginWrite(); + } + + ~Writer() + { + _collection->EndWrite(); + } + }; + + // Scope guard for methods that only read state.
+ struct Reader + { + const ConcurrentDictionary* _collection; + + Reader(const ConcurrentDictionary* collection) + : _collection(collection) + { + _collection->BeginRead(); + } + + ~Reader() + { + _collection->EndRead(); + } + }; +}; diff --git a/Source/Engine/Threading/JobSystem.cpp b/Source/Engine/Threading/JobSystem.cpp index 692a088b7..8d62aa8e3 100644 --- a/Source/Engine/Threading/JobSystem.cpp +++ b/Source/Engine/Threading/JobSystem.cpp @@ -8,7 +8,6 @@ #include "Engine/Core/Types/Span.h" #include "Engine/Core/Types/Pair.h" #include "Engine/Core/Memory/SimpleHeapAllocation.h" -#include "Engine/Core/Collections/Dictionary.h" #include "Engine/Core/Collections/RingBuffer.h" #include "Engine/Engine/EngineService.h" #include "Engine/Profiler/ProfilerCPU.h" @@ -22,14 +21,6 @@ #if JOB_SYSTEM_ENABLED -// Local allocator for job system memory that uses internal pooling and assumes that JobsLocker is taken (write access owned by the calling thread). -class JobSystemAllocation : public SimpleHeapAllocation -{ -public: - static void* Allocate(uintptr size); - static void Free(void* ptr, uintptr size); -}; - class JobSystemService : public EngineService { public: @@ -43,30 +34,25 @@ public: void Dispose() override; }; -struct JobData +// Holds a single job dispatch data +struct alignas(int64) JobContext { - int32 Index; - int64 JobKey; -}; - -template<> -struct TIsPODType -{ - enum { Value = true }; -}; - -struct JobContext -{ - volatile int64 JobsLeft; - int32 DependenciesLeft; + // The next index of the job to process updated when picking a job by the thread. + volatile int64 JobIndex = 0; + // The number of jobs left to process updated after job completion by the thread. + volatile int64 JobsLeft = 0; + // The unique label of this job used to identify it. Set to -1 when job is done. + volatile int64 JobLabel = 0; + // Utility atomic counter used to indicate that any job is waiting for this one to finish. Then Dependants can be accessed within thread-safe JobsLocker. 
+ volatile int64 DependantsCount = 0; + // The number of dependency jobs left to be finished before starting this job. + volatile int64 DependenciesLeft = 0; + // The total number of jobs to process (in this context). + int32 JobsCount = 0; + // The job function to execute. Function Job; - Array Dependants; -}; - -template<> -struct TIsPODType -{ - enum { Value = false }; + // List of dependant jobs to signal when this job is done. + Array Dependants; }; class JobSystemThread : public IRunnable @@ -92,50 +78,36 @@ public: namespace { JobSystemService JobSystemInstance; - Array> MemPool; Thread* Threads[PLATFORM_THREADS_LIMIT / 2] = {}; int32 ThreadsCount = 0; bool JobStartingOnDispatch = true; volatile int64 ExitFlag = 0; volatile int64 JobLabel = 0; - Dictionary JobContexts; + volatile int64 JobEndLabel = 0; + volatile int64 JobStartLabel = 0; + volatile int64 JobContextsCount = 0; + uint32 JobContextsSize = 0; + uint32 JobContextsMask = 0; + JobContext* JobContexts = nullptr; ConditionVariable JobsSignal; CriticalSection JobsMutex; ConditionVariable WaitSignal; CriticalSection WaitMutex; CriticalSection JobsLocker; - RingBuffer Jobs; -} - -void* JobSystemAllocation::Allocate(uintptr size) -{ - void* result = nullptr; - for (int32 i = 0; i < MemPool.Count(); i++) - { - if (MemPool.Get()[i].Second == size) - { - result = MemPool.Get()[i].First; - MemPool.RemoveAt(i); - break; - } - } - if (!result) - { - PROFILE_MEM(EngineThreading); - result = Platform::Allocate(size, 16); - } - return result; -} - -void JobSystemAllocation::Free(void* ptr, uintptr size) -{ - PROFILE_MEM(EngineThreading); - MemPool.Add({ ptr, size }); +#define GET_CONTEXT_INDEX(label) (uint32)((label) & (int64)JobContextsMask) } bool JobSystemService::Init() { PROFILE_MEM(EngineThreading); + + // Initialize job context storage (fixed-size ring buffer for active jobs tracking) + JobContextsSize = 256; + JobContextsMask = JobContextsSize - 1; + JobContexts = 
(JobContext*)Platform::Allocate(JobContextsSize * sizeof(JobContext), alignof(JobContext)); + Memory::ConstructItems(JobContexts, (int32)JobContextsSize); + + // Spawn threads ThreadsCount = Math::Min(Platform::GetCPUInfo().LogicalProcessorCount, ARRAY_COUNT(Threads)); for (int32 i = 0; i < ThreadsCount; i++) { @@ -146,6 +118,7 @@ bool JobSystemService::Init() return true; Threads[i] = thread; } + return false; } @@ -171,35 +144,67 @@ void JobSystemService::Dispose() } } - JobContexts.SetCapacity(0); - Jobs.Release(); - for (auto& e : MemPool) - Platform::Free(e.First); - MemPool.Clear(); + Memory::DestructItems(JobContexts, (int32)JobContextsSize); + Platform::Free(JobContexts); + JobContexts = nullptr; } int32 JobSystemThread::Run() { + // Pin thread to the physical core Platform::SetThreadAffinityMask(1ull << Index); - JobData data; - Function job; bool attachCSharpThread = true; MONO_THREAD_INFO_TYPE* monoThreadInfo = nullptr; while (Platform::AtomicRead(&ExitFlag) == 0) { // Try to get a job - JobsLocker.Lock(); - if (Jobs.Count() != 0) + int32 jobIndex; + JobContext* jobContext = nullptr; { - data = Jobs.PeekFront(); - Jobs.PopFront(); - const JobContext& context = ((const Dictionary&)JobContexts).At(data.JobKey); - job = context.Job; - } - JobsLocker.Unlock(); + int64 jobOffset = 0; + RETRY: + int64 jobStartLabel = Platform::AtomicRead(&JobStartLabel) + jobOffset; + int64 jobEndLabel = Platform::AtomicRead(&JobEndLabel); + if (jobStartLabel <= jobEndLabel && jobEndLabel > 0) + { + jobContext = &JobContexts[GET_CONTEXT_INDEX(jobStartLabel)]; + if (Platform::AtomicRead(&jobContext->DependenciesLeft) > 0) + { + // This job still waits for dependency so skip it for now and try the next one + jobOffset++; + jobContext = nullptr; + goto RETRY; + } - if (job.IsBinded()) + // Move forward with index for a job + jobIndex = (int32)(Platform::InterlockedIncrement(&jobContext->JobIndex) - 1); + if (jobIndex < jobContext->JobsCount) + { + // Index is valid + } + else if 
(jobStartLabel < jobEndLabel && jobOffset == 0) + { + // No more jobs inside this context, move to the next one + Platform::InterlockedCompareExchange(&JobStartLabel, jobStartLabel + 1, jobStartLabel); + jobContext = nullptr; + goto RETRY; + } + else + { + // No more jobs + jobContext = nullptr; + if (jobStartLabel < jobEndLabel) + { + // Try with a different one before going to sleep + jobOffset++; + goto RETRY; + } + } + } + } + + if (jobContext) { #if USE_CSHARP // Ensure to have C# thread attached to this thead (late init due to MCore being initialized after Job System) @@ -212,37 +217,39 @@ int32 JobSystemThread::Run() #endif // Run job - job(data.Index); + jobContext->Job(jobIndex); // Move forward with the job queue - bool notifyWaiting = false; - JobsLocker.Lock(); - JobContext& context = JobContexts.At(data.JobKey); - if (Platform::InterlockedDecrement(&context.JobsLeft) <= 0) + if (Platform::InterlockedDecrement(&jobContext->JobsLeft) <= 0) { - // Update any dependant jobs - for (int64 dependant : context.Dependants) + // Mark job as done before processing dependants + Platform::AtomicStore(&jobContext->JobLabel, -1); + + // Check if any other job waits on this one + if (Platform::AtomicRead(&jobContext->DependantsCount) != 0) { - JobContext& dependantContext = JobContexts.At(dependant); - if (--dependantContext.DependenciesLeft <= 0) + // Update dependant jobs + JobsLocker.Lock(); + for (int64 dependant : jobContext->Dependants) { - // Dispatch dependency when it's ready - JobData dependantData; - dependantData.JobKey = dependant; - for (dependantData.Index = 0; dependantData.Index < dependantContext.JobsLeft; dependantData.Index++) - Jobs.PushBack(dependantData); + JobContext& dependantContext = JobContexts[GET_CONTEXT_INDEX(dependant)]; + if (dependantContext.JobLabel == dependant) + Platform::InterlockedDecrement(&dependantContext.DependenciesLeft); } + JobsLocker.Unlock(); } - // Remove completed context - JobContexts.Remove(data.JobKey); - 
notifyWaiting = true; - } - JobsLocker.Unlock(); - if (notifyWaiting) - WaitSignal.NotifyAll(); + // Cleanup completed context + jobContext->Job.Unbind(); + jobContext->Dependants.Clear(); + Platform::AtomicStore(&jobContext->DependantsCount, 0); + Platform::AtomicStore(&jobContext->DependenciesLeft, -999); // Mark to indicate deleted context + Platform::AtomicStore(&jobContext->JobLabel, -1); + Platform::InterlockedDecrement(&JobContextsCount); - job.Unbind(); + // Wakeup any thread waiting for the jobs to complete + WaitSignal.NotifyAll(); + } } else { @@ -266,8 +273,8 @@ void JobSystem::Execute(const Function& job, int32 jobCount) if (jobCount > 1) { // Async - const int64 jobWaitHandle = Dispatch(job, jobCount); - Wait(jobWaitHandle); + const int64 label = Dispatch(job, jobCount); + Wait(label); } else #endif @@ -284,21 +291,31 @@ int64 JobSystem::Dispatch(const Function& job, int32 jobCount) return 0; PROFILE_CPU(); #if JOB_SYSTEM_ENABLED - const auto label = Platform::InterlockedAdd(&JobLabel, (int64)jobCount) + jobCount; + while (Platform::InterlockedIncrement(&JobContextsCount) >= JobContextsSize) + { + // Too many jobs in flight, wait for some to complete to free up contexts + PROFILE_CPU_NAMED("JOB SYSTEM OVERFLOW"); + ZoneColor(TracyWaitZoneColor); + Platform::InterlockedDecrement(&JobContextsCount); + Platform::Sleep(1); + } - JobData data; - data.JobKey = label; + // Get a new label + const int64 label = Platform::InterlockedIncrement(&JobLabel); - JobContext context; + // Build job + JobContext& context = JobContexts[GET_CONTEXT_INDEX(label)]; context.Job = job; + context.JobIndex = 0; context.JobsLeft = jobCount; + context.JobLabel = label; + context.DependantsCount = 0; context.DependenciesLeft = 0; + context.JobsCount = jobCount; + context.Dependants.Clear(); - JobsLocker.Lock(); - JobContexts.Add(label, MoveTemp(context)); - for (data.Index = 0; data.Index < jobCount; data.Index++) - Jobs.PushBack(data); - JobsLocker.Unlock(); + // Move the job 
queue forward + Platform::InterlockedIncrement(&JobEndLabel); if (JobStartingOnDispatch) { @@ -321,34 +338,47 @@ int64 JobSystem::Dispatch(const Function& job, Span dependen if (jobCount <= 0) return 0; PROFILE_CPU(); + PROFILE_MEM(EngineThreading); #if JOB_SYSTEM_ENABLED - const auto label = Platform::InterlockedAdd(&JobLabel, (int64)jobCount) + jobCount; + while (Platform::InterlockedIncrement(&JobContextsCount) >= JobContextsSize) + { + // Too many jobs in flight, wait for some to complete to free up contexts + PROFILE_CPU_NAMED("JOB SYSTEM OVERFLOW"); + ZoneColor(TracyWaitZoneColor); + Platform::InterlockedDecrement(&JobContextsCount); + Platform::Sleep(1); + } - JobData data; - data.JobKey = label; + // Get a new label + const int64 label = Platform::InterlockedIncrement(&JobLabel); - JobContext context; + // Build job + JobContext& context = JobContexts[GET_CONTEXT_INDEX(label)]; context.Job = job; + context.JobIndex = 0; context.JobsLeft = jobCount; + context.JobLabel = label; + context.DependantsCount = 0; context.DependenciesLeft = 0; - - JobsLocker.Lock(); - for (int64 dependency : dependencies) + context.JobsCount = jobCount; + context.Dependants.Clear(); { - if (JobContext* dependencyContext = JobContexts.TryGet(dependency)) + JobsLocker.Lock(); + for (int64 dependency : dependencies) { - context.DependenciesLeft++; - dependencyContext->Dependants.Add(label); + JobContext& dependencyContext = JobContexts[GET_CONTEXT_INDEX(dependency)]; + if (Platform::AtomicRead(&dependencyContext.JobLabel) == dependency) + { + Platform::InterlockedIncrement(&dependencyContext.DependantsCount); + dependencyContext.Dependants.Add(label); + context.DependenciesLeft++; + } } + JobsLocker.Unlock(); } - JobContexts.Add(label, MoveTemp(context)); - if (context.DependenciesLeft == 0) - { - // No dependencies left to complete so dispatch now - for (data.Index = 0; data.Index < jobCount; data.Index++) - Jobs.PushBack(data); - } - JobsLocker.Unlock(); + + // Move the job queue 
forward + Platform::InterlockedIncrement(&JobEndLabel); if (context.DependenciesLeft == 0 && JobStartingOnDispatch) { @@ -369,19 +399,17 @@ int64 JobSystem::Dispatch(const Function& job, Span dependen void JobSystem::Wait() { #if JOB_SYSTEM_ENABLED - JobsLocker.Lock(); - int32 numJobs = JobContexts.Count(); - JobsLocker.Unlock(); + PROFILE_CPU(); + ZoneColor(TracyWaitZoneColor); + int64 numJobs = Platform::AtomicRead(&JobContextsCount); while (numJobs > 0) { WaitMutex.Lock(); WaitSignal.Wait(WaitMutex, 1); WaitMutex.Unlock(); - JobsLocker.Lock(); - numJobs = JobContexts.Count(); - JobsLocker.Unlock(); + numJobs = Platform::AtomicRead(&JobContextsCount); } #endif } @@ -394,12 +422,11 @@ void JobSystem::Wait(int64 label) while (Platform::AtomicRead(&ExitFlag) == 0) { - JobsLocker.Lock(); - const JobContext* context = JobContexts.TryGet(label); - JobsLocker.Unlock(); + const JobContext& context = JobContexts[GET_CONTEXT_INDEX(label)]; + const bool finished = Platform::AtomicRead(&context.JobLabel) != label || Platform::AtomicRead(&context.JobsLeft) <= 0; // Skip if context has been already executed (last job removes it) - if (!context) + if (finished) break; // Wait on signal until input label is not yet done @@ -417,15 +444,10 @@ void JobSystem::SetJobStartingOnDispatch(bool value) { #if JOB_SYSTEM_ENABLED JobStartingOnDispatch = value; - if (value) + if (value && (Platform::AtomicRead(&JobEndLabel) - Platform::AtomicRead(&JobStartLabel)) > 0) { - JobsLocker.Lock(); - const int32 count = Jobs.Count(); - JobsLocker.Unlock(); - if (count == 1) - JobsSignal.NotifyOne(); - else if (count != 0) - JobsSignal.NotifyAll(); + // Wake up threads to start processing jobs that may be already in the queue + JobsSignal.NotifyAll(); } #endif } diff --git a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Material.cpp b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Material.cpp index 09ac78e6b..f3d830382 100644 --- 
a/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Material.cpp +++ b/Source/Engine/Tools/MaterialGenerator/MaterialGenerator.Material.cpp @@ -384,7 +384,7 @@ void MaterialGenerator::ProcessGroupMaterial(Box* box, Node* node, Value& value) // Apply hardness, use 0.991 as max since any value above will result in harsh aliasing auto x2 = writeLocal(ValueType::Float, String::Format(TEXT("saturate((1 - {0}) * (1 / (1 - clamp({1}, 0, 0.991f))))"), x1.Value, hardness.Value), node); - value = writeLocal(ValueType::Float, String::Format(TEXT("{0} ? (1 - {1}) : {1}"), invert.Value, x2.Value), node); + value = writeLocal(ValueType::Float, String::Format(TEXT("select({0}, (1 - {1}), {1})"), invert.Value, x2.Value), node); break; } // Tiling & Offset @@ -459,7 +459,7 @@ void MaterialGenerator::ProcessGroupMaterial(Box* box, Node* node, Value& value) auto x = writeLocal(ValueType::Float, String::Format(TEXT("56100000.0f * pow({0}, -1) + 148.0f"), temperature.Value), node); // Value Y - auto y = writeLocal(ValueType::Float, String::Format(TEXT("{0} > 6500.0f ? 35200000.0f * pow({0}, -1) + 184.0f : 100.04f * log({0}) - 623.6f"), temperature.Value), node); + auto y = writeLocal(ValueType::Float, String::Format(TEXT("select({0} > 6500.0f, 35200000.0f * pow({0}, -1) + 184.0f, 100.04f * log({0}) - 623.6f)"), temperature.Value), node); // Value Z auto z = writeLocal(ValueType::Float, String::Format(TEXT("194.18f * log({0}) - 1448.6f"), temperature.Value), node); @@ -467,7 +467,7 @@ void MaterialGenerator::ProcessGroupMaterial(Box* box, Node* node, Value& value) // Final color auto color = writeLocal(ValueType::Float3, String::Format(TEXT("float3({0}, {1}, {2})"), x.Value, y.Value, z.Value), node); color = writeLocal(ValueType::Float3, String::Format(TEXT("clamp({0}, 0.0f, 255.0f) / 255.0f"), color.Value), node); - value = writeLocal(ValueType::Float3, String::Format(TEXT("{1} < 1000.0f ? 
{0} * {1}/1000.0f : {0}"), color.Value, temperature.Value), node); + value = writeLocal(ValueType::Float3, String::Format(TEXT("select({1} < 1000.0f, {0} * {1}/1000.0f, {0})"), color.Value, temperature.Value), node); break; } // HSVToRGB @@ -490,8 +490,8 @@ void MaterialGenerator::ProcessGroupMaterial(Box* box, Node* node, Value& value) const auto rgb = tryGetValue(node->GetBox(0), node->Values[0]).AsFloat3(); const auto epsilon = writeLocal(ValueType::Float, TEXT("1e-10"), node); - auto p = writeLocal(ValueType::Float4, String::Format(TEXT("({0}.g < {0}.b) ? float4({0}.bg, -1.0f, 2.0f/3.0f) : float4({0}.gb, 0.0f, -1.0f/3.0f)"), rgb.Value), node); - auto q = writeLocal(ValueType::Float4, String::Format(TEXT("({0}.r < {1}.x) ? float4({1}.xyw, {0}.r) : float4({0}.r, {1}.yzx)"), rgb.Value, p.Value), node); + auto p = writeLocal(ValueType::Float4, String::Format(TEXT("select(({0}.g < {0}.b), float4({0}.bg, -1.0f, 2.0f/3.0f), float4({0}.gb, 0.0f, -1.0f/3.0f))"), rgb.Value), node); + auto q = writeLocal(ValueType::Float4, String::Format(TEXT("select(({0}.r < {1}.x), float4({1}.xyw, {0}.r), float4({0}.r, {1}.yzx))"), rgb.Value, p.Value), node); auto c = writeLocal(ValueType::Float, String::Format(TEXT("{0}.x - min({0}.w, {0}.y)"), q.Value), node); auto h = writeLocal(ValueType::Float, String::Format(TEXT("abs(({0}.w - {0}.y) / (6 * {1} + {2}) + {0}.z)"), q.Value, c.Value, epsilon.Value), node); @@ -721,13 +721,13 @@ void MaterialGenerator::ProcessGroupMaterial(Box* box, Node* node, Value& value) blendFormula = TEXT("1.0 - (1.0 - base) * (1.0 - blend)"); break; case 5: // Overlay - blendFormula = TEXT("base <= 0.5 ? 2.0 * base * blend : 1.0 - 2.0 * (1.0 - base) * (1.0 - blend)"); + blendFormula = TEXT("select(base <= 0.5, 2.0 * base * blend, 1.0 - 2.0 * (1.0 - base) * (1.0 - blend))"); break; case 6: // Linear Burn blendFormula = TEXT("base + blend - 1.0"); break; case 7: // Linear Light - blendFormula = TEXT("blend < 0.5 ? 
max(base + (2.0 * blend) - 1.0, 0.0) : min(base + 2.0 * (blend - 0.5), 1.0)"); + blendFormula = TEXT("select(blend < 0.5, max(base + (2.0 * blend) - 1.0, 0.0), min(base + 2.0 * (blend - 0.5), 1.0))"); break; case 8: // Darken blendFormula = TEXT("min(base, blend)"); @@ -745,10 +745,10 @@ void MaterialGenerator::ProcessGroupMaterial(Box* box, Node* node, Value& value) blendFormula = TEXT("base / (blend + 0.000001)"); break; case 13: // Hard Light - blendFormula = TEXT("blend <= 0.5 ? 2.0 * base * blend : 1.0 - 2.0 * (1.0 - base) * (1.0 - blend)"); + blendFormula = TEXT("select(blend <= 0.5, 2.0 * base * blend, 1.0 - 2.0 * (1.0 - base) * (1.0 - blend))"); break; case 14: // Pin Light - blendFormula = TEXT("blend <= 0.5 ? min(base, 2.0 * blend) : max(base, 2.0 * (blend - 0.5))"); + blendFormula = TEXT("select(blend <= 0.5, min(base, 2.0 * blend), max(base, 2.0 * (blend - 0.5)))"); break; case 15: // Hard Mix blendFormula = TEXT("step(1.0 - base, blend)");