diff --git a/Source/Engine/Animations/Graph/AnimGraph.Custom.cpp b/Source/Engine/Animations/Graph/AnimGraph.Custom.cpp index cdd5b03eb..860ac99d4 100644 --- a/Source/Engine/Animations/Graph/AnimGraph.Custom.cpp +++ b/Source/Engine/Animations/Graph/AnimGraph.Custom.cpp @@ -89,7 +89,7 @@ void AnimGraphExecutor::initRuntime() void AnimGraphExecutor::ProcessGroupCustom(Box* boxBase, Node* nodeBase, Value& value) { #if USE_CSHARP - auto& context = Context.Get(); + auto& context = *Context.Get(); if (context.ValueCache.TryGet(boxBase, value)) return; auto box = (AnimGraphBox*)boxBase; diff --git a/Source/Engine/Animations/Graph/AnimGraph.cpp b/Source/Engine/Animations/Graph/AnimGraph.cpp index 40c33a3e8..09c03b71a 100644 --- a/Source/Engine/Animations/Graph/AnimGraph.cpp +++ b/Source/Engine/Animations/Graph/AnimGraph.cpp @@ -9,7 +9,7 @@ extern void RetargetSkeletonNode(const SkeletonData& sourceSkeleton, const SkeletonData& targetSkeleton, const SkinnedModel::SkeletonMapping& sourceMapping, Transform& node, int32 i); -ThreadLocal AnimGraphExecutor::Context; +ThreadLocal AnimGraphExecutor::Context; Transform AnimGraphImpulse::GetNodeModelTransformation(SkeletonData& skeleton, int32 nodeIndex) const { @@ -104,7 +104,7 @@ AnimGraphInstanceData::OutgoingEvent AnimGraphInstanceData::ActiveEvent::End(Ani AnimGraphImpulse* AnimGraphNode::GetNodes(AnimGraphExecutor* executor) { - auto& context = AnimGraphExecutor::Context.Get(); + auto& context = *AnimGraphExecutor::Context.Get(); const int32 count = executor->_skeletonNodesCount; if (context.PoseCacheSize == context.PoseCache.Count()) context.PoseCache.AddOne(); @@ -204,7 +204,10 @@ void AnimGraphExecutor::Update(AnimGraphInstanceData& data, float dt) // Initialize auto& skeleton = _graph.BaseModel->Skeleton; - auto& context = Context.Get(); + auto& contextPtr = Context.Get(); + if (!contextPtr) + contextPtr = New(); + auto& context = *contextPtr; { ANIM_GRAPH_PROFILE_EVENT("Init"); @@ -378,12 +381,12 @@ void AnimGraphExecutor::GetInputValue(Box* box, Value& result) AnimGraphImpulse* AnimGraphExecutor::GetEmptyNodes() { - return &Context.Get().EmptyNodes; + return &Context.Get()->EmptyNodes; } void AnimGraphExecutor::InitNodes(AnimGraphImpulse* nodes) const { - const auto& emptyNodes = Context.Get().EmptyNodes; + const auto& emptyNodes = Context.Get()->EmptyNodes; Platform::MemoryCopy(nodes->Nodes.Get(), emptyNodes.Nodes.Get(), sizeof(Transform) * _skeletonNodesCount); nodes->RootMotion = emptyNodes.RootMotion; nodes->Position = emptyNodes.Position; @@ -405,7 +408,7 @@ void AnimGraphExecutor::ResetBuckets(AnimGraphContext& context, AnimGraphBase* g VisjectExecutor::Value AnimGraphExecutor::eatBox(Node* caller, Box* box) { - auto& context = Context.Get(); + auto& context = *Context.Get(); // Check if graph is looped or is too deep if (context.CallStack.Count() >= ANIM_GRAPH_MAX_CALL_STACK) @@ -450,6 +453,6 @@ VisjectExecutor::Value AnimGraphExecutor::eatBox(Node* caller, Box* box) VisjectExecutor::Graph* AnimGraphExecutor::GetCurrentGraph() const { - auto& context = Context.Get(); + auto& context = *Context.Get(); return context.GraphStack.Peek(); } diff --git a/Source/Engine/Animations/Graph/AnimGraph.h b/Source/Engine/Animations/Graph/AnimGraph.h index a774e6b85..f3c148e0a 100644 --- a/Source/Engine/Animations/Graph/AnimGraph.h +++ b/Source/Engine/Animations/Graph/AnimGraph.h @@ -819,7 +819,7 @@ private: int32 _skeletonNodesCount = 0; // Per-thread context to allow async execution - static ThreadLocal Context; + static ThreadLocal Context; public: /// diff --git a/Source/Engine/Animations/Graph/AnimGroup.Animation.cpp b/Source/Engine/Animations/Graph/AnimGroup.Animation.cpp index 57829e8fd..536ed7d83 100644 --- a/Source/Engine/Animations/Graph/AnimGroup.Animation.cpp +++ b/Source/Engine/Animations/Graph/AnimGroup.Animation.cpp @@ -87,7 +87,7 @@ void AnimGraphExecutor::ProcessAnimEvents(AnimGraphNode* node, bool loop, float if (anim->Events.Count() == 0) return; ANIM_GRAPH_PROFILE_EVENT("Events"); - auto& context = Context.Get(); + auto& context = *Context.Get(); float eventTimeMin = animPrevPos; float eventTimeMax = animPos; if (loop && context.DeltaTime * speed < 0) @@ -231,7 +231,7 @@ void AnimGraphExecutor::ProcessAnimation(AnimGraphImpulse* nodes, AnimGraphNode* const float animPrevPos = GetAnimSamplePos(length, anim, prevPos, speed); // Add to trace - auto& context = Context.Get(); + auto& context = *Context.Get(); if (context.Data->EnableTracing) { auto& trace = context.AddTraceEvent(node); @@ -655,7 +655,7 @@ void ComputeMultiBlendLength(float& length, AnimGraphNode* node) void AnimGraphExecutor::ProcessGroupParameters(Box* box, Node* node, Value& value) { - auto& context = Context.Get(); + auto& context = *Context.Get(); switch (node->TypeID) { // Get @@ -766,7 +766,7 @@ void AnimGraphExecutor::ProcessGroupParameters(Box* box, Node* node, Value& valu void AnimGraphExecutor::ProcessGroupTools(Box* box, Node* nodeBase, Value& value) { - auto& context = Context.Get(); + auto& context = *Context.Get(); auto node = (AnimGraphNode*)nodeBase; switch (node->TypeID) { @@ -790,7 +790,7 @@ void AnimGraphExecutor::ProcessGroupTools(Box* box, Node* nodeBase, Value& value void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Value& value) { - auto& context = Context.Get(); + auto& context = *Context.Get(); if (context.ValueCache.TryGet(boxBase, value)) return; auto box = (AnimGraphBox*)boxBase; @@ -2272,7 +2272,7 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu void AnimGraphExecutor::ProcessGroupFunction(Box* boxBase, Node* node, Value& value) { - auto& context = Context.Get(); + auto& context = *Context.Get(); if (context.ValueCache.TryGet(boxBase, value)) return; switch (node->TypeID) diff --git a/Source/Engine/Content/Storage/FlaxStorage.cpp b/Source/Engine/Content/Storage/FlaxStorage.cpp index df99418bb..ca39ebaf9 100644 --- a/Source/Engine/Content/Storage/FlaxStorage.cpp +++ b/Source/Engine/Content/Storage/FlaxStorage.cpp @@ -211,7 +211,10 @@ FlaxStorage::~FlaxStorage() #if USE_EDITOR // Ensure to close any outstanding file handles to prevent file locking in case it failed to load - _file.DeleteAll(); + Array streams; + _file.GetValues(streams); + for (FileReadStream* stream : streams) + Delete(stream); #endif } @@ -1264,7 +1267,6 @@ bool FlaxStorage::LoadAssetHeader(const Entry& e, AssetInitData& data) } #if ASSETS_LOADING_EXTRA_VERIFICATION - // Validate loaded header (asset ID and type ID must be the same) if (e.ID != data.Header.ID) { @@ -1274,7 +1276,6 @@ bool FlaxStorage::LoadAssetHeader(const Entry& e, AssetInitData& data) { LOG(Error, "Loading asset header data mismatch! Expected Type Name: {0}, loaded header: {1}.\nSource: {2}", e.TypeName, data.Header.ToString(), ToString()); } - #endif return false; @@ -1337,7 +1338,11 @@ bool FlaxStorage::CloseFileHandles() return true; // Failed, someone is still accessing the file // Close file handles (from all threads) - _file.DeleteAll(); + Array streams; + _file.GetValues(streams); + for (FileReadStream* stream : streams) + Delete(stream); + _file.Clear(); return false; } diff --git a/Source/Engine/Content/Storage/FlaxStorage.h b/Source/Engine/Content/Storage/FlaxStorage.h index 77c912c5a..842511430 100644 --- a/Source/Engine/Content/Storage/FlaxStorage.h +++ b/Source/Engine/Content/Storage/FlaxStorage.h @@ -94,7 +94,7 @@ protected: CriticalSection _loadLocker; // Storage - ThreadLocalObject _file; + ThreadLocal _file; Array _chunks; // Metadata diff --git a/Source/Engine/Core/Collections/Sorting.cpp b/Source/Engine/Core/Collections/Sorting.cpp index 49ce0f3d4..85c0fc9f1 100644 --- a/Source/Engine/Core/Collections/Sorting.cpp +++ b/Source/Engine/Core/Collections/Sorting.cpp @@ -5,11 +5,14 @@ #include "Engine/Threading/ThreadLocal.h" // Use a cached storage for the sorting (one per thread to reduce locking) -ThreadLocal SortingStacks; +ThreadLocal SortingStacks; Sorting::SortingStack& Sorting::SortingStack::Get() { - return SortingStacks.Get(); + SortingStack*& stack = SortingStacks.Get(); + if (!stack) + stack = New(); + return *stack; } Sorting::SortingStack::SortingStack() diff --git a/Source/Engine/Engine/EngineService.cpp b/Source/Engine/Engine/EngineService.cpp index 7eea66853..c400ef943 100644 --- a/Source/Engine/Engine/EngineService.cpp +++ b/Source/Engine/Engine/EngineService.cpp @@ -72,9 +72,6 @@ void EngineService::OnInit() // Init services from front to back auto& services = GetServices(); -#if TRACY_ENABLE - Char nameBuffer[100]; -#endif for (int32 i = 0; i < services.Count(); i++) { const auto service = services[i]; @@ -82,6 +79,7 @@ void EngineService::OnInit() #if TRACY_ENABLE ZoneScoped; int32 nameBufferLength = 0; + Char nameBuffer[100]; for (int32 j = 0; j < name.Length(); j++) if (name[j] != ' ') nameBuffer[nameBufferLength++] = name[j]; @@ -114,6 +112,18 @@ void EngineService::OnDispose() const auto service = services[i]; if (service->IsInitialized) { +#if TRACY_ENABLE + ZoneScoped; + const StringView name(service->Name); + int32 nameBufferLength = 0; + Char nameBuffer[100]; + for (int32 j = 0; j < name.Length(); j++) + if (name[j] != ' ') + nameBuffer[nameBufferLength++] = name[j]; + Platform::MemoryCopy(nameBuffer + nameBufferLength, TEXT("::Dispose"), 10 * sizeof(Char)); + nameBufferLength += 10; + ZoneName(nameBuffer, nameBufferLength); +#endif service->IsInitialized = false; service->Dispose(); } diff --git a/Source/Engine/Level/SceneObjectsFactory.cpp b/Source/Engine/Level/SceneObjectsFactory.cpp index d9c86d250..006e54aa6 100644 --- a/Source/Engine/Level/SceneObjectsFactory.cpp +++ b/Source/Engine/Level/SceneObjectsFactory.cpp @@ -63,7 +63,7 @@ SceneObjectsFactory::Context::~Context() { if (Async) { - Array> modifiers; + Array> modifiers; Modifiers.GetValues(modifiers); for (ISerializeModifier* e : modifiers) { diff --git a/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.ParticleModules.cpp b/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.ParticleModules.cpp index 6ea4b8b4e..f198fdcd7 100644 --- a/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.ParticleModules.cpp +++ b/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.ParticleModules.cpp @@ -52,7 +52,7 @@ namespace int32 ParticleEmitterGraphCPUExecutor::ProcessSpawnModule(int32 index) { const auto node = _graph.SpawnModules[index]; - auto& context = Context.Get(); + auto& context = *Context.Get(); auto& data = context.Data->SpawnModulesData[index]; // Accumulate the previous frame fraction @@ -120,7 +120,7 @@ int32 ParticleEmitterGraphCPUExecutor::ProcessSpawnModule(int32 index) void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode* node, int32 particlesStart, int32 particlesEnd) { - auto& context = Context.Get(); + auto& context = *Context.Get(); auto stride = context.Data->Buffer->Stride; auto start = context.Data->Buffer->GetParticleCPU(particlesStart); diff --git a/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.Particles.cpp b/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.Particles.cpp index cb2d7004e..4c30746c3 100644 --- a/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.Particles.cpp +++ b/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.Particles.cpp @@ -12,7 +12,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessGroupParameters(Box* box, Node* node, Value& value) { - auto& context = Context.Get(); + auto& context = *Context.Get(); switch (node->TypeID) { // Get @@ -168,7 +168,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessGroupTextures(Box* box, Node* node, void ParticleEmitterGraphCPUExecutor::ProcessGroupTools(Box* box, Node* node, Value& value) { - auto& context = Context.Get(); + auto& context = *Context.Get(); switch (node->TypeID) { // Linearize Depth @@ -202,7 +202,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessGroupTools(Box* box, Node* node, Va void ParticleEmitterGraphCPUExecutor::ProcessGroupParticles(Box* box, Node* nodeBase, Value& value) { - auto& context = Context.Get(); + auto& context = *Context.Get(); auto node = (ParticleEmitterGraphCPUNode*)nodeBase; switch (node->TypeID) { @@ -468,7 +468,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessGroupParticles(Box* box, Node* node void ParticleEmitterGraphCPUExecutor::ProcessGroupFunction(Box* box, Node* node, Value& value) { - auto& context = Context.Get(); + auto& context = *Context.Get(); switch (node->TypeID) { // Function Input diff --git a/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.cpp b/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.cpp index a8f7898e1..12ecd054b 100644 --- a/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.cpp +++ b/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.cpp @@ -8,7 +8,7 @@ #include "Engine/Engine/Time.h" #include "Engine/Profiler/ProfilerCPU.h" -ThreadLocal ParticleEmitterGraphCPUExecutor::Context; +ThreadLocal ParticleEmitterGraphCPUExecutor::Context; namespace { @@ -122,7 +122,10 @@ ParticleEmitterGraphCPUExecutor::ParticleEmitterGraphCPUExecutor(ParticleEmitter void ParticleEmitterGraphCPUExecutor::Init(ParticleEmitter* emitter, ParticleEffect* effect, ParticleEmitterInstance& data, float dt) { - auto& context = Context.Get(); + auto& contextPtr = Context.Get(); + if (!contextPtr) + contextPtr = New(); + auto& context = *contextPtr; context.GraphStack.Clear(); context.GraphStack.Push(&_graph); context.Data = &data; @@ -252,8 +255,8 @@ bool ParticleEmitterGraphCPUExecutor::ComputeBounds(ParticleEmitter* emitter, Pa case 401: { // Prepare graph data - auto& context = Context.Get(); Init(emitter, effect, data); + auto& context = *Context.Get(); // Find the maximum radius of the particle light float maxRadius = 0.0f; @@ -377,7 +380,7 @@ void ParticleEmitterGraphCPUExecutor::Draw(ParticleEmitter* emitter, ParticleEff // Prepare graph data Init(emitter, effect, data); - auto& context = Context.Get(); + auto& context = *Context.Get(); // Draw lights for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.LightModules.Count(); moduleIndex++) @@ -571,7 +574,6 @@ int32 ParticleEmitterGraphCPUExecutor::UpdateSpawn(ParticleEmitter* emitter, Par PROFILE_CPU_NAMED("Spawn"); // Prepare data - auto& context = Context.Get(); Init(emitter, effect, data, dt); // Spawn particles @@ -587,7 +589,7 @@ int32 ParticleEmitterGraphCPUExecutor::UpdateSpawn(ParticleEmitter* emitter, Par VisjectExecutor::Value ParticleEmitterGraphCPUExecutor::eatBox(Node* caller, Box* box) { // Check if graph is looped or is too deep - auto& context = Context.Get(); + auto& context = *Context.Get(); if (context.CallStackSize >= PARTICLE_EMITTER_MAX_CALL_STACK) { OnError(caller, box, TEXT("Graph is looped or too deep!")); @@ -618,6 +620,6 @@ VisjectExecutor::Value ParticleEmitterGraphCPUExecutor::eatBox(Node* caller, Box VisjectExecutor::Graph* ParticleEmitterGraphCPUExecutor::GetCurrentGraph() const { - auto& context = Context.Get(); + auto& context = *Context.Get(); return (Graph*)context.GraphStack.Peek(); } diff --git a/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.h b/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.h index 4f55da6e2..34a65d721 100644 --- a/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.h +++ b/Source/Engine/Particles/Graph/CPU/ParticleEmitterGraph.CPU.h @@ -133,7 +133,7 @@ private: ParticleEmitterGraphCPU& _graph; // Per-thread context to allow async execution - static ThreadLocal Context; + static ThreadLocal Context; public: /// diff --git a/Source/Engine/Platform/Base/PlatformBase.cpp b/Source/Engine/Platform/Base/PlatformBase.cpp index 9a6b1b9dc..5c1b2ca28 100644 --- a/Source/Engine/Platform/Base/PlatformBase.cpp +++ b/Source/Engine/Platform/Base/PlatformBase.cpp @@ -41,6 +41,10 @@ static_assert(sizeof(bool) == 1, "Invalid bool type size."); static_assert(sizeof(float) == 4, "Invalid float type size."); static_assert(sizeof(double) == 8, "Invalid double type size."); +// Check configuration +static_assert((PLATFORM_THREADS_LIMIT & (PLATFORM_THREADS_LIMIT - 1)) == 0, "Threads limit must be power of two."); +static_assert(PLATFORM_THREADS_LIMIT % 4 == 0, "Threads limit must be multiple of 4."); + float PlatformBase::CustomDpiScale = 1.0f; Array> PlatformBase::Users; Delegate PlatformBase::UserAdded; diff --git a/Source/Engine/Scripting/Scripting.cpp b/Source/Engine/Scripting/Scripting.cpp index 6cabbc0dd..e3a0991e3 100644 --- a/Source/Engine/Scripting/Scripting.cpp +++ b/Source/Engine/Scripting/Scripting.cpp @@ -115,7 +115,7 @@ Action Scripting::ScriptsLoaded; Action Scripting::ScriptsUnload; Action Scripting::ScriptsReloading; Action Scripting::ScriptsReloaded; -ThreadLocal Scripting::ObjectsLookupIdMapping; +ThreadLocal Scripting::ObjectsLookupIdMapping; ScriptingService ScriptingServiceInstance; bool initFlaxEngine(); diff --git a/Source/Engine/Scripting/Scripting.h b/Source/Engine/Scripting/Scripting.h index a22349928..6ed2beca4 100644 --- a/Source/Engine/Scripting/Scripting.h +++ b/Source/Engine/Scripting/Scripting.h @@ -6,7 +6,7 @@ #include "Engine/Scripting/ScriptingType.h" #include "Types.h" -template +template class ThreadLocal; /// @@ -114,7 +114,7 @@ public: /// /// The objects lookup identifier mapping used to override the object ids on FindObject call (used by the object references deserialization). /// - static ThreadLocal ObjectsLookupIdMapping; + static ThreadLocal ObjectsLookupIdMapping; /// /// Finds the object by the given identifier. Searches registered scene objects and optionally assets. Logs warning if fails. diff --git a/Source/Engine/Serialization/FileReadStream.h b/Source/Engine/Serialization/FileReadStream.h index 40a14185b..57287abcf 100644 --- a/Source/Engine/Serialization/FileReadStream.h +++ b/Source/Engine/Serialization/FileReadStream.h @@ -12,7 +12,6 @@ class FLAXENGINE_API FileReadStream : public ReadStream { private: - File* _file; uint32 _virtualPosInBuffer; // Current position in the buffer (index) uint32 _bufferSize; // Amount of loaded bytes from the file to the buffer @@ -33,11 +32,9 @@ public: ~FileReadStream(); public: - /// /// Gets the file handle. /// - /// File FORCE_INLINE const File* GetFile() const { return _file; @@ -49,7 +46,6 @@ public: void Unlink(); public: - /// /// Open file to write data to it /// @@ -58,7 +54,6 @@ public: static FileReadStream* Open(const StringView& path); public: - // [ReadStream] void Flush() final override; void Close() final override; diff --git a/Source/Engine/Threading/JobSystem.cpp b/Source/Engine/Threading/JobSystem.cpp index c89e6aca5..dea8298ce 100644 --- a/Source/Engine/Threading/JobSystem.cpp +++ b/Source/Engine/Threading/JobSystem.cpp @@ -93,7 +93,7 @@ struct TIsPODType namespace { JobSystemService JobSystemInstance; - Thread* Threads[PLATFORM_THREADS_LIMIT] = {}; + Thread* Threads[PLATFORM_THREADS_LIMIT / 2] = {}; int32 ThreadsCount = 0; bool JobStartingOnDispatch = true; volatile int64 ExitFlag = 0; diff --git a/Source/Engine/Threading/ThreadLocal.h b/Source/Engine/Threading/ThreadLocal.h index 4de8ced57..aba1f1170 100644 --- a/Source/Engine/Threading/ThreadLocal.h +++ b/Source/Engine/Threading/ThreadLocal.h @@ -6,14 +6,14 @@ #include "Engine/Platform/Platform.h" /// -/// Per-thread local variable storage. -/// Implemented using atomic with per-thread storage indexed via thread id hashing. -/// ForConsider using 'THREADLOCAL' define before the variable instead. +/// Per-thread local variable storage for basic types (POD). Implemented using atomic with per-thread storage indexed via thread id hashing. Consider using 'THREADLOCAL' define before the variable instead. /// -template +template class ThreadLocal { protected: + constexpr static int32 DynamicMaxThreads = 1024; + static_assert(TIsPODType::Value, "Only POD types are supported"); struct Bucket { @@ -21,34 +21,29 @@ protected: T Value; }; - Bucket _buckets[MaxThreads]; + Bucket _staticBuckets[MaxThreads]; + Bucket* _dynamicBuckets = nullptr; public: - ThreadLocal() { - // Clear buckets - if (ClearMemory) - { - Platform::MemoryClear(_buckets, sizeof(_buckets)); - } - else - { - for (int32 i = 0; i < MaxThreads; i++) - _buckets[i].ThreadID = 0; - } + Platform::MemoryClear(_staticBuckets, sizeof(_staticBuckets)); + } + + ~ThreadLocal() + { + Platform::Free(_dynamicBuckets); } public: - - T& Get() + FORCE_INLINE T& Get() { - return _buckets[GetIndex()].Value; + return GetBucket().Value; } - void Set(const T& value) + FORCE_INLINE void Set(const T& value) { - _buckets[GetIndex()].Value = value; + GetBucket().Value = value; } int32 Count() const @@ -56,9 +51,17 @@ public: int32 result = 0; for (int32 i = 0; i < MaxThreads; i++) { - if (Platform::AtomicRead((int64 volatile*)&_buckets[i].ThreadID) != 0) + if (Platform::AtomicRead((int64 volatile*)&_staticBuckets[i].ThreadID) != 0) result++; } + if (auto dynamicBuckets = (Bucket*)Platform::AtomicRead((intptr volatile*)&_dynamicBuckets)) + { + for (int32 i = 0; i < MaxThreads; i++) + { + if (Platform::AtomicRead((int64 volatile*)&dynamicBuckets[i].ThreadID) != 0) + result++; + } + } return result; } @@ -67,89 +70,72 @@ public: { for (int32 i = 0; i < MaxThreads; i++) { - if (Platform::AtomicRead((int64 volatile*)&_buckets[i].ThreadID) != 0) - result.Add(_buckets[i].Value); + if (Platform::AtomicRead((int64 volatile*)&_staticBuckets[i].ThreadID) != 0) + result.Add(_staticBuckets[i].Value); } + if (auto dynamicBuckets = (Bucket*)Platform::AtomicRead((intptr volatile*)&_dynamicBuckets)) + { + for (int32 i = 0; i < MaxThreads; i++) + { + if (Platform::AtomicRead((int64 volatile*)&dynamicBuckets[i].ThreadID) != 0) + result.Add(dynamicBuckets[i].Value); + } + } + } + + void Clear() + { + Platform::MemoryClear(_staticBuckets, sizeof(_staticBuckets)); + Platform::Free(_dynamicBuckets); + _dynamicBuckets = nullptr; } protected: - - FORCE_INLINE static int32 Hash(const int64 value) + Bucket& GetBucket() { - return value & (MaxThreads - 1); - } + const int64 key = (int64)Platform::GetCurrentThreadID(); - FORCE_INLINE int32 GetIndex() - { - int64 key = (int64)Platform::GetCurrentThreadID(); - auto index = Hash(key); - while (true) + // Search statically allocated buckets + int32 index = (int32)(key & (MaxThreads - 1)); + int32 spaceLeft = MaxThreads; + while (spaceLeft) { - const int64 value = Platform::AtomicRead(&_buckets[index].ThreadID); + const int64 value = Platform::AtomicRead(&_staticBuckets[index].ThreadID); if (value == key) - break; - if (value == 0 && Platform::InterlockedCompareExchange(&_buckets[index].ThreadID, key, 0) == 0) - break; - index = Hash(index + 1); + return _staticBuckets[index]; + if (value == 0 && Platform::InterlockedCompareExchange(&_staticBuckets[index].ThreadID, key, 0) == 0) + return _staticBuckets[index]; + index = (index + 1) & (MaxThreads - 1); + spaceLeft--; } - return index; - } -}; -/// -/// Per thread local object -/// -template -class ThreadLocalObject : public ThreadLocal -{ -public: - - typedef ThreadLocal Base; - -public: - - void Delete() - { - auto value = Base::Get(); - Base::SetAll(nullptr); - ::Delete(value); - } - - void DeleteAll() - { - for (int32 i = 0; i < MaxThreads; i++) + // Allocate dynamic buckets if missing + DYNAMIC: + auto dynamicBuckets = (Bucket*)Platform::AtomicRead((intptr volatile*)&_dynamicBuckets); + if (!dynamicBuckets) { - auto& bucket = Base::_buckets[i]; - if (bucket.Value != nullptr) + dynamicBuckets = (Bucket*)Platform::Allocate(DynamicMaxThreads * sizeof(Bucket), 16); + Platform::MemoryClear(dynamicBuckets, DynamicMaxThreads * sizeof(Bucket)); + if (Platform::InterlockedCompareExchange((intptr volatile*)&_dynamicBuckets, (intptr)dynamicBuckets, 0) != 0) { - ::Delete(bucket.Value); - bucket.ThreadID = 0; - bucket.Value = nullptr; + Platform::Free(dynamicBuckets); + goto DYNAMIC; } } - } - template - void GetNotNullValues(Array& result) const - { - result.EnsureCapacity(MaxThreads); - for (int32 i = 0; i < MaxThreads; i++) + // Search dynamically allocated buckets + index = (int32)(key & (DynamicMaxThreads - 1)); + spaceLeft = DynamicMaxThreads; + while (spaceLeft) { - if (Base::_buckets[i].Value != nullptr) - { - result.Add(Base::_buckets[i].Value); - } + const int64 value = Platform::AtomicRead(&dynamicBuckets[index].ThreadID); + if (value == key) + return dynamicBuckets[index]; + if (value == 0 && Platform::InterlockedCompareExchange(&dynamicBuckets[index].ThreadID, key, 0) == 0) + return dynamicBuckets[index]; + index = (index + 1) & (DynamicMaxThreads - 1); + spaceLeft--; } - } - - int32 CountNotNullValues() const - { - int32 result = 0; - for (int32 i = 0; i < MaxThreads; i++) - { - if (Base::_buckets[i].Value != nullptr) - result++; - } - return result; + return *(Bucket*)nullptr; } }; diff --git a/Source/Engine/Threading/ThreadPool.cpp b/Source/Engine/Threading/ThreadPool.cpp index b7db81ffa..2b6ed5e26 100644 --- a/Source/Engine/Threading/ThreadPool.cpp +++ b/Source/Engine/Threading/ThreadPool.cpp @@ -58,7 +58,7 @@ ThreadPoolService ThreadPoolServiceInstance; bool ThreadPoolService::Init() { // Spawn threads - const int32 numThreads = Math::Clamp(Platform::GetCPUInfo().ProcessorCoreCount - 1, 2, PLATFORM_THREADS_LIMIT); + const int32 numThreads = Math::Clamp(Platform::GetCPUInfo().ProcessorCoreCount - 1, 2, PLATFORM_THREADS_LIMIT / 2); LOG(Info, "Spawning {0} Thread Pool workers", numThreads); for (int32 i = ThreadPoolImpl::Threads.Count(); i < numThreads; i++) {