Refactor ThreadLocal when running on hardware with more cores than PLATFORM_THREADS_LIMIT

This commit is contained in:
Wojtek Figat
2024-02-07 23:39:02 +01:00
parent eed780a0b0
commit 082768d08c
20 changed files with 147 additions and 139 deletions

View File

@@ -89,7 +89,7 @@ void AnimGraphExecutor::initRuntime()
void AnimGraphExecutor::ProcessGroupCustom(Box* boxBase, Node* nodeBase, Value& value) void AnimGraphExecutor::ProcessGroupCustom(Box* boxBase, Node* nodeBase, Value& value)
{ {
#if USE_CSHARP #if USE_CSHARP
auto& context = Context.Get(); auto& context = *Context.Get();
if (context.ValueCache.TryGet(boxBase, value)) if (context.ValueCache.TryGet(boxBase, value))
return; return;
auto box = (AnimGraphBox*)boxBase; auto box = (AnimGraphBox*)boxBase;

View File

@@ -9,7 +9,7 @@
extern void RetargetSkeletonNode(const SkeletonData& sourceSkeleton, const SkeletonData& targetSkeleton, const SkinnedModel::SkeletonMapping& sourceMapping, Transform& node, int32 i); extern void RetargetSkeletonNode(const SkeletonData& sourceSkeleton, const SkeletonData& targetSkeleton, const SkinnedModel::SkeletonMapping& sourceMapping, Transform& node, int32 i);
ThreadLocal<AnimGraphContext> AnimGraphExecutor::Context; ThreadLocal<AnimGraphContext*> AnimGraphExecutor::Context;
Transform AnimGraphImpulse::GetNodeModelTransformation(SkeletonData& skeleton, int32 nodeIndex) const Transform AnimGraphImpulse::GetNodeModelTransformation(SkeletonData& skeleton, int32 nodeIndex) const
{ {
@@ -104,7 +104,7 @@ AnimGraphInstanceData::OutgoingEvent AnimGraphInstanceData::ActiveEvent::End(Ani
AnimGraphImpulse* AnimGraphNode::GetNodes(AnimGraphExecutor* executor) AnimGraphImpulse* AnimGraphNode::GetNodes(AnimGraphExecutor* executor)
{ {
auto& context = AnimGraphExecutor::Context.Get(); auto& context = *AnimGraphExecutor::Context.Get();
const int32 count = executor->_skeletonNodesCount; const int32 count = executor->_skeletonNodesCount;
if (context.PoseCacheSize == context.PoseCache.Count()) if (context.PoseCacheSize == context.PoseCache.Count())
context.PoseCache.AddOne(); context.PoseCache.AddOne();
@@ -204,7 +204,10 @@ void AnimGraphExecutor::Update(AnimGraphInstanceData& data, float dt)
// Initialize // Initialize
auto& skeleton = _graph.BaseModel->Skeleton; auto& skeleton = _graph.BaseModel->Skeleton;
auto& context = Context.Get(); auto& contextPtr = Context.Get();
if (!contextPtr)
contextPtr = New<AnimGraphContext>();
auto& context = *contextPtr;
{ {
ANIM_GRAPH_PROFILE_EVENT("Init"); ANIM_GRAPH_PROFILE_EVENT("Init");
@@ -378,12 +381,12 @@ void AnimGraphExecutor::GetInputValue(Box* box, Value& result)
AnimGraphImpulse* AnimGraphExecutor::GetEmptyNodes() AnimGraphImpulse* AnimGraphExecutor::GetEmptyNodes()
{ {
return &Context.Get().EmptyNodes; return &Context.Get()->EmptyNodes;
} }
void AnimGraphExecutor::InitNodes(AnimGraphImpulse* nodes) const void AnimGraphExecutor::InitNodes(AnimGraphImpulse* nodes) const
{ {
const auto& emptyNodes = Context.Get().EmptyNodes; const auto& emptyNodes = Context.Get()->EmptyNodes;
Platform::MemoryCopy(nodes->Nodes.Get(), emptyNodes.Nodes.Get(), sizeof(Transform) * _skeletonNodesCount); Platform::MemoryCopy(nodes->Nodes.Get(), emptyNodes.Nodes.Get(), sizeof(Transform) * _skeletonNodesCount);
nodes->RootMotion = emptyNodes.RootMotion; nodes->RootMotion = emptyNodes.RootMotion;
nodes->Position = emptyNodes.Position; nodes->Position = emptyNodes.Position;
@@ -405,7 +408,7 @@ void AnimGraphExecutor::ResetBuckets(AnimGraphContext& context, AnimGraphBase* g
VisjectExecutor::Value AnimGraphExecutor::eatBox(Node* caller, Box* box) VisjectExecutor::Value AnimGraphExecutor::eatBox(Node* caller, Box* box)
{ {
auto& context = Context.Get(); auto& context = *Context.Get();
// Check if graph is looped or is too deep // Check if graph is looped or is too deep
if (context.CallStack.Count() >= ANIM_GRAPH_MAX_CALL_STACK) if (context.CallStack.Count() >= ANIM_GRAPH_MAX_CALL_STACK)
@@ -450,6 +453,6 @@ VisjectExecutor::Value AnimGraphExecutor::eatBox(Node* caller, Box* box)
VisjectExecutor::Graph* AnimGraphExecutor::GetCurrentGraph() const VisjectExecutor::Graph* AnimGraphExecutor::GetCurrentGraph() const
{ {
auto& context = Context.Get(); auto& context = *Context.Get();
return context.GraphStack.Peek(); return context.GraphStack.Peek();
} }

View File

@@ -819,7 +819,7 @@ private:
int32 _skeletonNodesCount = 0; int32 _skeletonNodesCount = 0;
// Per-thread context to allow async execution // Per-thread context to allow async execution
static ThreadLocal<AnimGraphContext> Context; static ThreadLocal<AnimGraphContext*> Context;
public: public:
/// <summary> /// <summary>

View File

@@ -87,7 +87,7 @@ void AnimGraphExecutor::ProcessAnimEvents(AnimGraphNode* node, bool loop, float
if (anim->Events.Count() == 0) if (anim->Events.Count() == 0)
return; return;
ANIM_GRAPH_PROFILE_EVENT("Events"); ANIM_GRAPH_PROFILE_EVENT("Events");
auto& context = Context.Get(); auto& context = *Context.Get();
float eventTimeMin = animPrevPos; float eventTimeMin = animPrevPos;
float eventTimeMax = animPos; float eventTimeMax = animPos;
if (loop && context.DeltaTime * speed < 0) if (loop && context.DeltaTime * speed < 0)
@@ -231,7 +231,7 @@ void AnimGraphExecutor::ProcessAnimation(AnimGraphImpulse* nodes, AnimGraphNode*
const float animPrevPos = GetAnimSamplePos(length, anim, prevPos, speed); const float animPrevPos = GetAnimSamplePos(length, anim, prevPos, speed);
// Add to trace // Add to trace
auto& context = Context.Get(); auto& context = *Context.Get();
if (context.Data->EnableTracing) if (context.Data->EnableTracing)
{ {
auto& trace = context.AddTraceEvent(node); auto& trace = context.AddTraceEvent(node);
@@ -655,7 +655,7 @@ void ComputeMultiBlendLength(float& length, AnimGraphNode* node)
void AnimGraphExecutor::ProcessGroupParameters(Box* box, Node* node, Value& value) void AnimGraphExecutor::ProcessGroupParameters(Box* box, Node* node, Value& value)
{ {
auto& context = Context.Get(); auto& context = *Context.Get();
switch (node->TypeID) switch (node->TypeID)
{ {
// Get // Get
@@ -766,7 +766,7 @@ void AnimGraphExecutor::ProcessGroupParameters(Box* box, Node* node, Value& valu
void AnimGraphExecutor::ProcessGroupTools(Box* box, Node* nodeBase, Value& value) void AnimGraphExecutor::ProcessGroupTools(Box* box, Node* nodeBase, Value& value)
{ {
auto& context = Context.Get(); auto& context = *Context.Get();
auto node = (AnimGraphNode*)nodeBase; auto node = (AnimGraphNode*)nodeBase;
switch (node->TypeID) switch (node->TypeID)
{ {
@@ -790,7 +790,7 @@ void AnimGraphExecutor::ProcessGroupTools(Box* box, Node* nodeBase, Value& value
void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Value& value) void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Value& value)
{ {
auto& context = Context.Get(); auto& context = *Context.Get();
if (context.ValueCache.TryGet(boxBase, value)) if (context.ValueCache.TryGet(boxBase, value))
return; return;
auto box = (AnimGraphBox*)boxBase; auto box = (AnimGraphBox*)boxBase;
@@ -2272,7 +2272,7 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu
void AnimGraphExecutor::ProcessGroupFunction(Box* boxBase, Node* node, Value& value) void AnimGraphExecutor::ProcessGroupFunction(Box* boxBase, Node* node, Value& value)
{ {
auto& context = Context.Get(); auto& context = *Context.Get();
if (context.ValueCache.TryGet(boxBase, value)) if (context.ValueCache.TryGet(boxBase, value))
return; return;
switch (node->TypeID) switch (node->TypeID)

View File

@@ -211,7 +211,10 @@ FlaxStorage::~FlaxStorage()
#if USE_EDITOR #if USE_EDITOR
// Ensure to close any outstanding file handles to prevent file locking in case it failed to load // Ensure to close any outstanding file handles to prevent file locking in case it failed to load
_file.DeleteAll(); Array<FileReadStream*> streams;
_file.GetValues(streams);
for (FileReadStream* stream : streams)
Delete(stream);
#endif #endif
} }
@@ -1264,7 +1267,6 @@ bool FlaxStorage::LoadAssetHeader(const Entry& e, AssetInitData& data)
} }
#if ASSETS_LOADING_EXTRA_VERIFICATION #if ASSETS_LOADING_EXTRA_VERIFICATION
// Validate loaded header (asset ID and type ID must be the same) // Validate loaded header (asset ID and type ID must be the same)
if (e.ID != data.Header.ID) if (e.ID != data.Header.ID)
{ {
@@ -1274,7 +1276,6 @@ bool FlaxStorage::LoadAssetHeader(const Entry& e, AssetInitData& data)
{ {
LOG(Error, "Loading asset header data mismatch! Expected Type Name: {0}, loaded header: {1}.\nSource: {2}", e.TypeName, data.Header.ToString(), ToString()); LOG(Error, "Loading asset header data mismatch! Expected Type Name: {0}, loaded header: {1}.\nSource: {2}", e.TypeName, data.Header.ToString(), ToString());
} }
#endif #endif
return false; return false;
@@ -1337,7 +1338,11 @@ bool FlaxStorage::CloseFileHandles()
return true; // Failed, someone is still accessing the file return true; // Failed, someone is still accessing the file
// Close file handles (from all threads) // Close file handles (from all threads)
_file.DeleteAll(); Array<FileReadStream*> streams;
_file.GetValues(streams);
for (FileReadStream* stream : streams)
Delete(stream);
_file.Clear();
return false; return false;
} }

View File

@@ -94,7 +94,7 @@ protected:
CriticalSection _loadLocker; CriticalSection _loadLocker;
// Storage // Storage
ThreadLocalObject<FileReadStream> _file; ThreadLocal<FileReadStream*> _file;
Array<FlaxChunk*> _chunks; Array<FlaxChunk*> _chunks;
// Metadata // Metadata

View File

@@ -5,11 +5,14 @@
#include "Engine/Threading/ThreadLocal.h" #include "Engine/Threading/ThreadLocal.h"
// Use a cached storage for the sorting (one per thread to reduce locking) // Use a cached storage for the sorting (one per thread to reduce locking)
ThreadLocal<Sorting::SortingStack> SortingStacks; ThreadLocal<Sorting::SortingStack*> SortingStacks;
Sorting::SortingStack& Sorting::SortingStack::Get() Sorting::SortingStack& Sorting::SortingStack::Get()
{ {
return SortingStacks.Get(); SortingStack*& stack = SortingStacks.Get();
if (!stack)
stack = New<SortingStack>();
return *stack;
} }
Sorting::SortingStack::SortingStack() Sorting::SortingStack::SortingStack()

View File

@@ -72,9 +72,6 @@ void EngineService::OnInit()
// Init services from front to back // Init services from front to back
auto& services = GetServices(); auto& services = GetServices();
#if TRACY_ENABLE
Char nameBuffer[100];
#endif
for (int32 i = 0; i < services.Count(); i++) for (int32 i = 0; i < services.Count(); i++)
{ {
const auto service = services[i]; const auto service = services[i];
@@ -82,6 +79,7 @@ void EngineService::OnInit()
#if TRACY_ENABLE #if TRACY_ENABLE
ZoneScoped; ZoneScoped;
int32 nameBufferLength = 0; int32 nameBufferLength = 0;
Char nameBuffer[100];
for (int32 j = 0; j < name.Length(); j++) for (int32 j = 0; j < name.Length(); j++)
if (name[j] != ' ') if (name[j] != ' ')
nameBuffer[nameBufferLength++] = name[j]; nameBuffer[nameBufferLength++] = name[j];
@@ -114,6 +112,18 @@ void EngineService::OnDispose()
const auto service = services[i]; const auto service = services[i];
if (service->IsInitialized) if (service->IsInitialized)
{ {
#if TRACY_ENABLE
ZoneScoped;
const StringView name(service->Name);
int32 nameBufferLength = 0;
Char nameBuffer[100];
for (int32 j = 0; j < name.Length(); j++)
if (name[j] != ' ')
nameBuffer[nameBufferLength++] = name[j];
Platform::MemoryCopy(nameBuffer + nameBufferLength, TEXT("::Dispose"), 10 * sizeof(Char));
nameBufferLength += 10;
ZoneName(nameBuffer, nameBufferLength);
#endif
service->IsInitialized = false; service->IsInitialized = false;
service->Dispose(); service->Dispose();
} }

View File

@@ -63,7 +63,7 @@ SceneObjectsFactory::Context::~Context()
{ {
if (Async) if (Async)
{ {
Array<ISerializeModifier*, FixedAllocation<PLATFORM_THREADS_LIMIT>> modifiers; Array<ISerializeModifier*, InlinedAllocation<PLATFORM_THREADS_LIMIT>> modifiers;
Modifiers.GetValues(modifiers); Modifiers.GetValues(modifiers);
for (ISerializeModifier* e : modifiers) for (ISerializeModifier* e : modifiers)
{ {

View File

@@ -52,7 +52,7 @@ namespace
int32 ParticleEmitterGraphCPUExecutor::ProcessSpawnModule(int32 index) int32 ParticleEmitterGraphCPUExecutor::ProcessSpawnModule(int32 index)
{ {
const auto node = _graph.SpawnModules[index]; const auto node = _graph.SpawnModules[index];
auto& context = Context.Get(); auto& context = *Context.Get();
auto& data = context.Data->SpawnModulesData[index]; auto& data = context.Data->SpawnModulesData[index];
// Accumulate the previous frame fraction // Accumulate the previous frame fraction
@@ -120,7 +120,7 @@ int32 ParticleEmitterGraphCPUExecutor::ProcessSpawnModule(int32 index)
void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode* node, int32 particlesStart, int32 particlesEnd) void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode* node, int32 particlesStart, int32 particlesEnd)
{ {
auto& context = Context.Get(); auto& context = *Context.Get();
auto stride = context.Data->Buffer->Stride; auto stride = context.Data->Buffer->Stride;
auto start = context.Data->Buffer->GetParticleCPU(particlesStart); auto start = context.Data->Buffer->GetParticleCPU(particlesStart);

View File

@@ -12,7 +12,7 @@
void ParticleEmitterGraphCPUExecutor::ProcessGroupParameters(Box* box, Node* node, Value& value) void ParticleEmitterGraphCPUExecutor::ProcessGroupParameters(Box* box, Node* node, Value& value)
{ {
auto& context = Context.Get(); auto& context = *Context.Get();
switch (node->TypeID) switch (node->TypeID)
{ {
// Get // Get
@@ -168,7 +168,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessGroupTextures(Box* box, Node* node,
void ParticleEmitterGraphCPUExecutor::ProcessGroupTools(Box* box, Node* node, Value& value) void ParticleEmitterGraphCPUExecutor::ProcessGroupTools(Box* box, Node* node, Value& value)
{ {
auto& context = Context.Get(); auto& context = *Context.Get();
switch (node->TypeID) switch (node->TypeID)
{ {
// Linearize Depth // Linearize Depth
@@ -202,7 +202,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessGroupTools(Box* box, Node* node, Va
void ParticleEmitterGraphCPUExecutor::ProcessGroupParticles(Box* box, Node* nodeBase, Value& value) void ParticleEmitterGraphCPUExecutor::ProcessGroupParticles(Box* box, Node* nodeBase, Value& value)
{ {
auto& context = Context.Get(); auto& context = *Context.Get();
auto node = (ParticleEmitterGraphCPUNode*)nodeBase; auto node = (ParticleEmitterGraphCPUNode*)nodeBase;
switch (node->TypeID) switch (node->TypeID)
{ {
@@ -468,7 +468,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessGroupParticles(Box* box, Node* node
void ParticleEmitterGraphCPUExecutor::ProcessGroupFunction(Box* box, Node* node, Value& value) void ParticleEmitterGraphCPUExecutor::ProcessGroupFunction(Box* box, Node* node, Value& value)
{ {
auto& context = Context.Get(); auto& context = *Context.Get();
switch (node->TypeID) switch (node->TypeID)
{ {
// Function Input // Function Input

View File

@@ -8,7 +8,7 @@
#include "Engine/Engine/Time.h" #include "Engine/Engine/Time.h"
#include "Engine/Profiler/ProfilerCPU.h" #include "Engine/Profiler/ProfilerCPU.h"
ThreadLocal<ParticleEmitterGraphCPUContext> ParticleEmitterGraphCPUExecutor::Context; ThreadLocal<ParticleEmitterGraphCPUContext*> ParticleEmitterGraphCPUExecutor::Context;
namespace namespace
{ {
@@ -122,7 +122,10 @@ ParticleEmitterGraphCPUExecutor::ParticleEmitterGraphCPUExecutor(ParticleEmitter
void ParticleEmitterGraphCPUExecutor::Init(ParticleEmitter* emitter, ParticleEffect* effect, ParticleEmitterInstance& data, float dt) void ParticleEmitterGraphCPUExecutor::Init(ParticleEmitter* emitter, ParticleEffect* effect, ParticleEmitterInstance& data, float dt)
{ {
auto& context = Context.Get(); auto& contextPtr = Context.Get();
if (!contextPtr)
contextPtr = New<ParticleEmitterGraphCPUContext>();
auto& context = *contextPtr;
context.GraphStack.Clear(); context.GraphStack.Clear();
context.GraphStack.Push(&_graph); context.GraphStack.Push(&_graph);
context.Data = &data; context.Data = &data;
@@ -252,8 +255,8 @@ bool ParticleEmitterGraphCPUExecutor::ComputeBounds(ParticleEmitter* emitter, Pa
case 401: case 401:
{ {
// Prepare graph data // Prepare graph data
auto& context = Context.Get();
Init(emitter, effect, data); Init(emitter, effect, data);
auto& context = *Context.Get();
// Find the maximum radius of the particle light // Find the maximum radius of the particle light
float maxRadius = 0.0f; float maxRadius = 0.0f;
@@ -377,7 +380,7 @@ void ParticleEmitterGraphCPUExecutor::Draw(ParticleEmitter* emitter, ParticleEff
// Prepare graph data // Prepare graph data
Init(emitter, effect, data); Init(emitter, effect, data);
auto& context = Context.Get(); auto& context = *Context.Get();
// Draw lights // Draw lights
for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.LightModules.Count(); moduleIndex++) for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.LightModules.Count(); moduleIndex++)
@@ -571,7 +574,6 @@ int32 ParticleEmitterGraphCPUExecutor::UpdateSpawn(ParticleEmitter* emitter, Par
PROFILE_CPU_NAMED("Spawn"); PROFILE_CPU_NAMED("Spawn");
// Prepare data // Prepare data
auto& context = Context.Get();
Init(emitter, effect, data, dt); Init(emitter, effect, data, dt);
// Spawn particles // Spawn particles
@@ -587,7 +589,7 @@ int32 ParticleEmitterGraphCPUExecutor::UpdateSpawn(ParticleEmitter* emitter, Par
VisjectExecutor::Value ParticleEmitterGraphCPUExecutor::eatBox(Node* caller, Box* box) VisjectExecutor::Value ParticleEmitterGraphCPUExecutor::eatBox(Node* caller, Box* box)
{ {
// Check if graph is looped or is too deep // Check if graph is looped or is too deep
auto& context = Context.Get(); auto& context = *Context.Get();
if (context.CallStackSize >= PARTICLE_EMITTER_MAX_CALL_STACK) if (context.CallStackSize >= PARTICLE_EMITTER_MAX_CALL_STACK)
{ {
OnError(caller, box, TEXT("Graph is looped or too deep!")); OnError(caller, box, TEXT("Graph is looped or too deep!"));
@@ -618,6 +620,6 @@ VisjectExecutor::Value ParticleEmitterGraphCPUExecutor::eatBox(Node* caller, Box
VisjectExecutor::Graph* ParticleEmitterGraphCPUExecutor::GetCurrentGraph() const VisjectExecutor::Graph* ParticleEmitterGraphCPUExecutor::GetCurrentGraph() const
{ {
auto& context = Context.Get(); auto& context = *Context.Get();
return (Graph*)context.GraphStack.Peek(); return (Graph*)context.GraphStack.Peek();
} }

View File

@@ -133,7 +133,7 @@ private:
ParticleEmitterGraphCPU& _graph; ParticleEmitterGraphCPU& _graph;
// Per-thread context to allow async execution // Per-thread context to allow async execution
static ThreadLocal<ParticleEmitterGraphCPUContext> Context; static ThreadLocal<ParticleEmitterGraphCPUContext*> Context;
public: public:
/// <summary> /// <summary>

View File

@@ -41,6 +41,10 @@ static_assert(sizeof(bool) == 1, "Invalid bool type size.");
static_assert(sizeof(float) == 4, "Invalid float type size."); static_assert(sizeof(float) == 4, "Invalid float type size.");
static_assert(sizeof(double) == 8, "Invalid double type size."); static_assert(sizeof(double) == 8, "Invalid double type size.");
// Check configuration
static_assert((PLATFORM_THREADS_LIMIT & (PLATFORM_THREADS_LIMIT - 1)) == 0, "Threads limit must be power of two.");
static_assert(PLATFORM_THREADS_LIMIT % 4 == 0, "Threads limit must be multiple of 4.");
float PlatformBase::CustomDpiScale = 1.0f; float PlatformBase::CustomDpiScale = 1.0f;
Array<User*, FixedAllocation<8>> PlatformBase::Users; Array<User*, FixedAllocation<8>> PlatformBase::Users;
Delegate<User*> PlatformBase::UserAdded; Delegate<User*> PlatformBase::UserAdded;

View File

@@ -115,7 +115,7 @@ Action Scripting::ScriptsLoaded;
Action Scripting::ScriptsUnload; Action Scripting::ScriptsUnload;
Action Scripting::ScriptsReloading; Action Scripting::ScriptsReloading;
Action Scripting::ScriptsReloaded; Action Scripting::ScriptsReloaded;
ThreadLocal<Scripting::IdsMappingTable*, PLATFORM_THREADS_LIMIT, true> Scripting::ObjectsLookupIdMapping; ThreadLocal<Scripting::IdsMappingTable*, PLATFORM_THREADS_LIMIT> Scripting::ObjectsLookupIdMapping;
ScriptingService ScriptingServiceInstance; ScriptingService ScriptingServiceInstance;
bool initFlaxEngine(); bool initFlaxEngine();

View File

@@ -6,7 +6,7 @@
#include "Engine/Scripting/ScriptingType.h" #include "Engine/Scripting/ScriptingType.h"
#include "Types.h" #include "Types.h"
template<typename T, int32 MaxThreads, bool ClearMemory> template<typename T, int32 MaxThreads>
class ThreadLocal; class ThreadLocal;
/// <summary> /// <summary>
@@ -114,7 +114,7 @@ public:
/// <summary> /// <summary>
/// The objects lookup identifier mapping used to override the object ids on FindObject call (used by the object references deserialization). /// The objects lookup identifier mapping used to override the object ids on FindObject call (used by the object references deserialization).
/// </summary> /// </summary>
static ThreadLocal<IdsMappingTable*, PLATFORM_THREADS_LIMIT, true> ObjectsLookupIdMapping; static ThreadLocal<IdsMappingTable*, PLATFORM_THREADS_LIMIT> ObjectsLookupIdMapping;
/// <summary> /// <summary>
/// Finds the object by the given identifier. Searches registered scene objects and optionally assets. Logs warning if fails. /// Finds the object by the given identifier. Searches registered scene objects and optionally assets. Logs warning if fails.

View File

@@ -12,7 +12,6 @@
class FLAXENGINE_API FileReadStream : public ReadStream class FLAXENGINE_API FileReadStream : public ReadStream
{ {
private: private:
File* _file; File* _file;
uint32 _virtualPosInBuffer; // Current position in the buffer (index) uint32 _virtualPosInBuffer; // Current position in the buffer (index)
uint32 _bufferSize; // Amount of loaded bytes from the file to the buffer uint32 _bufferSize; // Amount of loaded bytes from the file to the buffer
@@ -33,11 +32,9 @@ public:
~FileReadStream(); ~FileReadStream();
public: public:
/// <summary> /// <summary>
/// Gets the file handle. /// Gets the file handle.
/// </summary> /// </summary>
/// <returns>File</returns>
FORCE_INLINE const File* GetFile() const FORCE_INLINE const File* GetFile() const
{ {
return _file; return _file;
@@ -49,7 +46,6 @@ public:
void Unlink(); void Unlink();
public: public:
/// <summary> /// <summary>
/// Open file to write data to it /// Open file to write data to it
/// </summary> /// </summary>
@@ -58,7 +54,6 @@ public:
static FileReadStream* Open(const StringView& path); static FileReadStream* Open(const StringView& path);
public: public:
// [ReadStream] // [ReadStream]
void Flush() final override; void Flush() final override;
void Close() final override; void Close() final override;

View File

@@ -93,7 +93,7 @@ struct TIsPODType<JobContext>
namespace namespace
{ {
JobSystemService JobSystemInstance; JobSystemService JobSystemInstance;
Thread* Threads[PLATFORM_THREADS_LIMIT] = {}; Thread* Threads[PLATFORM_THREADS_LIMIT / 2] = {};
int32 ThreadsCount = 0; int32 ThreadsCount = 0;
bool JobStartingOnDispatch = true; bool JobStartingOnDispatch = true;
volatile int64 ExitFlag = 0; volatile int64 ExitFlag = 0;

View File

@@ -6,14 +6,14 @@
#include "Engine/Platform/Platform.h" #include "Engine/Platform/Platform.h"
/// <summary> /// <summary>
/// Per-thread local variable storage. /// Per-thread local variable storage for basic types (POD). Implemented using atomic with per-thread storage indexed via thread id hashing. Consider using 'THREADLOCAL' define before the variable instead.
/// Implemented using atomic with per-thread storage indexed via thread id hashing.
/// ForConsider using 'THREADLOCAL' define before the variable instead.
/// </summary> /// </summary>
template<typename T, int32 MaxThreads = PLATFORM_THREADS_LIMIT, bool ClearMemory = true> template<typename T, int32 MaxThreads = PLATFORM_THREADS_LIMIT>
class ThreadLocal class ThreadLocal
{ {
protected: protected:
constexpr static int32 DynamicMaxThreads = 1024;
static_assert(TIsPODType<T>::Value, "Only POD types are supported");
struct Bucket struct Bucket
{ {
@@ -21,34 +21,29 @@ protected:
T Value; T Value;
}; };
Bucket _buckets[MaxThreads]; Bucket _staticBuckets[MaxThreads];
Bucket* _dynamicBuckets = nullptr;
public: public:
ThreadLocal() ThreadLocal()
{ {
// Clear buckets Platform::MemoryClear(_staticBuckets, sizeof(_staticBuckets));
if (ClearMemory) }
{
Platform::MemoryClear(_buckets, sizeof(_buckets)); ~ThreadLocal()
} {
else Platform::Free(_dynamicBuckets);
{
for (int32 i = 0; i < MaxThreads; i++)
_buckets[i].ThreadID = 0;
}
} }
public: public:
FORCE_INLINE T& Get()
T& Get()
{ {
return _buckets[GetIndex()].Value; return GetBucket().Value;
} }
void Set(const T& value) FORCE_INLINE void Set(const T& value)
{ {
_buckets[GetIndex()].Value = value; GetBucket().Value = value;
} }
int32 Count() const int32 Count() const
@@ -56,9 +51,17 @@ public:
int32 result = 0; int32 result = 0;
for (int32 i = 0; i < MaxThreads; i++) for (int32 i = 0; i < MaxThreads; i++)
{ {
if (Platform::AtomicRead((int64 volatile*)&_buckets[i].ThreadID) != 0) if (Platform::AtomicRead((int64 volatile*)&_staticBuckets[i].ThreadID) != 0)
result++; result++;
} }
if (auto dynamicBuckets = (Bucket*)Platform::AtomicRead((intptr volatile*)&_dynamicBuckets))
{
for (int32 i = 0; i < MaxThreads; i++)
{
if (Platform::AtomicRead((int64 volatile*)&dynamicBuckets[i].ThreadID) != 0)
result++;
}
}
return result; return result;
} }
@@ -67,89 +70,72 @@ public:
{ {
for (int32 i = 0; i < MaxThreads; i++) for (int32 i = 0; i < MaxThreads; i++)
{ {
if (Platform::AtomicRead((int64 volatile*)&_buckets[i].ThreadID) != 0) if (Platform::AtomicRead((int64 volatile*)&_staticBuckets[i].ThreadID) != 0)
result.Add(_buckets[i].Value); result.Add(_staticBuckets[i].Value);
} }
if (auto dynamicBuckets = (Bucket*)Platform::AtomicRead((intptr volatile*)&_dynamicBuckets))
{
for (int32 i = 0; i < MaxThreads; i++)
{
if (Platform::AtomicRead((int64 volatile*)&dynamicBuckets[i].ThreadID) != 0)
result.Add(dynamicBuckets[i].Value);
}
}
}
void Clear()
{
Platform::MemoryClear(_staticBuckets, sizeof(_staticBuckets));
Platform::Free(_dynamicBuckets);
_dynamicBuckets = nullptr;
} }
protected: protected:
Bucket& GetBucket()
FORCE_INLINE static int32 Hash(const int64 value)
{ {
return value & (MaxThreads - 1); const int64 key = (int64)Platform::GetCurrentThreadID();
}
FORCE_INLINE int32 GetIndex() // Search statically allocated buckets
{ int32 index = (int32)(key & (MaxThreads - 1));
int64 key = (int64)Platform::GetCurrentThreadID(); int32 spaceLeft = MaxThreads;
auto index = Hash(key); while (spaceLeft)
while (true)
{ {
const int64 value = Platform::AtomicRead(&_buckets[index].ThreadID); const int64 value = Platform::AtomicRead(&_staticBuckets[index].ThreadID);
if (value == key) if (value == key)
break; return _staticBuckets[index];
if (value == 0 && Platform::InterlockedCompareExchange(&_buckets[index].ThreadID, key, 0) == 0) if (value == 0 && Platform::InterlockedCompareExchange(&_staticBuckets[index].ThreadID, key, 0) == 0)
break; return _staticBuckets[index];
index = Hash(index + 1); index = (index + 1) & (MaxThreads - 1);
spaceLeft--;
} }
return index;
}
};
/// <summary> // Allocate dynamic buckets if missing
/// Per thread local object DYNAMIC:
/// </summary> auto dynamicBuckets = (Bucket*)Platform::AtomicRead((intptr volatile*)&_dynamicBuckets);
template<typename T, int32 MaxThreads = PLATFORM_THREADS_LIMIT> if (!dynamicBuckets)
class ThreadLocalObject : public ThreadLocal<T*, MaxThreads>
{
public:
typedef ThreadLocal<T*, MaxThreads> Base;
public:
void Delete()
{
auto value = Base::Get();
Base::SetAll(nullptr);
::Delete(value);
}
void DeleteAll()
{
for (int32 i = 0; i < MaxThreads; i++)
{ {
auto& bucket = Base::_buckets[i]; dynamicBuckets = (Bucket*)Platform::Allocate(DynamicMaxThreads * sizeof(Bucket), 16);
if (bucket.Value != nullptr) Platform::MemoryClear(dynamicBuckets, DynamicMaxThreads * sizeof(Bucket));
if (Platform::InterlockedCompareExchange((intptr volatile*)&_dynamicBuckets, (intptr)dynamicBuckets, 0) != 0)
{ {
::Delete(bucket.Value); Platform::Free(dynamicBuckets);
bucket.ThreadID = 0; goto DYNAMIC;
bucket.Value = nullptr;
} }
} }
}
template<typename AllocationType = HeapAllocation> // Search dynamically allocated buckets
void GetNotNullValues(Array<T*, AllocationType>& result) const index = (int32)(key & (DynamicMaxThreads - 1));
{ spaceLeft = DynamicMaxThreads;
result.EnsureCapacity(MaxThreads); while (spaceLeft)
for (int32 i = 0; i < MaxThreads; i++)
{ {
if (Base::_buckets[i].Value != nullptr) const int64 value = Platform::AtomicRead(&dynamicBuckets[index].ThreadID);
{ if (value == key)
result.Add(Base::_buckets[i].Value); return dynamicBuckets[index];
} if (value == 0 && Platform::InterlockedCompareExchange(&dynamicBuckets[index].ThreadID, key, 0) == 0)
return dynamicBuckets[index];
index = (index + 1) & (DynamicMaxThreads - 1);
spaceLeft--;
} }
} return *(Bucket*)nullptr;
int32 CountNotNullValues() const
{
int32 result = 0;
for (int32 i = 0; i < MaxThreads; i++)
{
if (Base::_buckets[i].Value != nullptr)
result++;
}
return result;
} }
}; };

View File

@@ -58,7 +58,7 @@ ThreadPoolService ThreadPoolServiceInstance;
bool ThreadPoolService::Init() bool ThreadPoolService::Init()
{ {
// Spawn threads // Spawn threads
const int32 numThreads = Math::Clamp<int32>(Platform::GetCPUInfo().ProcessorCoreCount - 1, 2, PLATFORM_THREADS_LIMIT); const int32 numThreads = Math::Clamp<int32>(Platform::GetCPUInfo().ProcessorCoreCount - 1, 2, PLATFORM_THREADS_LIMIT / 2);
LOG(Info, "Spawning {0} Thread Pool workers", numThreads); LOG(Info, "Spawning {0} Thread Pool workers", numThreads);
for (int32 i = ThreadPoolImpl::Threads.Count(); i < numThreads; i++) for (int32 i = ThreadPoolImpl::Threads.Count(); i < numThreads; i++)
{ {