Optimize job system memory allocations

This commit is contained in:
Wojtek Figat
2024-06-24 23:19:01 +02:00
parent 3bbaa8dad0
commit b545d8800c
5 changed files with 145 additions and 95 deletions

View File

@@ -5,6 +5,7 @@
#include "Engine/Platform/Platform.h"
#include "Engine/Core/Memory/Memory.h"
#include "Engine/Core/Memory/Allocation.h"
#include "Engine/Core/Math/Math.h"
/// <summary>
/// Template for ring buffer with variable capacity.
@@ -98,4 +99,10 @@ public:
Memory::DestructItems(Get() + Math::Min(_front, _back), _count);
_front = _back = _count = 0;
}
/// <summary>
/// Clears the buffer and then frees the underlying allocation. Clear() runs first so the items are destructed and the front/back/count state is reset before the memory is released.
/// </summary>
void Release()
{
Clear();
_allocation.Free();
}
};

View File

@@ -0,0 +1,86 @@
// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved.
#pragma once
#include "Engine/Core/Memory/Memory.h"
#include "Engine/Core/Types/BaseTypes.h"
// Base class for custom heap-based allocators (eg. with local pooling/paging). Expects only Allocate/Free methods to be provided.
// The derived type passes itself as This and must expose functions callable as This::Allocate(sizeInBytes) and This::Free(ptr, sizeInBytes).
// InitialCapacity is the capacity picked by CalculateCapacityGrow when growing from an empty (zero capacity) state.
template<typename This, uint32 InitialCapacity = 8>
class SimpleHeapAllocation
{
public:
    enum { HasSwap = true };

    // Storage for a single buffer of T items obtained from This.
    // Remembers the size (in bytes) of the buffer so it can be handed back to This::Free.
    // NOTE(review): copy operations are implicitly generated; copying a non-empty Data would make two objects free the same block - presumably the collections never copy it, confirm.
    template<typename T>
    class Data
    {
        // Currently held buffer (null when nothing is allocated).
        T* _data = nullptr;
        // Size of the held buffer in bytes. Initialized to zero so that Swap never reads an indeterminate value on a never-allocated instance.
        uintptr _size = 0;

    public:
        FORCE_INLINE Data()
        {
        }

        // Returns the held buffer (if any) to This.
        FORCE_INLINE ~Data()
        {
            if (_data)
                This::Free(_data, _size);
        }

        // Gets the pointer to the held buffer (null when nothing is allocated).
        FORCE_INLINE T* Get()
        {
            return _data;
        }

        // Gets the pointer to the held buffer (null when nothing is allocated).
        FORCE_INLINE const T* Get() const
        {
            return _data;
        }

        // Calculates the capacity to grow to: doubles the current capacity (or starts from InitialCapacity when it is zero) and never returns less than minCapacity.
        FORCE_INLINE int32 CalculateCapacityGrow(int32 capacity, int32 minCapacity) const
        {
            // Explicit cast keeps both branches of the conditional signed
            capacity = capacity ? capacity * 2 : (int32)InitialCapacity;
            if (capacity < minCapacity)
                capacity = minCapacity;
            return capacity;
        }

        // Allocates a buffer for the given amount of items. Overwrites the held pointer without freeing it.
        FORCE_INLINE void Allocate(uint64 capacity)
        {
            _size = capacity * sizeof(T);
            _data = (T*)This::Allocate(_size);
        }

        // Replaces the held buffer with a new one sized for the given capacity (no buffer at all when capacity is zero).
        // The first newCount items are moved into the new buffer, then all oldCount items of the old buffer are destructed and the old buffer is returned to This.
        FORCE_INLINE void Relocate(uint64 capacity, int32 oldCount, int32 newCount)
        {
            const uintptr newSize = capacity * sizeof(T);
            T* newData = capacity != 0 ? (T*)This::Allocate(newSize) : nullptr;
            if (oldCount)
            {
                if (newCount > 0)
                    Memory::MoveItems(newData, _data, newCount);
                Memory::DestructItems(_data, oldCount);
            }
            // The old buffer has to be freed with the old size, so update _size only afterwards
            if (_data)
                This::Free(_data, _size);
            _data = newData;
            _size = newSize;
        }

        // Returns the held buffer (if any) to This and resets this object to the empty state.
        FORCE_INLINE void Free()
        {
            if (_data)
            {
                This::Free(_data, _size);
                _data = nullptr;
                // Keep the size in sync with the (now missing) buffer
                _size = 0;
            }
        }

        // Exchanges the held buffer (pointer and size) with the other object.
        FORCE_INLINE void Swap(Data& other)
        {
            ::Swap(_data, other._data);
            ::Swap(_size, other._size);
        }
    };
};

View File

@@ -30,13 +30,7 @@ namespace
Array<DrawBatch> SortingBatches;
Array<RenderList*> FreeRenderList;
struct MemPoolEntry
{
void* Ptr;
uintptr Size;
};
Array<MemPoolEntry> MemPool;
Array<Pair<void*, uintptr>> MemPool;
CriticalSection MemPoolLocker;
}
@@ -147,18 +141,16 @@ void* RendererAllocation::Allocate(uintptr size)
MemPoolLocker.Lock();
for (int32 i = 0; i < MemPool.Count(); i++)
{
if (MemPool[i].Size == size)
if (MemPool.Get()[i].Second == size)
{
result = MemPool[i].Ptr;
result = MemPool.Get()[i].First;
MemPool.RemoveAt(i);
break;
}
}
MemPoolLocker.Unlock();
if (!result)
{
result = Platform::Allocate(size, 16);
}
return result;
}
@@ -201,7 +193,7 @@ void RenderList::CleanupCache()
SortingIndices.Resize(0);
FreeRenderList.ClearDelete();
for (auto& e : MemPool)
Platform::Free(e.Ptr);
Platform::Free(e.First);
MemPool.Clear();
}

View File

@@ -2,86 +2,11 @@
#pragma once
#include "Engine/Core/Memory/Memory.h"
#include "Engine/Core/Types/BaseTypes.h"
#include "Engine/Core/Memory/SimpleHeapAllocation.h"
class RendererAllocation
class RendererAllocation : public SimpleHeapAllocation<RendererAllocation, 64>
{
public:
static FLAXENGINE_API void* Allocate(uintptr size);
static FLAXENGINE_API void Free(void* ptr, uintptr size);
enum { HasSwap = true };
template<typename T>
class Data
{
T* _data = nullptr;
uintptr _size;
public:
FORCE_INLINE Data()
{
}
FORCE_INLINE ~Data()
{
if (_data)
RendererAllocation::Free(_data, _size);
}
FORCE_INLINE T* Get()
{
return _data;
}
FORCE_INLINE const T* Get() const
{
return _data;
}
FORCE_INLINE int32 CalculateCapacityGrow(int32 capacity, int32 minCapacity) const
{
capacity = capacity ? capacity * 2 : 64;
if (capacity < minCapacity)
capacity = minCapacity;
return capacity;
}
FORCE_INLINE void Allocate(uint64 capacity)
{
_size = capacity * sizeof(T);
_data = (T*)RendererAllocation::Allocate(_size);
}
FORCE_INLINE void Relocate(uint64 capacity, int32 oldCount, int32 newCount)
{
T* newData = capacity != 0 ? (T*)RendererAllocation::Allocate(capacity * sizeof(T)) : nullptr;
if (oldCount)
{
if (newCount > 0)
Memory::MoveItems(newData, _data, newCount);
Memory::DestructItems(_data, oldCount);
}
if (_data)
RendererAllocation::Free(_data, _size);
_data = newData;
_size = capacity * sizeof(T);
}
FORCE_INLINE void Free()
{
if (_data)
{
RendererAllocation::Free(_data, _size);
_data = nullptr;
}
}
FORCE_INLINE void Swap(Data& other)
{
::Swap(_data, other._data);
::Swap(_size, other._size);
}
};
};

View File

@@ -6,6 +6,8 @@
#include "Engine/Platform/Thread.h"
#include "Engine/Platform/ConditionVariable.h"
#include "Engine/Core/Types/Span.h"
#include "Engine/Core/Types/Pair.h"
#include "Engine/Core/Memory/SimpleHeapAllocation.h"
#include "Engine/Core/Collections/Dictionary.h"
#include "Engine/Core/Collections/RingBuffer.h"
#include "Engine/Engine/EngineService.h"
@@ -18,6 +20,14 @@
#if JOB_SYSTEM_ENABLED
// Local allocator for job system memory that uses internal pooling and assumes that JobsLocker is taken (write access owned by the calling thread).
// The pool itself is accessed without any locking inside Allocate/Free, which is why the caller has to hold the lock.
class JobSystemAllocation : public SimpleHeapAllocation<JobSystemAllocation>
{
public:
// Returns a block of the given size (in bytes): a pooled block whose recorded size is exactly equal to the requested one if there is any, otherwise a new block from Platform::Allocate with alignment of 16.
static void* Allocate(uintptr size);
// Puts the block together with its size (in bytes) into the pool for later reuse. The memory is not returned to the platform here.
static void Free(void* ptr, uintptr size);
};
class JobSystemService : public EngineService
{
public:
@@ -46,9 +56,9 @@ struct TIsPODType<JobData>
struct JobContext
{
volatile int64 JobsLeft;
volatile int64 DependenciesLeft;
int32 DependenciesLeft;
Function<void(int32)> Job;
Array<int64> Dependants;
Array<int64, JobSystemAllocation> Dependants;
};
template<>
@@ -80,12 +90,13 @@ public:
namespace
{
JobSystemService JobSystemInstance;
Array<Pair<void*, uintptr>> MemPool;
Thread* Threads[PLATFORM_THREADS_LIMIT / 2] = {};
int32 ThreadsCount = 0;
bool JobStartingOnDispatch = true;
volatile int64 ExitFlag = 0;
volatile int64 JobLabel = 0;
Dictionary<int64, JobContext> JobContexts;
Dictionary<int64, JobContext, JobSystemAllocation> JobContexts;
ConditionVariable JobsSignal;
CriticalSection JobsMutex;
ConditionVariable WaitSignal;
@@ -94,6 +105,28 @@ namespace
RingBuffer<JobData> Jobs;
}
void* JobSystemAllocation::Allocate(uintptr size)
{
    // Look for a pooled block that has exactly the requested size
    const int32 pooledCount = MemPool.Count();
    const auto* pooled = MemPool.Get();
    int32 index = 0;
    while (index < pooledCount && pooled[index].Second != size)
        index++;

    // Take the block out of the pool when the search has found one
    void* block = nullptr;
    if (index < pooledCount)
    {
        block = pooled[index].First;
        MemPool.RemoveAt(index);
    }

    // Nothing to reuse so fallback to the platform allocator
    return block ? block : Platform::Allocate(size, 16);
}
void JobSystemAllocation::Free(void* ptr, uintptr size)
{
MemPool.Add({ ptr, size });
}
bool JobSystemService::Init()
{
ThreadsCount = Math::Min<int32>(Platform::GetCPUInfo().LogicalProcessorCount, ARRAY_COUNT(Threads));
@@ -130,6 +163,12 @@ void JobSystemService::Dispose()
Threads[i] = nullptr;
}
}
JobContexts.SetCapacity(0);
Jobs.Release();
for (auto& e : MemPool)
Platform::Free(e.First);
MemPool.Clear();
}
int32 JobSystemThread::Run()
@@ -176,7 +215,7 @@ int32 JobSystemThread::Run()
for (int64 dependant : context.Dependants)
{
JobContext& dependantContext = JobContexts.At(dependant);
if (Platform::InterlockedDecrement(&dependantContext.DependenciesLeft) <= 0)
if (--dependantContext.DependenciesLeft <= 0)
{
// Dispatch dependency when it's ready
JobData dependantData;
@@ -245,7 +284,7 @@ int64 JobSystem::Dispatch(const Function<void(int32)>& job, int32 jobCount)
context.DependenciesLeft = 0;
JobsLocker.Lock();
JobContexts.Add(label, context);
JobContexts.Add(label, MoveTemp(context));
for (data.Index = 0; data.Index < jobCount; data.Index++)
Jobs.PushBack(data);
JobsLocker.Unlock();
@@ -291,9 +330,10 @@ int64 JobSystem::Dispatch(const Function<void(int32)>& job, Span<int64> dependen
dependencyContext->Dependants.Add(label);
}
}
JobContexts.Add(label, context);
JobContexts.Add(label, MoveTemp(context));
if (context.DependenciesLeft == 0)
{
// No dependencies left to complete so dispatch now
for (data.Index = 0; data.Index < jobCount; data.Index++)
Jobs.PushBack(data);
}