diff --git a/Source/Engine/Core/Memory/Allocation.cpp b/Source/Engine/Core/Memory/Allocation.cpp
index c55ab1dab..683c31c43 100644
--- a/Source/Engine/Core/Memory/Allocation.cpp
+++ b/Source/Engine/Core/Memory/Allocation.cpp
@@ -52,3 +52,81 @@ void* ArenaAllocator::Allocate(uint64 size, uint64 alignment)
     return mem;
 }
+
+void ConcurrentArenaAllocator::Free()
+{
+    _locker.Lock();
+
+    // Free all pages
+    Page* page = (Page*)_first;
+    while (page)
+    {
+#if COMPILE_WITH_PROFILER
+        ProfilerMemory::OnGroupUpdate(ProfilerMemory::Groups::MallocArena, -(int64)page->Size, -1);
+#endif
+        if (_free1)
+            _free1(page->Memory);
+        else
+            _free2(page->Memory, page->Size);
+        Page* next = page->Next;
+        if (_free1)
+            _free1(page);
+        else
+            _free2(page, sizeof(Page));
+        page = next;
+    }
+
+    // Unlink
+    _first = 0;
+    _totalBytes = 0;
+
+    _locker.Unlock();
+}
+
+void* ConcurrentArenaAllocator::Allocate(uint64 size, uint64 alignment)
+{
+RETRY:
+
+    // Check if the current page has some space left
+    Page* page = (Page*)Platform::AtomicRead(&_first);
+    if (page)
+    {
+        int64 offset = Platform::AtomicRead(&page->Offset);
+        int64 offsetAligned = Math::AlignUp(offset, (int64)alignment);
+        int64 end = offsetAligned + size;
+        if (end <= page->Size)
+        {
+            // Try to allocate within a page
+            if (Platform::InterlockedCompareExchange(&page->Offset, end, offset) != offset)
+            {
+                // Someone else changed allocated so retry (new offset might mismatch alignment)
+                goto RETRY;
+            }
+            Platform::InterlockedAdd(&_totalBytes, (int64)size);
+            return (byte*)page->Memory + offsetAligned;
+        }
+    }
+
+    // Page allocation is thread-synced
+    _locker.Lock();
+
+    // Check if page was unchanged by any other thread
+    if ((Page*)Platform::AtomicRead(&_first) == page)
+    {
+        uint64 pageSize = Math::Max<uint64>(_pageSize, size);
+#if COMPILE_WITH_PROFILER
+        ProfilerMemory::OnGroupUpdate(ProfilerMemory::Groups::MallocArena, (int64)pageSize, 1);
+#endif
+        page = (Page*)(_allocate1 ?
_allocate1(sizeof(Page), alignof(Page)) : _allocate2(sizeof(Page)));
+        page->Memory = _allocate1 ? _allocate1(pageSize, 16) : _allocate2(pageSize);
+        page->Next = (Page*)_first;
+        page->Offset = 0;
+        page->Size = (int64)pageSize;
+        Platform::AtomicStore(&_first, (intptr)page);
+    }
+
+    _locker.Unlock();
+
+    // Use a single code path for allocation
+    goto RETRY;
+}
 
diff --git a/Source/Engine/Core/Memory/ArenaAllocation.h b/Source/Engine/Core/Memory/ArenaAllocation.h
index 7de7e0994..eaffcff95 100644
--- a/Source/Engine/Core/Memory/ArenaAllocation.h
+++ b/Source/Engine/Core/Memory/ArenaAllocation.h
@@ -3,6 +3,7 @@
 #pragma once
 
 #include "Allocation.h"
+#include "Engine/Platform/CriticalSection.h"
 /// <summary>
 /// Allocator that uses pages for stack-based allocs without freeing memory during it's lifetime.
 /// </summary>
@@ -66,21 +67,92 @@ public:
     }
 };
+/// <summary>
+/// Allocator that uses pages for stack-based allocs without freeing memory during it's lifetime. Thread-safe to allocate memory from multiple threads at once.
+/// </summary>
+class ConcurrentArenaAllocator
+{
+private:
+    struct Page
+    {
+        void* Memory;
+        Page* Next;
+        volatile int64 Offset;
+        int64 Size;
+    };
+
+    int32 _pageSize;
+    volatile int64 _first = 0;
+    volatile int64 _totalBytes = 0;
+    void*(*_allocate1)(uint64 size, uint64 alignment) = nullptr;
+    void(*_free1)(void* ptr) = nullptr;
+    void*(*_allocate2)(uint64 size) = nullptr;
+    void(*_free2)(void* ptr, uint64 size) = nullptr;
+    CriticalSection _locker;
+
+public:
+    ConcurrentArenaAllocator(int32 pageSizeBytes, void* (*customAllocate)(uint64 size, uint64 alignment), void(*customFree)(void* ptr))
+        : _pageSize(pageSizeBytes)
+        , _allocate1(customAllocate)
+        , _free1(customFree)
+    {
+    }
+
+    ConcurrentArenaAllocator(int32 pageSizeBytes, void* (*customAllocate)(uint64 size), void(*customFree)(void* ptr, uint64 size))
+        : _pageSize(pageSizeBytes)
+        , _allocate2(customAllocate)
+        , _free2(customFree)
+    {
+    }
+
+    ConcurrentArenaAllocator(int32 pageSizeBytes = 1024 * 1024) //
1 MB by default
+        : ConcurrentArenaAllocator(pageSizeBytes, Allocator::Allocate, Allocator::Free)
+    {
+    }
+
+    ~ConcurrentArenaAllocator()
+    {
+        Free();
+    }
+
+    // Gets the total amount of bytes allocated in arena (excluding alignment).
+    int64 GetTotalBytes() const
+    {
+        return Platform::AtomicRead(&_totalBytes);
+    }
+
+    // Allocates a chunk of uninitialized memory.
+    void* Allocate(uint64 size, uint64 alignment = 1);
+
+    // Frees all memory allocations within allocator.
+    void Free();
+
+    // Creates a new object within the arena allocator.
+    template<class T, typename... Args>
+    inline T* New(Args&&...args)
+    {
+        T* ptr = (T*)Allocate(sizeof(T));
+        new(ptr) T(Forward<Args>(args)...);
+        return ptr;
+    }
+};
+
 /// <summary>
 /// The memory allocation policy that uses a part of shared page allocator. Allocations are performed in stack-manner, and free is no-op.
 /// </summary>
-class ArenaAllocation
+template<typename ArenaType>
+class ArenaAllocationBase
 {
 public:
     enum { HasSwap = true };
 
-    typedef ArenaAllocator* Tag;
+    typedef ArenaType* Tag;
 
     template<typename T>
     class Data
     {
     private:
         T* _data = nullptr;
-        ArenaAllocator* _arena = nullptr;
+        ArenaType* _arena = nullptr;
 
     public:
         FORCE_INLINE Data()
@@ -142,3 +216,13 @@ public:
         }
     };
 };
+
+/// <summary>
+/// The memory allocation policy that uses a part of shared page allocator. Allocations are performed in stack-manner, and free is no-op.
+/// </summary>
+typedef ArenaAllocationBase<ConcurrentArenaAllocator> ConcurrentArenaAllocation;
diff --git a/Source/Engine/Foliage/Foliage.cpp b/Source/Engine/Foliage/Foliage.cpp
index 4fc576ff8..f51f4ece0 100644
--- a/Source/Engine/Foliage/Foliage.cpp
+++ b/Source/Engine/Foliage/Foliage.cpp
@@ -103,17 +103,17 @@ void Foliage::DrawInstance(RenderContext& renderContext, FoliageInstance& instan
     for (int32 meshIndex = 0; meshIndex < meshes.Count(); meshIndex++)
     {
         auto& drawCall = drawCallsLists[lod][meshIndex];
-        if (!drawCall.DrawCall.Material)
+        if (!drawCall.Material)
             continue;
 
         DrawKey key;
-        key.Mat = drawCall.DrawCall.Material;
+        key.Mat = drawCall.Material;
         key.Geo = &meshes.Get()[meshIndex];
         key.Lightmap = instance.Lightmap.TextureIndex;
         auto* e = result.TryGet(key);
         if (!e)
         {
-            e = &result[key];
+            e = &result.Add(key, BatchedDrawCall(renderContext.List))->Value;
             ASSERT_LOW_LAYER(key.Mat);
             e->DrawCall.Material = key.Mat;
             e->DrawCall.Surface.Lightmap = EnumHasAnyFlags(_staticFlags, StaticFlags::Lightmap) && _scene ? _scene->LightmapsData.GetReadyLightmap(key.Lightmap) : nullptr;
@@ -127,7 +127,7 @@ void Foliage::DrawInstance(RenderContext& renderContext, FoliageInstance& instan
         const Float3 translation = transform.Translation - renderContext.View.Origin;
         Matrix::Transformation(transform.Scale, transform.Orientation, translation, world);
         constexpr float worldDeterminantSign = 1.0f;
-        instanceData.Store(world, world, instance.Lightmap.UVsArea, drawCall.DrawCall.Surface.GeometrySize, instance.Random, worldDeterminantSign, lodDitherFactor);
+        instanceData.Store(world, world, instance.Lightmap.UVsArea, drawCall.Surface.GeometrySize, instance.Random, worldDeterminantSign, lodDitherFactor);
     }
 }
 
@@ -430,7 +430,7 @@ void Foliage::DrawType(RenderContext& renderContext, const FoliageType& type, Dr
     {
         const auto& mesh = meshes.Get()[meshIndex];
         auto& drawCall = drawCallsList.Get()[meshIndex];
-        drawCall.DrawCall.Material = nullptr;
+        drawCall.Material = nullptr; // DrawInstance skips draw calls from meshes
with unset material
 
         // Check entry visibility
         const auto& entry = type.Entries[mesh.GetMaterialSlotIndex()];
@@ -455,13 +455,13 @@ void Foliage::DrawType(RenderContext& renderContext, const FoliageType& type, Dr
         if (drawModes == DrawPass::None)
             continue;
 
-        drawCall.DrawCall.Material = material;
-        drawCall.DrawCall.Surface.GeometrySize = mesh.GetBox().GetSize();
+        drawCall.Material = material;
+        drawCall.Surface.GeometrySize = mesh.GetBox().GetSize();
        }
    }
 
    // Draw instances of the foliage type
-   BatchedDrawCalls result;
+   BatchedDrawCalls result(&renderContext.List->Memory);
    DrawCluster(renderContext, type.Root, type, drawCallsLists, result);
 
    // Submit draw calls with valid instances added
diff --git a/Source/Engine/Foliage/Foliage.h b/Source/Engine/Foliage/Foliage.h
index 1855e9914..6f8b36cf4 100644
--- a/Source/Engine/Foliage/Foliage.h
+++ b/Source/Engine/Foliage/Foliage.h
@@ -6,6 +6,7 @@
 #include "FoliageInstance.h"
 #include "FoliageCluster.h"
 #include "FoliageType.h"
+#include "Engine/Core/Memory/ArenaAllocation.h"
 #include "Engine/Level/Actor.h"
 
 /// <summary>
@@ -178,8 +179,8 @@ private:
         }
     };
 
-    typedef Array<BatchedDrawCall, RendererAllocation> DrawCallsList;
-    typedef Dictionary<DrawKey, BatchedDrawCall, RendererAllocation> BatchedDrawCalls;
+    typedef Array<DrawCall, RendererAllocation> DrawCallsList;
+    typedef Dictionary<DrawKey, BatchedDrawCall, ConcurrentArenaAllocation> BatchedDrawCalls;
     void DrawInstance(RenderContext& renderContext, FoliageInstance& instance, const FoliageType& type, Model* model, int32 lod, float lodDitherFactor, DrawCallsList* drawCallsLists, BatchedDrawCalls& result) const;
     void DrawCluster(RenderContext& renderContext, FoliageCluster* cluster, const FoliageType& type, DrawCallsList* drawCallsLists, BatchedDrawCalls& result) const;
 #else
diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp
index 2a6540da5..0dedfda38 100644
--- a/Source/Engine/Renderer/RenderList.cpp
+++ b/Source/Engine/Renderer/RenderList.cpp
@@ -169,6 +169,7 @@ void RenderEnvironmentProbeData::SetShaderData(ShaderEnvProbeData& data) const
 
 void* RendererAllocation::Allocate(uintptr size)
 {
+
PROFILE_CPU(); void* result = nullptr; MemPoolLocker.Lock(); for (int32 i = 0; i < MemPool.Count(); i++) @@ -188,6 +189,7 @@ void* RendererAllocation::Allocate(uintptr size) void RendererAllocation::Free(void* ptr, uintptr size) { + PROFILE_CPU(); MemPoolLocker.Lock(); MemPool.Add({ ptr, size }); MemPoolLocker.Unlock(); @@ -418,6 +420,18 @@ bool RenderList::HasAnyPostFx(const RenderContext& renderContext, MaterialPostFx return false; } +BatchedDrawCall::BatchedDrawCall(RenderList* list) + : Instances(&list->Memory) +{ +} + +BatchedDrawCall::BatchedDrawCall(BatchedDrawCall&& other) noexcept + : DrawCall(other.DrawCall) + , ObjectsStartIndex(other.ObjectsStartIndex) + , Instances(MoveTemp(other.Instances)) +{ +} + void DrawCallsList::Clear() { Indices.Clear(); @@ -433,6 +447,7 @@ bool DrawCallsList::IsEmpty() const RenderList::RenderList(const SpawnParams& params) : ScriptingObject(params) + , Memory(4 * 1024 * 1024, RendererAllocation::Allocate, RendererAllocation::Free) // 4MB pages, use page pooling via RendererAllocation , DirectionalLights(4) , PointLights(32) , SpotLights(32) @@ -443,8 +458,8 @@ RenderList::RenderList(const SpawnParams& params) , AtmosphericFog(nullptr) , Fog(nullptr) , Blendable(32) - , ObjectBuffer(0, PixelFormat::R32G32B32A32_Float, false, TEXT("Object Bufffer")) - , TempObjectBuffer(0, PixelFormat::R32G32B32A32_Float, false, TEXT("Object Bufffer")) + , ObjectBuffer(0, PixelFormat::R32G32B32A32_Float, false, TEXT("Object Buffer")) + , TempObjectBuffer(0, PixelFormat::R32G32B32A32_Float, false, TEXT("Object Buffer")) , _instanceBuffer(0, sizeof(ShaderObjectDrawInstanceData), TEXT("Instance Buffer"), GPUVertexLayout::Get({ { VertexElement::Types::Attribute0, 3, 0, 1, PixelFormat::R32_UInt } })) { } @@ -480,6 +495,7 @@ void RenderList::Clear() _instanceBuffer.Clear(); ObjectBuffer.Clear(); TempObjectBuffer.Clear(); + Memory.Free(); } struct PackedSortKey diff --git a/Source/Engine/Renderer/RenderList.h b/Source/Engine/Renderer/RenderList.h 
index d5288e6ee..f17e1b045 100644
--- a/Source/Engine/Renderer/RenderList.h
+++ b/Source/Engine/Renderer/RenderList.h
@@ -3,6 +3,7 @@
 #pragma once
 
 #include "Engine/Core/Collections/Array.h"
+#include "Engine/Core/Memory/ArenaAllocation.h"
 #include "Engine/Core/Math/Half.h"
 #include "Engine/Graphics/PostProcessSettings.h"
 #include "Engine/Graphics/DynamicBuffer.h"
@@ -241,7 +242,11 @@ struct BatchedDrawCall
 {
     DrawCall DrawCall;
     uint16 ObjectsStartIndex = 0; // Index of the instances start in the ObjectsBuffer (set internally).
-    Array<ShaderObjectData> Instances;
+    Array<ShaderObjectData, ConcurrentArenaAllocation> Instances;
+
+    BatchedDrawCall() { CRASH; } // Don't use it
+    BatchedDrawCall(RenderList* list);
+    BatchedDrawCall(BatchedDrawCall&& other) noexcept;
 };
 
 /// <summary>
@@ -298,6 +303,11 @@ API_CLASS(Sealed) class FLAXENGINE_API RenderList : public ScriptingObject
     static void CleanupCache();
 
 public:
+    /// <summary>
+    /// Memory storage with all draw-related data that lives during a single frame rendering time. Thread-safe to allocate memory during rendering jobs.
+    /// </summary>
+    ConcurrentArenaAllocator Memory;
+
     /// <summary>
     /// All scenes for rendering.
     /// </summary>