diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index 54b3d1dd8..88fa2cd88 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -210,31 +210,10 @@ void DrawEmitterCPU(RenderContextBatch& renderContextBatch, ParticleBuffer* buff const int32 stride = buffer->Stride; const int32 listSize = buffer->CPU.Count; const int32 indicesByteSize = listSize * buffer->GPU.SortedIndices->GetStride(); - Array sortingKeysList[4]; - Array sortingIndicesList[2]; - uint32* sortingKeys[2]; - void* sortingIndices[2]; - if (listSize < 500) - { - // Use fast stack allocator from RenderList - auto& memory = renderContextBatch.GetMainContext().List->Memory; - sortingKeys[0] = memory.Allocate(listSize); - sortingKeys[1] = memory.Allocate(listSize); - sortingIndices[0] = memory.Allocate(indicesByteSize, GPU_SHADER_DATA_ALIGNMENT); - sortingIndices[1] = memory.Allocate(indicesByteSize, GPU_SHADER_DATA_ALIGNMENT); - } - else - { - // Use shared pooled memory from RendererAllocation - sortingKeysList[0].Resize(listSize); - sortingKeysList[1].Resize(listSize); - sortingIndicesList[0].Resize(indicesByteSize); - sortingIndicesList[1].Resize(indicesByteSize); - sortingKeys[0] = sortingKeysList[0].Get(); - sortingKeys[1] = sortingKeysList[1].Get(); - sortingIndices[0] = sortingIndicesList[0].Get(); - sortingIndices[1] = sortingIndicesList[1].Get(); - } + RenderListAlloc sortingAllocs[4]; + auto* renderList = renderContextBatch.GetMainContext().List; + uint32* sortingKeys[2] = { sortingAllocs[0].Init(renderList, listSize), sortingAllocs[1].Init(renderList, listSize) }; + void* sortingIndices[2] = { sortingAllocs[2].Init(renderList, indicesByteSize, GPU_SHADER_DATA_ALIGNMENT), sortingAllocs[3].Init(renderList, indicesByteSize, GPU_SHADER_DATA_ALIGNMENT) }; uint32* sortedKeys = sortingKeys[0]; const uint32 sortKeyXor = sortMode != ParticleSortMode::CustomAscending ? 
MAX_uint32 : 0;
     switch (sortMode)
@@ -321,7 +300,7 @@ void DrawEmitterCPU(RenderContextBatch& renderContextBatch, ParticleBuffer* buff
             {
             case PixelFormat::R16_UInt:
                 for (int32 i = 0; i < listSize; i++)
-                    ((uint16*)sortedIndices)[i] = i;
+                    ((uint16*)sortedIndices)[i] = (uint16)i;
                 break;
             case PixelFormat::R32_UInt:
                 for (int32 i = 0; i < listSize; i++)
diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp
index 6bd53ec87..2c62ebdd6 100644
--- a/Source/Engine/Renderer/RenderList.cpp
+++ b/Source/Engine/Renderer/RenderList.cpp
@@ -461,6 +461,25 @@ bool DrawCallsList::IsEmpty() const
     return Indices.Count() + PreBatchedDrawCalls.Count() == 0;
 }
+// Releases the block only when it was taken from RendererAllocation; blocks taken from a render list are left alone.
+RenderListAlloc::~RenderListAlloc()
+{
+    // Test Data first: it stays null until Init runs, so an unused object never reaches the List check
+    if (Data && !List) // Render List memory doesn't need free (arena allocator)
+        RendererAllocation::Free(Data, Size);
+}
+
+// Allocates size bytes and returns the block (also stored in Data). Must be called at most once per object (asserted).
+// Requests below 1024 bytes with an alignment of at most 16 are served from list->Memory;
+// anything else, and any request made without a list, is served by RendererAllocation.
+void* RenderListAlloc::Init(RenderList* list, uintptr size, uintptr alignment)
+{
+    ASSERT_LOW_LAYER(!Data);
+    Size = size;
+    // A null list cannot serve the arena path, so such requests go to the shared allocator instead of dereferencing it
+    const bool useList = list != nullptr && alignment <= 16 && size < 1024;
+    List = useList ? list : nullptr;
+    if (useList)
+        Data = list->Memory.Allocate(size, alignment);
+    else
+        Data = RendererAllocation::Allocate(size); // NOTE(review): alignment is not forwarded on this path - confirm RendererAllocation blocks satisfy alignments above 16
+    return Data;
+}
+
 RenderList::RenderList(const SpawnParams& params)
     : ScriptingObject(params)
     , Memory(4 * 1024 * 1024, RendererAllocation::Allocate, RendererAllocation::Free) // 4MB pages, use page pooling via RendererAllocation
@@ -692,12 +711,10 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD
     ZoneValue(listSize);
     // Use shared memory from renderer allocator
-    Array SortingKeys[2];
-    Array SortingIndices;
-    SortingKeys[0].Resize(listSize);
-    SortingKeys[1].Resize(listSize);
-    SortingIndices.Resize(listSize);
-    uint64* sortedKeys = SortingKeys[0].Get();
+    RenderListAlloc allocs[3];
+    uint64* sortedKeys = allocs[0].Init<uint64>(this, listSize);
+    uint64* tempKeys = allocs[1].Init<uint64>(this, listSize);
+    int32* tempIndices = allocs[2].Init<int32>(this, listSize);
     // Setup sort keys
     if (reverseDistance)
@@ -740,7 +757,7 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD
     // Sort draw calls indices
     int32* resultIndices = list.Indices.Get();
-    Sorting::RadixSort(sortedKeys, resultIndices, SortingKeys[1].Get(), SortingIndices.Get(), listSize);
+    Sorting::RadixSort(sortedKeys, resultIndices, tempKeys, tempIndices, listSize);
     if (resultIndices != list.Indices.Get())
         Platform::MemoryCopy(list.Indices.Get(), resultIndices, sizeof(int32) * listSize);
diff --git a/Source/Engine/Renderer/RenderList.h b/Source/Engine/Renderer/RenderList.h
index af1e1f44a..ce73f1dcd 100644
--- a/Source/Engine/Renderer/RenderList.h
+++ b/Source/Engine/Renderer/RenderList.h
@@ -278,6 +278,30 @@ struct DrawCallsList
     bool IsEmpty() const;
 };
+// Small utility for allocating memory from RenderList arena pool with automatic fallback to shared RendererAllocation for larger memory blocks.
+struct RenderListAlloc
+{
+    // Render list that served the allocation; null when the block came from RendererAllocation or nothing is allocated yet.
+    RenderList* List = nullptr;
+    // Allocated block; null until Init has been called.
+    void* Data = nullptr;
+    // Size of the block in bytes, passed back to RendererAllocation::Free on destruction.
+    uintptr Size = 0;
+
+    RenderListAlloc() = default;
+
+    // The destructor may free Data, so a copy would release the same block twice.
+    RenderListAlloc(const RenderListAlloc&) = delete;
+    RenderListAlloc& operator=(const RenderListAlloc&) = delete;
+
+    // Frees the block if it was taken from RendererAllocation.
+    ~RenderListAlloc();
+
+    // Allocates size bytes with the requested alignment and returns the block (also kept in Data).
+    void* Init(RenderList* list, uintptr size, uintptr alignment = 1);
+
+    // Typed overload: allocates room for count elements of T and returns it as T*.
+    template<typename T>
+    FORCE_INLINE T* Init(RenderList* list, int32 count, uintptr alignment = 1)
+    {
+        return (T*)Init(list, count * sizeof(T), alignment);
+    }
+
+    // Returns the allocated block reinterpreted as T* (null before Init).
+    template<typename T>
+    FORCE_INLINE T* Get()
+    {
+        return (T*)Data;
+    }
+};
+
 ///
 /// Rendering cache container object for the draw calls collecting, sorting and executing.
 ///