From fca4f4ba406c886ae85be248067b05c6ed35948f Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Sun, 27 Jun 2021 12:30:49 +0200 Subject: [PATCH] Optimize CPU particles sorting with Radix sort --- Source/Editor/Utilities/EditorScene.cpp | 2 - Source/Engine/Core/Collections/Sorting.cpp | 39 ++++ Source/Engine/Core/Collections/Sorting.h | 207 ++++++++++----------- Source/Engine/Graphics/RenderTools.h | 10 + Source/Engine/Level/Scene/Scene.cpp | 18 ++ Source/Engine/Level/Scene/Scene.h | 22 +-- Source/Engine/Level/Scene/SceneAsset.h | 15 ++ Source/Engine/Particles/Particles.cpp | 120 +++++------- Source/Engine/Renderer/RenderList.cpp | 114 +----------- 9 files changed, 240 insertions(+), 307 deletions(-) create mode 100644 Source/Engine/Level/Scene/SceneAsset.h diff --git a/Source/Editor/Utilities/EditorScene.cpp b/Source/Editor/Utilities/EditorScene.cpp index 4188e7eb3..4350a31ff 100644 --- a/Source/Editor/Utilities/EditorScene.cpp +++ b/Source/Editor/Utilities/EditorScene.cpp @@ -2,8 +2,6 @@ #include "EditorScene.h" -#include "Engine/Debug/DebugDraw.h" - EditorScene::EditorScene(const SpawnParams& params) : Scene(params) { diff --git a/Source/Engine/Core/Collections/Sorting.cpp b/Source/Engine/Core/Collections/Sorting.cpp index 4874c46ee..4b2f2021e 100644 --- a/Source/Engine/Core/Collections/Sorting.cpp +++ b/Source/Engine/Core/Collections/Sorting.cpp @@ -10,3 +10,42 @@ Sorting::SortingStack& Sorting::SortingStack::Get() { return SortingStacks.Get(); } + +Sorting::SortingStack::SortingStack() +{ +} + +Sorting::SortingStack::~SortingStack() +{ + Allocator::Free(Data); +} + +void Sorting::SortingStack::SetCapacity(const int32 capacity) +{ + ASSERT(capacity >= 0); + if (capacity == Capacity) + return; + int32* newData = nullptr; + if (capacity > 0) + newData = (int32*)Allocator::Allocate(capacity * sizeof(int32)); + const int32 newCount = Count < capacity ? Count : capacity; + if (Data) + { + if (newData && newCount) + Platform::MemoryCopy(newData, Data, newCount * sizeof(int32)); + Allocator::Free(Data); + } + Data = newData; + Capacity = capacity; + Count = newCount; +} + +void Sorting::SortingStack::EnsureCapacity(int32 minCapacity) +{ + if (Capacity >= minCapacity) + return; + int32 num = Capacity == 0 ? 64 : Capacity * 2; + if (num < minCapacity) + num = minCapacity; + SetCapacity(num); +} diff --git a/Source/Engine/Core/Collections/Sorting.h b/Source/Engine/Core/Collections/Sorting.h index 6aa584ace..98b065815 100644 --- a/Source/Engine/Core/Collections/Sorting.h +++ b/Source/Engine/Core/Collections/Sorting.h @@ -2,8 +2,6 @@ #pragma once -#include "Engine/Core/Templates.h" -#include "Engine/Core/Memory/Memory.h" #include "Engine/Core/Types/BaseTypes.h" #include "Engine/Platform/Platform.h" @@ -23,111 +21,29 @@ public: static SortingStack& Get(); - public: + int32 Count = 0; + int32 Capacity = 0; + int32* Data = nullptr; - int32 _count; - int32 _capacity; - int32* _data; + SortingStack(); + ~SortingStack(); - public: - - /// - /// Initializes a new instance of the class. - /// - SortingStack() - : _count(0) - , _capacity(0) - , _data(nullptr) - { - } - - /// - /// Finalizes an instance of the class. - /// - ~SortingStack() - { - Allocator::Free(_data); - } - - public: - - FORCE_INLINE int32 Count() const - { - return _count; - } - - FORCE_INLINE int32 Capacity() const - { - return _capacity; - } - - FORCE_INLINE bool HasItems() const - { - return _count > 0; - } - - public: - - FORCE_INLINE void Clear() - { - _count = 0; - } + void SetCapacity(int32 capacity); + void EnsureCapacity(int32 minCapacity); void Push(const int32 item) { - EnsureCapacity(_count + 1); - _data[_count++] = item; + EnsureCapacity(Count + 1); + Data[Count++] = item; } int32 Pop() { - ASSERT(_count > 0); - const int32 item = _data[_count - 1]; - _count--; + ASSERT(Count > 0); + const int32 item = Data[Count - 1]; + Count--; return item; } - - public: - - void SetCapacity(const int32 capacity) - { - ASSERT(capacity >= 0); - - if (capacity == _capacity) - return; - - int32* newData = nullptr; - if (capacity > 0) - { - newData = (int32*)Allocator::Allocate(capacity * sizeof(int32)); - } - - if (_data) - { - if (newData && _count > 0) - { - for (int32 i = 0; i < _count && i < capacity; i++) - newData[i] = _data[i]; - } - Allocator::Free(_data); - } - - _data = newData; - _capacity = capacity; - _count = _count < _capacity ? _count : _capacity; - } - - void EnsureCapacity(int32 minCapacity) - { - if (_capacity >= minCapacity) - return; - - int32 num = _capacity == 0 ? 64 : _capacity * 2; - if (num < minCapacity) - num = minCapacity; - - SetCapacity(num); - } }; public: @@ -142,7 +58,6 @@ public: { if (count < 2) return; - auto& stack = SortingStack::Get(); // Push left and right @@ -150,7 +65,7 @@ public: stack.Push(count - 1); // Keep sorting from stack while is not empty - while (stack.HasItems()) + while (stack.Count) { // Pop right and left int32 right = stack.Pop(); @@ -197,7 +112,6 @@ public: { if (count < 2) return; - auto& stack = SortingStack::Get(); // Push left and right @@ -205,7 +119,7 @@ public: stack.Push(count - 1); // Keep sorting from stack while is not empty - while (stack.HasItems()) + while (stack.Count) { // Pop right and left int32 right = stack.Pop(); @@ -246,7 +160,6 @@ public: { if (count < 2) return; - auto& stack = SortingStack::Get(); // Push left and right @@ -254,7 +167,7 @@ public: stack.Push(count - 1); // Keep sorting from stack while is not empty - while (stack.HasItems()) + while (stack.Count != 0) { // Pop right and left int32 right = stack.Pop(); @@ -300,7 +213,6 @@ public: { if (count < 2) return; - auto& stack = SortingStack::Get(); // Push left and right @@ -308,7 +220,7 @@ public: stack.Push(count - 1); // Keep sorting from stack while is not empty - while (stack.HasItems()) + while (stack.Count) { // Pop right and left int32 right = stack.Pop(); @@ -343,4 +255,91 @@ public: } } } + + /// + /// Sorts the linear data array using Radix Sort algorithm (uses temporary keys collection). + /// + /// The data pointer to the input sorting keys array. When this method completes it contains a pointer to the original data or the temporary depending on the algorithm passes count. Use it as a results container. + /// The data pointer to the input values array. When this method completes it contains a pointer to the original data or the temporary depending on the algorithm passes count. Use it as a results container. + /// The data pointer to the temporary sorting keys array. + /// The data pointer to the temporary values array. + /// The elements count. + template + static void RadixSort(T*& inputKeys, U*& inputValues, T* tmpKeys, U* tmpValues, int32 count) + { + // Based on: https://github.com/bkaradzic/bx/blob/master/include/bx/inline/sort.inl + enum + { + RADIXSORT_BITS = 11, + RADIXSORT_HISTOGRAM_SIZE = 1 << RADIXSORT_BITS, + RADIXSORT_BIT_MASK = RADIXSORT_HISTOGRAM_SIZE - 1 + }; + if (count < 2) + return; + + T* keys = inputKeys; + T* tempKeys = tmpKeys; + U* values = inputValues; + U* tempValues = tmpValues; + + uint32 histogram[RADIXSORT_HISTOGRAM_SIZE]; + uint16 shift = 0; + int32 pass = 0; + for (; pass < 6; pass++) + { + Platform::MemoryClear(histogram, sizeof(uint32) * RADIXSORT_HISTOGRAM_SIZE); + + bool sorted = true; + T key = keys[0]; + T prevKey = key; + for (int32 i = 0; i < count; i++) + { + key = keys[i]; + const uint16 index = (key >> shift) & RADIXSORT_BIT_MASK; + ++histogram[index]; + sorted &= prevKey <= key; + prevKey = key; + } + + if (sorted) + { + goto end; + } + + uint32 offset = 0; + for (int32 i = 0; i < RADIXSORT_HISTOGRAM_SIZE; ++i) + { + const uint32 cnt = histogram[i]; + histogram[i] = offset; + offset += cnt; + } + + for (int32 i = 0; i < count; i++) + { + const T k = keys[i]; + const uint16 index = (k >> shift) & RADIXSORT_BIT_MASK; + const uint32 dest = histogram[index]++; + tempKeys[dest] = k; + tempValues[dest] = values[i]; + } + + T* const swapKeys = tempKeys; + tempKeys = keys; + keys = swapKeys; + + U* const swapValues = tempValues; + tempValues = values; + values = swapValues; + + shift += RADIXSORT_BITS; + } + + end: + if (pass & 1) + { + // Use temporary keys and values as a result + inputKeys = tmpKeys; + inputValues = tmpValues; + } + } }; diff --git a/Source/Engine/Graphics/RenderTools.h b/Source/Engine/Graphics/RenderTools.h index 29ac22e40..bbfe07e0b 100644 --- a/Source/Engine/Graphics/RenderTools.h +++ b/Source/Engine/Graphics/RenderTools.h @@ -98,6 +98,16 @@ public: /// The rendering context. /// The zero-based LOD index. Returns -1 if model should not be rendered. API_FUNCTION() static int32 ComputeSkinnedModelLOD(const SkinnedModel* model, API_PARAM(Ref) const Vector3& origin, float radius, API_PARAM(Ref) const RenderContext& renderContext); + + /// + /// Computes the sorting key for depth value (quantized) + /// Reference: http://aras-p.info/blog/2014/01/16/rough-sorting-by-depth/ + /// + FORCE_INLINE static uint32 ComputeDistanceSortKey(float distance) + { + const uint32 distanceI = *((uint32*)&distance); + return ((uint32)(-(int32)(distanceI >> 31)) | 0x80000000) ^ distanceI; + } }; // Get texture memory usage diff --git a/Source/Engine/Level/Scene/Scene.cpp b/Source/Engine/Level/Scene/Scene.cpp index fb9b4ce50..9304b679a 100644 --- a/Source/Engine/Level/Scene/Scene.cpp +++ b/Source/Engine/Level/Scene/Scene.cpp @@ -1,6 +1,7 @@ // Copyright (c) 2012-2021 Wojciech Figat. All rights reserved. #include "Scene.h" +#include "SceneAsset.h" #include "Engine/Level/Level.h" #include "Engine/Content/AssetInfo.h" #include "Engine/Content/Content.h" @@ -24,6 +25,11 @@ SceneAsset::SceneAsset(const SpawnParams& params, const AssetInfo* info) { } +bool SceneAsset::IsInternalType() const +{ + return true; +} + #define CSG_COLLIDER_NAME TEXT("CSG.Collider") #define CSG_MODEL_NAME TEXT("CSG.Model") @@ -235,6 +241,18 @@ void Scene::OnCsgModelChanged() } } +#if COMPILE_WITH_CSG_BUILDER + +void Scene::OnCSGBuildEnd() +{ + if (CSGData.CollisionData && TryGetCsgCollider() == nullptr) + CreateCsgCollider(); + if (CSGData.Model && TryGetCsgModel() == nullptr) + CreateCsgModel(); +} + +#endif + void Scene::Serialize(SerializeStream& stream, const void* otherObj) { // Base diff --git a/Source/Engine/Level/Scene/Scene.h b/Source/Engine/Level/Scene/Scene.h index c576e7eca..305a31ca6 100644 --- a/Source/Engine/Level/Scene/Scene.h +++ b/Source/Engine/Level/Scene/Scene.h @@ -4,7 +4,6 @@ #include "../Actor.h" #include "../SceneInfo.h" -#include "Engine/Content/JsonAsset.h" #include "SceneLightmapsData.h" #include "SceneCSGData.h" #include "SceneRendering.h" @@ -150,13 +149,7 @@ private: void OnCsgCollisionDataChanged(); void OnCsgModelChanged(); #if COMPILE_WITH_CSG_BUILDER - void OnCSGBuildEnd() - { - if (CSGData.CollisionData && TryGetCsgCollider() == nullptr) - CreateCsgCollider(); - if (CSGData.Model && TryGetCsgModel() == nullptr) - CreateCsgModel(); - } + void OnCSGBuildEnd(); #endif public: @@ -175,16 +168,3 @@ protected: void BeginPlay(SceneBeginData* data) override; void OnTransformChanged() override; }; - -/// -/// The scene asset. -/// -API_CLASS(NoSpawn) class SceneAsset : public JsonAsset -{ -DECLARE_ASSET_HEADER(SceneAsset); -protected: - bool IsInternalType() const override - { - return true; - } -}; diff --git a/Source/Engine/Level/Scene/SceneAsset.h b/Source/Engine/Level/Scene/SceneAsset.h new file mode 100644 index 000000000..e56ed323e --- /dev/null +++ b/Source/Engine/Level/Scene/SceneAsset.h @@ -0,0 +1,15 @@ +// Copyright (c) 2012-2021 Wojciech Figat. All rights reserved. + +#pragma once + +#include "Engine/Content/JsonAsset.h" + +/// +/// The scene asset. +/// +API_CLASS(NoSpawn) class SceneAsset : public JsonAsset +{ +DECLARE_ASSET_HEADER(SceneAsset); +protected: + bool IsInternalType() const override; +}; diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index c3f72270c..8f281581a 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -12,6 +12,7 @@ #include "Engine/Graphics/GPUPipelineStatePermutations.h" #include "Engine/Graphics/RenderTask.h" #include "Engine/Graphics/DynamicBuffer.h" +#include "Engine/Graphics/RenderTools.h" #include "Engine/Profiler/ProfilerCPU.h" #include "Engine/Renderer/DrawCall.h" #include "Engine/Renderer/RenderList.h" @@ -57,15 +58,7 @@ public: { +0.5f, +0.5f, 1.0f, 1.0f }, { -0.5f, +0.5f, 0.0f, 1.0f }, }; - static uint16 indexBuffer[] = - { - 0, - 1, - 2, - 0, - 2, - 3, - }; + static uint16 indexBuffer[] = { 0, 1, 2, 0, 2, 3, }; return VB->Init(GPUBufferDescription::Vertex(sizeof(SpriteParticleVertex), VertexCount, vertexBuffer)) || IB->Init(GPUBufferDescription::Index(sizeof(uint16), IndexCount, indexBuffer)); } @@ -117,24 +110,9 @@ SpriteParticleRenderer SpriteRenderer; namespace ParticlesDrawCPU { - struct ParticleSortKey - { - uint32 Index; - float Order; - - FORCE_INLINE static bool SortAscending(const ParticleSortKey& a, const ParticleSortKey& b) - { - return a.Order < b.Order; - }; - - FORCE_INLINE static bool SortDescending(const ParticleSortKey& a, const ParticleSortKey& b) - { - return b.Order < a.Order; - }; - }; - - Array SortedIndices; - Array ParticlesOrder; + Array SortingKeys[2]; + Array SortingIndices; + Array SortedIndices; Array RibbonTotalDistances; } @@ -192,12 +170,6 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa // Prepare sorting data if (!buffer->GPU.SortedIndices) buffer->AllocateSortBuffer(); - auto& particlesOrder = ParticlesDrawCPU::ParticlesOrder; - particlesOrder.Clear(); - particlesOrder.Resize(buffer->CPU.Count); - auto& sortedIndices = ParticlesDrawCPU::SortedIndices; - sortedIndices.Clear(); - sortedIndices.Resize(buffer->Capacity * emitter->Graph.SortModules.Count()); // Execute all sorting modules for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.SortModules.Count(); moduleIndex++) @@ -205,24 +177,27 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa auto module = emitter->Graph.SortModules[moduleIndex]; const int32 sortedIndicesOffset = module->SortedIndicesOffset; const auto sortMode = static_cast(module->Values[2].AsInt); - if (sortedIndicesOffset >= sortedIndices.Count()) - continue; - + const int32 stride = buffer->Stride; + const int32 listSize = buffer->CPU.Count; +#define PREPARE_CACHE(list) (ParticlesDrawCPU::list).Clear(); (ParticlesDrawCPU::list).Resize(listSize) + PREPARE_CACHE(SortingKeys[0]); + PREPARE_CACHE(SortingKeys[1]); + PREPARE_CACHE(SortingIndices); +#undef PREPARE_CACHE + uint32* sortedKeys = ParticlesDrawCPU::SortingKeys[0].Get(); + const uint32 sortKeyXor = sortMode != ParticleSortMode::CustomAscending ? MAX_uint32 : 0; switch (sortMode) { case ParticleSortMode::ViewDepth: { const Matrix viewProjection = renderContext.View.ViewProjection(); - const int32 stride = buffer->Stride; byte* positionPtr = buffer->CPU.Buffer.Get() + emitter->Graph.GetPositionAttributeOffset(); - if (emitter->SimulationSpace == ParticlesSimulationSpace::Local) { for (int32 i = 0; i < buffer->CPU.Count; i++) { - Vector3 position = *(Vector3*)positionPtr; - particlesOrder[i].Index = i; - particlesOrder[i].Order = Matrix::TransformPosition(viewProjection, Matrix::TransformPosition(drawCall.World, position)).W; + // TODO: use SIMD + sortedKeys[i] = RenderTools::ComputeDistanceSortKey(Matrix::TransformPosition(viewProjection, Matrix::TransformPosition(drawCall.World, *(Vector3*)positionPtr)).W) ^ sortKeyXor; positionPtr += stride; } } @@ -230,29 +205,22 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa { for (int32 i = 0; i < buffer->CPU.Count; i++) { - Vector3 position = *(Vector3*)positionPtr; - particlesOrder[i].Index = i; - particlesOrder[i].Order = Matrix::TransformPosition(viewProjection, position).W; + sortedKeys[i] = RenderTools::ComputeDistanceSortKey(Matrix::TransformPosition(viewProjection, *(Vector3*)positionPtr).W) ^ sortKeyXor; positionPtr += stride; } } - - Sorting::QuickSort(particlesOrder.Get(), particlesOrder.Count(), &ParticlesDrawCPU::ParticleSortKey::SortDescending); break; } case ParticleSortMode::ViewDistance: { const Vector3 viewPosition = renderContext.View.Position; - const int32 stride = buffer->Stride; byte* positionPtr = buffer->CPU.Buffer.Get() + emitter->Graph.GetPositionAttributeOffset(); - if (emitter->SimulationSpace == ParticlesSimulationSpace::Local) { for (int32 i = 0; i < buffer->CPU.Count; i++) { - Vector3 position = *(Vector3*)positionPtr; - particlesOrder[i].Index = i; - particlesOrder[i].Order = (viewPosition - Vector3::Transform(position, drawCall.World)).LengthSquared(); + // TODO: use SIMD + sortedKeys[i] = RenderTools::ComputeDistanceSortKey((viewPosition - Vector3::Transform(*(Vector3*)positionPtr, drawCall.World)).LengthSquared()) ^ sortKeyXor; positionPtr += stride; } } @@ -260,14 +228,11 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa { for (int32 i = 0; i < buffer->CPU.Count; i++) { - Vector3 position = *(Vector3*)positionPtr; - particlesOrder[i].Index = i; - particlesOrder[i].Order = (viewPosition - position).LengthSquared(); + // TODO: use SIMD + sortedKeys[i] = RenderTools::ComputeDistanceSortKey((viewPosition - *(Vector3*)positionPtr).LengthSquared()) ^ sortKeyXor; positionPtr += stride; } } - - Sorting::QuickSort(particlesOrder.Get(), particlesOrder.Count(), &ParticlesDrawCPU::ParticleSortKey::SortDescending); break; } case ParticleSortMode::CustomAscending: @@ -276,20 +241,12 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa int32 attributeIdx = module->Attributes[0]; if (attributeIdx == -1) break; - const int32 stride = buffer->Stride; byte* attributePtr = buffer->CPU.Buffer.Get() + emitter->Graph.Layout.Attributes[attributeIdx].Offset; - for (int32 i = 0; i < buffer->CPU.Count; i++) { - particlesOrder[i].Index = i; - particlesOrder[i].Order = *(float*)attributePtr; + sortedKeys[i] = RenderTools::ComputeDistanceSortKey(*(float*)attributePtr) ^ sortKeyXor; attributePtr += stride; } - - if (sortMode == ParticleSortMode::CustomAscending) - Sorting::QuickSort(particlesOrder.Get(), particlesOrder.Count(), &ParticlesDrawCPU::ParticleSortKey::SortAscending); - else - Sorting::QuickSort(particlesOrder.Get(), particlesOrder.Count(), &ParticlesDrawCPU::ParticleSortKey::SortDescending); break; } #if !BUILD_RELEASE @@ -298,17 +255,31 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa #endif } - // Copy sorted indices - for (int32 k = 0; k < buffer->CPU.Count; k++) - sortedIndices[sortedIndicesOffset + k] = particlesOrder[k].Index; - } + // Generate sorting indices + int32* sortedIndices; + { + ParticlesDrawCPU::SortedIndices.Resize(listSize); + sortedIndices = ParticlesDrawCPU::SortedIndices.Get(); + for (int i = 0; i < listSize; i++) + sortedIndices[i] = i; + } - // Upload CPU particles indices - context->UpdateBuffer(buffer->GPU.SortedIndices, sortedIndices.Get(), sortedIndices.Count() * sizeof(int32)); + // Sort keys with indices + { + Sorting::RadixSort(sortedKeys, sortedIndices, ParticlesDrawCPU::SortingKeys[1].Get(), ParticlesDrawCPU::SortingIndices.Get(), listSize); + } + + // Upload CPU particles indices + { + context->UpdateBuffer(buffer->GPU.SortedIndices, sortedIndices, listSize * sizeof(int32), sortedIndicesOffset); + } + } } // Upload CPU particles data to GPU - context->UpdateBuffer(buffer->GPU.Buffer, buffer->CPU.Buffer.Get(), buffer->CPU.Count * buffer->Stride); + { + context->UpdateBuffer(buffer->GPU.Buffer, buffer->CPU.Buffer.Get(), buffer->CPU.Count * buffer->Stride); + } // Check if need to setup ribbon modules int32 ribbonModuleIndex = 0; @@ -409,7 +380,6 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa ribbonSegmentDistancesBuffer = GPUDevice::Instance->CreateBuffer(TEXT("RibbonSegmentDistances")); ribbonSegmentDistancesBuffer->Init(GPUBufferDescription::Typed(buffer->Capacity, PixelFormat::R32_Float, false, GPUResourceUsage::Dynamic)); } - context->UpdateBuffer(ribbonSegmentDistancesBuffer, totalDistances.Get(), totalDistances.Count() * sizeof(float)); } @@ -1195,7 +1165,9 @@ void ParticleManagerService::Dispose() } CleanupGPUParticlesSorting(); #endif - ParticlesDrawCPU::ParticlesOrder.SetCapacity(0); + ParticlesDrawCPU::SortingKeys[0].SetCapacity(0); + ParticlesDrawCPU::SortingKeys[1].SetCapacity(0); + ParticlesDrawCPU::SortingIndices.SetCapacity(0); ParticlesDrawCPU::SortedIndices.SetCapacity(0); ParticlesDrawCPU::RibbonTotalDistances.SetCapacity(0); diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index ee66fc7b2..b92985a8f 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -9,6 +9,7 @@ #include "Engine/Graphics/PostProcessBase.h" #include "Engine/Graphics/GPULimits.h" #include "Engine/Graphics/RenderTargetPool.h" +#include "Engine/Graphics/RenderTools.h" #include "Engine/Profiler/Profiler.h" #include "Engine/Content/Assets/CubeTexture.h" #include "Engine/Level/Scene/Lightmap.h" @@ -32,8 +33,6 @@ namespace Array FreeRenderList; } -#define PREPARE_CACHE(list) (list).Clear(); (list).Resize(listSize) - void RendererDirectionalLightData::SetupLightData(LightData* data, const RenderView& view, bool useShadow) const { data->SpotAngles.X = -2.0f; @@ -399,108 +398,6 @@ void RenderList::AddDrawCall(DrawPass drawModes, StaticFlags staticFlags, DrawCa } } -uint32 ComputeDistance(float distance) -{ - // Compute sort key (http://aras-p.info/blog/2014/01/16/rough-sorting-by-depth/) - uint32 distanceI = *((uint32*)&distance); - return ((uint32)(-(int32)(distanceI >> 31)) | 0x80000000) ^ distanceI; -} - -/// -/// Sorts the linear data array using Radix Sort algorithm (uses temporary keys collection). -/// -/// The data pointer to the input sorting keys array. When this method completes it contains a pointer to the original data or the temporary depending on the algorithm passes count. Use it as a results container. -/// The data pointer to the input values array. When this method completes it contains a pointer to the original data or the temporary depending on the algorithm passes count. Use it as a results container. -/// The data pointer to the temporary sorting keys array. -/// The data pointer to the temporary values array. -/// The elements count. -template -static void RadixSort(T*& inputKeys, U* inputValues, T* tmpKeys, U* tmpValues, int32 count) -{ - // Based on: https://github.com/bkaradzic/bx/blob/master/include/bx/inline/sort.inl - enum - { - RADIXSORT_BITS = 11, - RADIXSORT_HISTOGRAM_SIZE = 1 << RADIXSORT_BITS, - RADIXSORT_BIT_MASK = RADIXSORT_HISTOGRAM_SIZE - 1 - }; - - if (count < 2) - return; - - T* keys = inputKeys; - T* tempKeys = tmpKeys; - U* values = inputValues; - U* tempValues = tmpValues; - - uint32 histogram[RADIXSORT_HISTOGRAM_SIZE]; - uint16 shift = 0; - int32 pass = 0; - for (; pass < 6; pass++) - { - Platform::MemoryClear(histogram, sizeof(uint32) * RADIXSORT_HISTOGRAM_SIZE); - - bool sorted = true; - T key = keys[0]; - T prevKey = key; - for (int32 i = 0; i < count; i++) - { - key = keys[i]; - const uint16 index = (key >> shift) & RADIXSORT_BIT_MASK; - ++histogram[index]; - sorted &= prevKey <= key; - prevKey = key; - } - - if (sorted) - { - goto end; - } - - uint32 offset = 0; - for (int32 i = 0; i < RADIXSORT_HISTOGRAM_SIZE; ++i) - { - const uint32 cnt = histogram[i]; - histogram[i] = offset; - offset += cnt; - } - - for (int32 i = 0; i < count; i++) - { - const T k = keys[i]; - const uint16 index = (k >> shift) & RADIXSORT_BIT_MASK; - const uint32 dest = histogram[index]++; - tempKeys[dest] = k; - tempValues[dest] = values[i]; - } - - T* const swapKeys = tempKeys; - tempKeys = keys; - keys = swapKeys; - - U* const swapValues = tempValues; - tempValues = values; - values = swapValues; - - shift += RADIXSORT_BITS; - } - -end: - if (pass & 1) - { - // Use temporary keys as a result - inputKeys = tmpKeys; - -#if 0 - // Use temporary values as a result - inputValues = tmpValues; -#else - // Odd number of passes needs to do copy to the destination - Platform::MemoryCopy(inputValues, tmpValues, sizeof(U) * count); -#endif - } -} - namespace { /// @@ -530,9 +427,11 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD const Plane plane(renderContext.View.Position, renderContext.View.Direction); // Peek shared memory +#define PREPARE_CACHE(list) (list).Clear(); (list).Resize(listSize) PREPARE_CACHE(SortingKeys[0]); PREPARE_CACHE(SortingKeys[1]); PREPARE_CACHE(SortingIndices); +#undef PREPARE_CACHE uint64* sortedKeys = SortingKeys[0].Get(); // Generate sort keys (by depth) and batch keys (higher bits) @@ -541,7 +440,7 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD { auto& drawCall = DrawCalls[list.Indices[i]]; const auto distance = CollisionsHelper::DistancePlanePoint(plane, drawCall.ObjectPosition); - const uint32 sortKey = ComputeDistance(distance) ^ sortKeyXor; + const uint32 sortKey = RenderTools::ComputeDistanceSortKey(distance) ^ sortKeyXor; int32 batchKey = GetHash(drawCall.Geometry.IndexBuffer); batchKey = (batchKey * 397) ^ GetHash(drawCall.Geometry.VertexBuffers[0]); batchKey = (batchKey * 397) ^ GetHash(drawCall.Geometry.VertexBuffers[1]); @@ -560,7 +459,10 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD } // Sort draw calls indices - RadixSort(sortedKeys, list.Indices.Get(), SortingKeys[1].Get(), SortingIndices.Get(), listSize); + int32* resultIndices = list.Indices.Get(); + Sorting::RadixSort(sortedKeys, resultIndices, SortingKeys[1].Get(), SortingIndices.Get(), listSize); + if (resultIndices != list.Indices.Get()) + Platform::MemoryCopy(list.Indices.Get(), resultIndices, sizeof(int32) * listSize); // Perform draw calls batching list.Batches.Clear();