Optimize CPU particles sorting with Radix sort

This commit is contained in:
Wojtek Figat
2021-06-27 12:30:49 +02:00
parent bf1a30c5c6
commit fca4f4ba40
9 changed files with 240 additions and 307 deletions

View File

@@ -2,8 +2,6 @@
#include "EditorScene.h"
#include "Engine/Debug/DebugDraw.h"
EditorScene::EditorScene(const SpawnParams& params)
: Scene(params)
{

View File

@@ -10,3 +10,42 @@ Sorting::SortingStack& Sorting::SortingStack::Get()
{
return SortingStacks.Get();
}
// Constructs the stack. The body is intentionally empty: no member is assigned here.
Sorting::SortingStack::SortingStack()
{
}
// Destroys the stack and hands the Data buffer back to the allocator.
Sorting::SortingStack::~SortingStack()
{
Allocator::Free(Data);
}
void Sorting::SortingStack::SetCapacity(const int32 capacity)
{
ASSERT(capacity >= 0);
if (capacity == Capacity)
return;
int32* newData = nullptr;
if (capacity > 0)
newData = (int32*)Allocator::Allocate(capacity * sizeof(int32));
const int32 newCount = Count < capacity ? Count : capacity;
if (Data)
{
if (newData && newCount)
Platform::MemoryCopy(newData, Data, newCount * sizeof(int32));
Allocator::Free(Data);
}
Data = newData;
Capacity = capacity;
Count = newCount;
}
// Grows the storage so it can hold at least minCapacity items.
// Does nothing when the current capacity is already sufficient. Otherwise the capacity
// doubles (or starts at 64 for an empty stack), and is raised to minCapacity if that is still too small.
void Sorting::SortingStack::EnsureCapacity(int32 minCapacity)
{
    if (minCapacity <= Capacity)
        return;

    // Default growth step
    int32 grown = 64;
    if (Capacity != 0)
        grown = Capacity * 2;

    SetCapacity(grown > minCapacity ? grown : minCapacity);
}

View File

@@ -2,8 +2,6 @@
#pragma once
#include "Engine/Core/Templates.h"
#include "Engine/Core/Memory/Memory.h"
#include "Engine/Core/Types/BaseTypes.h"
#include "Engine/Platform/Platform.h"
@@ -23,111 +21,29 @@ public:
static SortingStack& Get();
public:
int32 Count = 0;
int32 Capacity = 0;
int32* Data = nullptr;
int32 _count;
int32 _capacity;
int32* _data;
SortingStack();
~SortingStack();
public:
/// <summary>
/// Initializes a new instance of the <see cref="SortingStack"/> class.
/// </summary>
SortingStack()
: _count(0)
, _capacity(0)
, _data(nullptr)
{
}
/// <summary>
/// Finalizes an instance of the <see cref="SortingStack"/> class.
/// </summary>
~SortingStack()
{
Allocator::Free(_data);
}
public:
FORCE_INLINE int32 Count() const
{
return _count;
}
FORCE_INLINE int32 Capacity() const
{
return _capacity;
}
FORCE_INLINE bool HasItems() const
{
return _count > 0;
}
public:
FORCE_INLINE void Clear()
{
_count = 0;
}
void SetCapacity(int32 capacity);
void EnsureCapacity(int32 minCapacity);
void Push(const int32 item)
{
EnsureCapacity(_count + 1);
_data[_count++] = item;
EnsureCapacity(Count + 1);
Data[Count++] = item;
}
int32 Pop()
{
ASSERT(_count > 0);
const int32 item = _data[_count - 1];
_count--;
ASSERT(Count > 0);
const int32 item = Data[Count - 1];
Count--;
return item;
}
public:
void SetCapacity(const int32 capacity)
{
ASSERT(capacity >= 0);
if (capacity == _capacity)
return;
int32* newData = nullptr;
if (capacity > 0)
{
newData = (int32*)Allocator::Allocate(capacity * sizeof(int32));
}
if (_data)
{
if (newData && _count > 0)
{
for (int32 i = 0; i < _count && i < capacity; i++)
newData[i] = _data[i];
}
Allocator::Free(_data);
}
_data = newData;
_capacity = capacity;
_count = _count < _capacity ? _count : _capacity;
}
void EnsureCapacity(int32 minCapacity)
{
if (_capacity >= minCapacity)
return;
int32 num = _capacity == 0 ? 64 : _capacity * 2;
if (num < minCapacity)
num = minCapacity;
SetCapacity(num);
}
};
public:
@@ -142,7 +58,6 @@ public:
{
if (count < 2)
return;
auto& stack = SortingStack::Get();
// Push left and right
@@ -150,7 +65,7 @@ public:
stack.Push(count - 1);
// Keep sorting from stack while is not empty
while (stack.HasItems())
while (stack.Count)
{
// Pop right and left
int32 right = stack.Pop();
@@ -197,7 +112,6 @@ public:
{
if (count < 2)
return;
auto& stack = SortingStack::Get();
// Push left and right
@@ -205,7 +119,7 @@ public:
stack.Push(count - 1);
// Keep sorting from stack while is not empty
while (stack.HasItems())
while (stack.Count)
{
// Pop right and left
int32 right = stack.Pop();
@@ -246,7 +160,6 @@ public:
{
if (count < 2)
return;
auto& stack = SortingStack::Get();
// Push left and right
@@ -254,7 +167,7 @@ public:
stack.Push(count - 1);
// Keep sorting from stack while is not empty
while (stack.HasItems())
while (stack.Count != 0)
{
// Pop right and left
int32 right = stack.Pop();
@@ -300,7 +213,6 @@ public:
{
if (count < 2)
return;
auto& stack = SortingStack::Get();
// Push left and right
@@ -308,7 +220,7 @@ public:
stack.Push(count - 1);
// Keep sorting from stack while is not empty
while (stack.HasItems())
while (stack.Count)
{
// Pop right and left
int32 right = stack.Pop();
@@ -343,4 +255,91 @@ public:
}
}
}
/// <summary>
/// Sorts the linear data array using Radix Sort algorithm (uses temporary keys collection).
/// </summary>
/// <param name="inputKeys">The data pointer to the input sorting keys array. When this method completes it contains a pointer to the original data or the temporary depending on the algorithm passes count. Use it as a results container.</param>
/// <param name="inputValues">The data pointer to the input values array. When this method completes it contains a pointer to the original data or the temporary depending on the algorithm passes count. Use it as a results container.</param>
/// <param name="tmpKeys">The data pointer to the temporary sorting keys array.</param>
/// <param name="tmpValues">The data pointer to the temporary values array.</param>
/// <param name="count">The elements count.</param>
template<typename T, typename U>
static void RadixSort(T*& inputKeys, U*& inputValues, T* tmpKeys, U* tmpValues, int32 count)
{
// Based on: https://github.com/bkaradzic/bx/blob/master/include/bx/inline/sort.inl
enum
{
RADIXSORT_BITS = 11,
RADIXSORT_HISTOGRAM_SIZE = 1 << RADIXSORT_BITS,
RADIXSORT_BIT_MASK = RADIXSORT_HISTOGRAM_SIZE - 1
};
if (count < 2)
return;
T* keys = inputKeys;
T* tempKeys = tmpKeys;
U* values = inputValues;
U* tempValues = tmpValues;
uint32 histogram[RADIXSORT_HISTOGRAM_SIZE];
uint16 shift = 0;
int32 pass = 0;
for (; pass < 6; pass++)
{
Platform::MemoryClear(histogram, sizeof(uint32) * RADIXSORT_HISTOGRAM_SIZE);
bool sorted = true;
T key = keys[0];
T prevKey = key;
for (int32 i = 0; i < count; i++)
{
key = keys[i];
const uint16 index = (key >> shift) & RADIXSORT_BIT_MASK;
++histogram[index];
sorted &= prevKey <= key;
prevKey = key;
}
if (sorted)
{
goto end;
}
uint32 offset = 0;
for (int32 i = 0; i < RADIXSORT_HISTOGRAM_SIZE; ++i)
{
const uint32 cnt = histogram[i];
histogram[i] = offset;
offset += cnt;
}
for (int32 i = 0; i < count; i++)
{
const T k = keys[i];
const uint16 index = (k >> shift) & RADIXSORT_BIT_MASK;
const uint32 dest = histogram[index]++;
tempKeys[dest] = k;
tempValues[dest] = values[i];
}
T* const swapKeys = tempKeys;
tempKeys = keys;
keys = swapKeys;
U* const swapValues = tempValues;
tempValues = values;
values = swapValues;
shift += RADIXSORT_BITS;
}
end:
if (pass & 1)
{
// Use temporary keys and values as a result
inputKeys = tmpKeys;
inputValues = tmpValues;
}
}
};

View File

@@ -98,6 +98,16 @@ public:
/// <param name="renderContext">The rendering context.</param>
/// <returns>The zero-based LOD index. Returns -1 if model should not be rendered.</returns>
API_FUNCTION() static int32 ComputeSkinnedModelLOD(const SkinnedModel* model, API_PARAM(Ref) const Vector3& origin, float radius, API_PARAM(Ref) const RenderContext& renderContext);
/// <summary>
/// Computes the sorting key for depth value (quantized).
/// The key is built from the raw bits of the value: when the sign bit is set all bits get inverted, otherwise only the sign bit gets flipped.
/// Reference: http://aras-p.info/blog/2014/01/16/rough-sorting-by-depth/
/// </summary>
/// <param name="distance">The distance (depth) value to convert.</param>
/// <returns>The unsigned integer sorting key.</returns>
FORCE_INLINE static uint32 ComputeDistanceSortKey(float distance)
{
    // Reinterpret the float as its raw 32-bit pattern
    const uint32 bits = *((uint32*)&distance);

    // Pick the mask based on the sign bit (the top bit of the pattern)
    const uint32 flipMask = (bits >> 31) != 0 ? 0xFFFFFFFF : 0x80000000;
    return bits ^ flipMask;
}
};
// Get texture memory usage

View File

@@ -1,6 +1,7 @@
// Copyright (c) 2012-2021 Wojciech Figat. All rights reserved.
#include "Scene.h"
#include "SceneAsset.h"
#include "Engine/Level/Level.h"
#include "Engine/Content/AssetInfo.h"
#include "Engine/Content/Content.h"
@@ -24,6 +25,11 @@ SceneAsset::SceneAsset(const SpawnParams& params, const AssetInfo* info)
{
}
// Always reports true for scene assets.
// NOTE(review): the exact meaning of an "internal type" is defined by the base asset class - confirm there.
bool SceneAsset::IsInternalType() const
{
return true;
}
#define CSG_COLLIDER_NAME TEXT("CSG.Collider")
#define CSG_MODEL_NAME TEXT("CSG.Model")
@@ -235,6 +241,18 @@ void Scene::OnCsgModelChanged()
}
}
#if COMPILE_WITH_CSG_BUILDER
// Called when the CSG build ends. Creates the CSG collider and the CSG model objects
// for the scene if the matching CSG data exists but the object cannot be found.
void Scene::OnCSGBuildEnd()
{
    // Collision data is there but no collider uses it yet
    if (CSGData.CollisionData)
    {
        if (TryGetCsgCollider() == nullptr)
        {
            CreateCsgCollider();
        }
    }

    // Same check for the model
    if (CSGData.Model)
    {
        if (TryGetCsgModel() == nullptr)
        {
            CreateCsgModel();
        }
    }
}
#endif
void Scene::Serialize(SerializeStream& stream, const void* otherObj)
{
// Base

View File

@@ -4,7 +4,6 @@
#include "../Actor.h"
#include "../SceneInfo.h"
#include "Engine/Content/JsonAsset.h"
#include "SceneLightmapsData.h"
#include "SceneCSGData.h"
#include "SceneRendering.h"
@@ -150,13 +149,7 @@ private:
void OnCsgCollisionDataChanged();
void OnCsgModelChanged();
#if COMPILE_WITH_CSG_BUILDER
void OnCSGBuildEnd()
{
if (CSGData.CollisionData && TryGetCsgCollider() == nullptr)
CreateCsgCollider();
if (CSGData.Model && TryGetCsgModel() == nullptr)
CreateCsgModel();
}
void OnCSGBuildEnd();
#endif
public:
@@ -175,16 +168,3 @@ protected:
void BeginPlay(SceneBeginData* data) override;
void OnTransformChanged() override;
};
/// <summary>
/// The scene asset.
/// </summary>
API_CLASS(NoSpawn) class SceneAsset : public JsonAsset
{
DECLARE_ASSET_HEADER(SceneAsset);
protected:
bool IsInternalType() const override
{
return true;
}
};

View File

@@ -0,0 +1,15 @@
// Copyright (c) 2012-2021 Wojciech Figat. All rights reserved.
#pragma once
#include "Engine/Content/JsonAsset.h"
/// <summary>
/// The scene asset. Derives from JsonAsset.
/// </summary>
API_CLASS(NoSpawn) class SceneAsset : public JsonAsset
{
DECLARE_ASSET_HEADER(SceneAsset);
protected:
// Overrides the base class method; only declared here, the body is provided outside of this header.
bool IsInternalType() const override;
};

View File

@@ -12,6 +12,7 @@
#include "Engine/Graphics/GPUPipelineStatePermutations.h"
#include "Engine/Graphics/RenderTask.h"
#include "Engine/Graphics/DynamicBuffer.h"
#include "Engine/Graphics/RenderTools.h"
#include "Engine/Profiler/ProfilerCPU.h"
#include "Engine/Renderer/DrawCall.h"
#include "Engine/Renderer/RenderList.h"
@@ -57,15 +58,7 @@ public:
{ +0.5f, +0.5f, 1.0f, 1.0f },
{ -0.5f, +0.5f, 0.0f, 1.0f },
};
static uint16 indexBuffer[] =
{
0,
1,
2,
0,
2,
3,
};
static uint16 indexBuffer[] = { 0, 1, 2, 0, 2, 3, };
return VB->Init(GPUBufferDescription::Vertex(sizeof(SpriteParticleVertex), VertexCount, vertexBuffer)) || IB->Init(GPUBufferDescription::Index(sizeof(uint16), IndexCount, indexBuffer));
}
@@ -117,24 +110,9 @@ SpriteParticleRenderer SpriteRenderer;
namespace ParticlesDrawCPU
{
struct ParticleSortKey
{
uint32 Index;
float Order;
FORCE_INLINE static bool SortAscending(const ParticleSortKey& a, const ParticleSortKey& b)
{
return a.Order < b.Order;
};
FORCE_INLINE static bool SortDescending(const ParticleSortKey& a, const ParticleSortKey& b)
{
return b.Order < a.Order;
};
};
Array<uint32> SortedIndices;
Array<ParticleSortKey> ParticlesOrder;
Array<uint32> SortingKeys[2];
Array<int32> SortingIndices;
Array<int32> SortedIndices;
Array<float> RibbonTotalDistances;
}
@@ -192,12 +170,6 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa
// Prepare sorting data
if (!buffer->GPU.SortedIndices)
buffer->AllocateSortBuffer();
auto& particlesOrder = ParticlesDrawCPU::ParticlesOrder;
particlesOrder.Clear();
particlesOrder.Resize(buffer->CPU.Count);
auto& sortedIndices = ParticlesDrawCPU::SortedIndices;
sortedIndices.Clear();
sortedIndices.Resize(buffer->Capacity * emitter->Graph.SortModules.Count());
// Execute all sorting modules
for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.SortModules.Count(); moduleIndex++)
@@ -205,24 +177,27 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa
auto module = emitter->Graph.SortModules[moduleIndex];
const int32 sortedIndicesOffset = module->SortedIndicesOffset;
const auto sortMode = static_cast<ParticleSortMode>(module->Values[2].AsInt);
if (sortedIndicesOffset >= sortedIndices.Count())
continue;
const int32 stride = buffer->Stride;
const int32 listSize = buffer->CPU.Count;
#define PREPARE_CACHE(list) (ParticlesDrawCPU::list).Clear(); (ParticlesDrawCPU::list).Resize(listSize)
PREPARE_CACHE(SortingKeys[0]);
PREPARE_CACHE(SortingKeys[1]);
PREPARE_CACHE(SortingIndices);
#undef PREPARE_CACHE
uint32* sortedKeys = ParticlesDrawCPU::SortingKeys[0].Get();
const uint32 sortKeyXor = sortMode != ParticleSortMode::CustomAscending ? MAX_uint32 : 0;
switch (sortMode)
{
case ParticleSortMode::ViewDepth:
{
const Matrix viewProjection = renderContext.View.ViewProjection();
const int32 stride = buffer->Stride;
byte* positionPtr = buffer->CPU.Buffer.Get() + emitter->Graph.GetPositionAttributeOffset();
if (emitter->SimulationSpace == ParticlesSimulationSpace::Local)
{
for (int32 i = 0; i < buffer->CPU.Count; i++)
{
Vector3 position = *(Vector3*)positionPtr;
particlesOrder[i].Index = i;
particlesOrder[i].Order = Matrix::TransformPosition(viewProjection, Matrix::TransformPosition(drawCall.World, position)).W;
// TODO: use SIMD
sortedKeys[i] = RenderTools::ComputeDistanceSortKey(Matrix::TransformPosition(viewProjection, Matrix::TransformPosition(drawCall.World, *(Vector3*)positionPtr)).W) ^ sortKeyXor;
positionPtr += stride;
}
}
@@ -230,29 +205,22 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa
{
for (int32 i = 0; i < buffer->CPU.Count; i++)
{
Vector3 position = *(Vector3*)positionPtr;
particlesOrder[i].Index = i;
particlesOrder[i].Order = Matrix::TransformPosition(viewProjection, position).W;
sortedKeys[i] = RenderTools::ComputeDistanceSortKey(Matrix::TransformPosition(viewProjection, *(Vector3*)positionPtr).W) ^ sortKeyXor;
positionPtr += stride;
}
}
Sorting::QuickSort(particlesOrder.Get(), particlesOrder.Count(), &ParticlesDrawCPU::ParticleSortKey::SortDescending);
break;
}
case ParticleSortMode::ViewDistance:
{
const Vector3 viewPosition = renderContext.View.Position;
const int32 stride = buffer->Stride;
byte* positionPtr = buffer->CPU.Buffer.Get() + emitter->Graph.GetPositionAttributeOffset();
if (emitter->SimulationSpace == ParticlesSimulationSpace::Local)
{
for (int32 i = 0; i < buffer->CPU.Count; i++)
{
Vector3 position = *(Vector3*)positionPtr;
particlesOrder[i].Index = i;
particlesOrder[i].Order = (viewPosition - Vector3::Transform(position, drawCall.World)).LengthSquared();
// TODO: use SIMD
sortedKeys[i] = RenderTools::ComputeDistanceSortKey((viewPosition - Vector3::Transform(*(Vector3*)positionPtr, drawCall.World)).LengthSquared()) ^ sortKeyXor;
positionPtr += stride;
}
}
@@ -260,14 +228,11 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa
{
for (int32 i = 0; i < buffer->CPU.Count; i++)
{
Vector3 position = *(Vector3*)positionPtr;
particlesOrder[i].Index = i;
particlesOrder[i].Order = (viewPosition - position).LengthSquared();
// TODO: use SIMD
sortedKeys[i] = RenderTools::ComputeDistanceSortKey((viewPosition - *(Vector3*)positionPtr).LengthSquared()) ^ sortKeyXor;
positionPtr += stride;
}
}
Sorting::QuickSort(particlesOrder.Get(), particlesOrder.Count(), &ParticlesDrawCPU::ParticleSortKey::SortDescending);
break;
}
case ParticleSortMode::CustomAscending:
@@ -276,20 +241,12 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa
int32 attributeIdx = module->Attributes[0];
if (attributeIdx == -1)
break;
const int32 stride = buffer->Stride;
byte* attributePtr = buffer->CPU.Buffer.Get() + emitter->Graph.Layout.Attributes[attributeIdx].Offset;
for (int32 i = 0; i < buffer->CPU.Count; i++)
{
particlesOrder[i].Index = i;
particlesOrder[i].Order = *(float*)attributePtr;
sortedKeys[i] = RenderTools::ComputeDistanceSortKey(*(float*)attributePtr) ^ sortKeyXor;
attributePtr += stride;
}
if (sortMode == ParticleSortMode::CustomAscending)
Sorting::QuickSort(particlesOrder.Get(), particlesOrder.Count(), &ParticlesDrawCPU::ParticleSortKey::SortAscending);
else
Sorting::QuickSort(particlesOrder.Get(), particlesOrder.Count(), &ParticlesDrawCPU::ParticleSortKey::SortDescending);
break;
}
#if !BUILD_RELEASE
@@ -298,17 +255,31 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa
#endif
}
// Copy sorted indices
for (int32 k = 0; k < buffer->CPU.Count; k++)
sortedIndices[sortedIndicesOffset + k] = particlesOrder[k].Index;
}
// Generate sorting indices
int32* sortedIndices;
{
ParticlesDrawCPU::SortedIndices.Resize(listSize);
sortedIndices = ParticlesDrawCPU::SortedIndices.Get();
for (int i = 0; i < listSize; i++)
sortedIndices[i] = i;
}
// Upload CPU particles indices
context->UpdateBuffer(buffer->GPU.SortedIndices, sortedIndices.Get(), sortedIndices.Count() * sizeof(int32));
// Sort keys with indices
{
Sorting::RadixSort(sortedKeys, sortedIndices, ParticlesDrawCPU::SortingKeys[1].Get(), ParticlesDrawCPU::SortingIndices.Get(), listSize);
}
// Upload CPU particles indices
{
context->UpdateBuffer(buffer->GPU.SortedIndices, sortedIndices, listSize * sizeof(int32), sortedIndicesOffset);
}
}
}
// Upload CPU particles data to GPU
context->UpdateBuffer(buffer->GPU.Buffer, buffer->CPU.Buffer.Get(), buffer->CPU.Count * buffer->Stride);
{
context->UpdateBuffer(buffer->GPU.Buffer, buffer->CPU.Buffer.Get(), buffer->CPU.Count * buffer->Stride);
}
// Check if need to setup ribbon modules
int32 ribbonModuleIndex = 0;
@@ -409,7 +380,6 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa
ribbonSegmentDistancesBuffer = GPUDevice::Instance->CreateBuffer(TEXT("RibbonSegmentDistances"));
ribbonSegmentDistancesBuffer->Init(GPUBufferDescription::Typed(buffer->Capacity, PixelFormat::R32_Float, false, GPUResourceUsage::Dynamic));
}
context->UpdateBuffer(ribbonSegmentDistancesBuffer, totalDistances.Get(), totalDistances.Count() * sizeof(float));
}
@@ -1195,7 +1165,9 @@ void ParticleManagerService::Dispose()
}
CleanupGPUParticlesSorting();
#endif
ParticlesDrawCPU::ParticlesOrder.SetCapacity(0);
ParticlesDrawCPU::SortingKeys[0].SetCapacity(0);
ParticlesDrawCPU::SortingKeys[1].SetCapacity(0);
ParticlesDrawCPU::SortingIndices.SetCapacity(0);
ParticlesDrawCPU::SortedIndices.SetCapacity(0);
ParticlesDrawCPU::RibbonTotalDistances.SetCapacity(0);

View File

@@ -9,6 +9,7 @@
#include "Engine/Graphics/PostProcessBase.h"
#include "Engine/Graphics/GPULimits.h"
#include "Engine/Graphics/RenderTargetPool.h"
#include "Engine/Graphics/RenderTools.h"
#include "Engine/Profiler/Profiler.h"
#include "Engine/Content/Assets/CubeTexture.h"
#include "Engine/Level/Scene/Lightmap.h"
@@ -32,8 +33,6 @@ namespace
Array<RenderList*> FreeRenderList;
}
#define PREPARE_CACHE(list) (list).Clear(); (list).Resize(listSize)
void RendererDirectionalLightData::SetupLightData(LightData* data, const RenderView& view, bool useShadow) const
{
data->SpotAngles.X = -2.0f;
@@ -399,108 +398,6 @@ void RenderList::AddDrawCall(DrawPass drawModes, StaticFlags staticFlags, DrawCa
}
}
uint32 ComputeDistance(float distance)
{
// Compute sort key (http://aras-p.info/blog/2014/01/16/rough-sorting-by-depth/)
uint32 distanceI = *((uint32*)&distance);
return ((uint32)(-(int32)(distanceI >> 31)) | 0x80000000) ^ distanceI;
}
/// <summary>
/// Sorts the linear data array using Radix Sort algorithm (uses temporary keys collection).
/// </summary>
/// <param name="inputKeys">The data pointer to the input sorting keys array. When this method completes it contains a pointer to the original data or the temporary depending on the algorithm passes count. Use it as a results container.</param>
/// <param name="inputValues">The data pointer to the input values array. When this method completes it contains a pointer to the original data or the temporary depending on the algorithm passes count. Use it as a results container.</param>
/// <param name="tmpKeys">The data pointer to the temporary sorting keys array.</param>
/// <param name="tmpValues">The data pointer to the temporary values array.</param>
/// <param name="count">The elements count.</param>
template<typename T, typename U>
static void RadixSort(T*& inputKeys, U* inputValues, T* tmpKeys, U* tmpValues, int32 count)
{
// Based on: https://github.com/bkaradzic/bx/blob/master/include/bx/inline/sort.inl
enum
{
RADIXSORT_BITS = 11,
RADIXSORT_HISTOGRAM_SIZE = 1 << RADIXSORT_BITS,
RADIXSORT_BIT_MASK = RADIXSORT_HISTOGRAM_SIZE - 1
};
if (count < 2)
return;
T* keys = inputKeys;
T* tempKeys = tmpKeys;
U* values = inputValues;
U* tempValues = tmpValues;
uint32 histogram[RADIXSORT_HISTOGRAM_SIZE];
uint16 shift = 0;
int32 pass = 0;
for (; pass < 6; pass++)
{
Platform::MemoryClear(histogram, sizeof(uint32) * RADIXSORT_HISTOGRAM_SIZE);
bool sorted = true;
T key = keys[0];
T prevKey = key;
for (int32 i = 0; i < count; i++)
{
key = keys[i];
const uint16 index = (key >> shift) & RADIXSORT_BIT_MASK;
++histogram[index];
sorted &= prevKey <= key;
prevKey = key;
}
if (sorted)
{
goto end;
}
uint32 offset = 0;
for (int32 i = 0; i < RADIXSORT_HISTOGRAM_SIZE; ++i)
{
const uint32 cnt = histogram[i];
histogram[i] = offset;
offset += cnt;
}
for (int32 i = 0; i < count; i++)
{
const T k = keys[i];
const uint16 index = (k >> shift) & RADIXSORT_BIT_MASK;
const uint32 dest = histogram[index]++;
tempKeys[dest] = k;
tempValues[dest] = values[i];
}
T* const swapKeys = tempKeys;
tempKeys = keys;
keys = swapKeys;
U* const swapValues = tempValues;
tempValues = values;
values = swapValues;
shift += RADIXSORT_BITS;
}
end:
if (pass & 1)
{
// Use temporary keys as a result
inputKeys = tmpKeys;
#if 0
// Use temporary values as a result
inputValues = tmpValues;
#else
// Odd number of passes needs to do copy to the destination
Platform::MemoryCopy(inputValues, tmpValues, sizeof(U) * count);
#endif
}
}
namespace
{
/// <summary>
@@ -530,9 +427,11 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD
const Plane plane(renderContext.View.Position, renderContext.View.Direction);
// Peek shared memory
#define PREPARE_CACHE(list) (list).Clear(); (list).Resize(listSize)
PREPARE_CACHE(SortingKeys[0]);
PREPARE_CACHE(SortingKeys[1]);
PREPARE_CACHE(SortingIndices);
#undef PREPARE_CACHE
uint64* sortedKeys = SortingKeys[0].Get();
// Generate sort keys (by depth) and batch keys (higher bits)
@@ -541,7 +440,7 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD
{
auto& drawCall = DrawCalls[list.Indices[i]];
const auto distance = CollisionsHelper::DistancePlanePoint(plane, drawCall.ObjectPosition);
const uint32 sortKey = ComputeDistance(distance) ^ sortKeyXor;
const uint32 sortKey = RenderTools::ComputeDistanceSortKey(distance) ^ sortKeyXor;
int32 batchKey = GetHash(drawCall.Geometry.IndexBuffer);
batchKey = (batchKey * 397) ^ GetHash(drawCall.Geometry.VertexBuffers[0]);
batchKey = (batchKey * 397) ^ GetHash(drawCall.Geometry.VertexBuffers[1]);
@@ -560,7 +459,10 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD
}
// Sort draw calls indices
RadixSort(sortedKeys, list.Indices.Get(), SortingKeys[1].Get(), SortingIndices.Get(), listSize);
int32* resultIndices = list.Indices.Get();
Sorting::RadixSort(sortedKeys, resultIndices, SortingKeys[1].Get(), SortingIndices.Get(), listSize);
if (resultIndices != list.Indices.Get())
Platform::MemoryCopy(list.Indices.Get(), resultIndices, sizeof(int32) * listSize);
// Perform draw calls batching
list.Batches.Clear();