Optimize CPU particles sorting with Radix sort
This commit is contained in:
@@ -2,8 +2,6 @@
|
||||
|
||||
#include "EditorScene.h"
|
||||
|
||||
#include "Engine/Debug/DebugDraw.h"
|
||||
|
||||
EditorScene::EditorScene(const SpawnParams& params)
|
||||
: Scene(params)
|
||||
{
|
||||
|
||||
@@ -10,3 +10,42 @@ Sorting::SortingStack& Sorting::SortingStack::Get()
|
||||
{
|
||||
return SortingStacks.Get();
|
||||
}
|
||||
|
||||
// Creates an empty stack. There is nothing to set up here: the members carry in-class default initializers.
Sorting::SortingStack::SortingStack() = default;
|
||||
|
||||
// Releases the buffer that stores the stack items.
Sorting::SortingStack::~SortingStack()
{
    Allocator::Free(Data);
}
|
||||
|
||||
void Sorting::SortingStack::SetCapacity(const int32 capacity)
|
||||
{
|
||||
ASSERT(capacity >= 0);
|
||||
if (capacity == Capacity)
|
||||
return;
|
||||
int32* newData = nullptr;
|
||||
if (capacity > 0)
|
||||
newData = (int32*)Allocator::Allocate(capacity * sizeof(int32));
|
||||
const int32 newCount = Count < capacity ? Count : capacity;
|
||||
if (Data)
|
||||
{
|
||||
if (newData && newCount)
|
||||
Platform::MemoryCopy(newData, Data, newCount * sizeof(int32));
|
||||
Allocator::Free(Data);
|
||||
}
|
||||
Data = newData;
|
||||
Capacity = capacity;
|
||||
Count = newCount;
|
||||
}
|
||||
|
||||
// Grows the storage so that it can hold at least `minCapacity` items.
// Does nothing when the current capacity is already large enough.
void Sorting::SortingStack::EnsureCapacity(int32 minCapacity)
{
    if (minCapacity > Capacity)
    {
        // First allocation uses 64 slots, later ones double the capacity
        int32 grown = 64;
        if (Capacity != 0)
            grown = Capacity * 2;

        // Never allocate less than what was requested
        SetCapacity(grown < minCapacity ? minCapacity : grown);
    }
}
|
||||
|
||||
@@ -2,8 +2,6 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Engine/Core/Templates.h"
|
||||
#include "Engine/Core/Memory/Memory.h"
|
||||
#include "Engine/Core/Types/BaseTypes.h"
|
||||
#include "Engine/Platform/Platform.h"
|
||||
|
||||
@@ -23,111 +21,29 @@ public:
|
||||
|
||||
static SortingStack& Get();
|
||||
|
||||
public:
|
||||
int32 Count = 0;
|
||||
int32 Capacity = 0;
|
||||
int32* Data = nullptr;
|
||||
|
||||
int32 _count;
|
||||
int32 _capacity;
|
||||
int32* _data;
|
||||
SortingStack();
|
||||
~SortingStack();
|
||||
|
||||
public:
|
||||
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="SortingStack"/> class.
|
||||
/// </summary>
|
||||
SortingStack()
|
||||
: _count(0)
|
||||
, _capacity(0)
|
||||
, _data(nullptr)
|
||||
{
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Finalizes an instance of the <see cref="SortingStack"/> class.
|
||||
/// </summary>
|
||||
~SortingStack()
|
||||
{
|
||||
Allocator::Free(_data);
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
FORCE_INLINE int32 Count() const
|
||||
{
|
||||
return _count;
|
||||
}
|
||||
|
||||
FORCE_INLINE int32 Capacity() const
|
||||
{
|
||||
return _capacity;
|
||||
}
|
||||
|
||||
FORCE_INLINE bool HasItems() const
|
||||
{
|
||||
return _count > 0;
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
FORCE_INLINE void Clear()
|
||||
{
|
||||
_count = 0;
|
||||
}
|
||||
void SetCapacity(int32 capacity);
|
||||
void EnsureCapacity(int32 minCapacity);
|
||||
|
||||
void Push(const int32 item)
|
||||
{
|
||||
EnsureCapacity(_count + 1);
|
||||
_data[_count++] = item;
|
||||
EnsureCapacity(Count + 1);
|
||||
Data[Count++] = item;
|
||||
}
|
||||
|
||||
int32 Pop()
|
||||
{
|
||||
ASSERT(_count > 0);
|
||||
const int32 item = _data[_count - 1];
|
||||
_count--;
|
||||
ASSERT(Count > 0);
|
||||
const int32 item = Data[Count - 1];
|
||||
Count--;
|
||||
return item;
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
void SetCapacity(const int32 capacity)
|
||||
{
|
||||
ASSERT(capacity >= 0);
|
||||
|
||||
if (capacity == _capacity)
|
||||
return;
|
||||
|
||||
int32* newData = nullptr;
|
||||
if (capacity > 0)
|
||||
{
|
||||
newData = (int32*)Allocator::Allocate(capacity * sizeof(int32));
|
||||
}
|
||||
|
||||
if (_data)
|
||||
{
|
||||
if (newData && _count > 0)
|
||||
{
|
||||
for (int32 i = 0; i < _count && i < capacity; i++)
|
||||
newData[i] = _data[i];
|
||||
}
|
||||
Allocator::Free(_data);
|
||||
}
|
||||
|
||||
_data = newData;
|
||||
_capacity = capacity;
|
||||
_count = _count < _capacity ? _count : _capacity;
|
||||
}
|
||||
|
||||
void EnsureCapacity(int32 minCapacity)
|
||||
{
|
||||
if (_capacity >= minCapacity)
|
||||
return;
|
||||
|
||||
int32 num = _capacity == 0 ? 64 : _capacity * 2;
|
||||
if (num < minCapacity)
|
||||
num = minCapacity;
|
||||
|
||||
SetCapacity(num);
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
@@ -142,7 +58,6 @@ public:
|
||||
{
|
||||
if (count < 2)
|
||||
return;
|
||||
|
||||
auto& stack = SortingStack::Get();
|
||||
|
||||
// Push left and right
|
||||
@@ -150,7 +65,7 @@ public:
|
||||
stack.Push(count - 1);
|
||||
|
||||
// Keep sorting from stack while is not empty
|
||||
while (stack.HasItems())
|
||||
while (stack.Count)
|
||||
{
|
||||
// Pop right and left
|
||||
int32 right = stack.Pop();
|
||||
@@ -197,7 +112,6 @@ public:
|
||||
{
|
||||
if (count < 2)
|
||||
return;
|
||||
|
||||
auto& stack = SortingStack::Get();
|
||||
|
||||
// Push left and right
|
||||
@@ -205,7 +119,7 @@ public:
|
||||
stack.Push(count - 1);
|
||||
|
||||
// Keep sorting from stack while is not empty
|
||||
while (stack.HasItems())
|
||||
while (stack.Count)
|
||||
{
|
||||
// Pop right and left
|
||||
int32 right = stack.Pop();
|
||||
@@ -246,7 +160,6 @@ public:
|
||||
{
|
||||
if (count < 2)
|
||||
return;
|
||||
|
||||
auto& stack = SortingStack::Get();
|
||||
|
||||
// Push left and right
|
||||
@@ -254,7 +167,7 @@ public:
|
||||
stack.Push(count - 1);
|
||||
|
||||
// Keep sorting from stack while is not empty
|
||||
while (stack.HasItems())
|
||||
while (stack.Count != 0)
|
||||
{
|
||||
// Pop right and left
|
||||
int32 right = stack.Pop();
|
||||
@@ -300,7 +213,6 @@ public:
|
||||
{
|
||||
if (count < 2)
|
||||
return;
|
||||
|
||||
auto& stack = SortingStack::Get();
|
||||
|
||||
// Push left and right
|
||||
@@ -308,7 +220,7 @@ public:
|
||||
stack.Push(count - 1);
|
||||
|
||||
// Keep sorting from stack while is not empty
|
||||
while (stack.HasItems())
|
||||
while (stack.Count)
|
||||
{
|
||||
// Pop right and left
|
||||
int32 right = stack.Pop();
|
||||
@@ -343,4 +255,91 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Sorts the linear data array using Radix Sort algorithm (uses temporary keys collection).
|
||||
/// </summary>
|
||||
/// <param name="inputKeys">The data pointer to the input sorting keys array. When this method completes it contains a pointer to the original data or the temporary depending on the algorithm passes count. Use it as a results container.</param>
|
||||
/// <param name="inputValues">The data pointer to the input values array. When this method completes it contains a pointer to the original data or the temporary depending on the algorithm passes count. Use it as a results container.</param>
|
||||
/// <param name="tmpKeys">The data pointer to the temporary sorting keys array.</param>
|
||||
/// <param name="tmpValues">The data pointer to the temporary values array.</param>
|
||||
/// <param name="count">The elements count.</param>
|
||||
template<typename T, typename U>
|
||||
static void RadixSort(T*& inputKeys, U*& inputValues, T* tmpKeys, U* tmpValues, int32 count)
|
||||
{
|
||||
// Based on: https://github.com/bkaradzic/bx/blob/master/include/bx/inline/sort.inl
|
||||
enum
|
||||
{
|
||||
RADIXSORT_BITS = 11,
|
||||
RADIXSORT_HISTOGRAM_SIZE = 1 << RADIXSORT_BITS,
|
||||
RADIXSORT_BIT_MASK = RADIXSORT_HISTOGRAM_SIZE - 1
|
||||
};
|
||||
if (count < 2)
|
||||
return;
|
||||
|
||||
T* keys = inputKeys;
|
||||
T* tempKeys = tmpKeys;
|
||||
U* values = inputValues;
|
||||
U* tempValues = tmpValues;
|
||||
|
||||
uint32 histogram[RADIXSORT_HISTOGRAM_SIZE];
|
||||
uint16 shift = 0;
|
||||
int32 pass = 0;
|
||||
for (; pass < 6; pass++)
|
||||
{
|
||||
Platform::MemoryClear(histogram, sizeof(uint32) * RADIXSORT_HISTOGRAM_SIZE);
|
||||
|
||||
bool sorted = true;
|
||||
T key = keys[0];
|
||||
T prevKey = key;
|
||||
for (int32 i = 0; i < count; i++)
|
||||
{
|
||||
key = keys[i];
|
||||
const uint16 index = (key >> shift) & RADIXSORT_BIT_MASK;
|
||||
++histogram[index];
|
||||
sorted &= prevKey <= key;
|
||||
prevKey = key;
|
||||
}
|
||||
|
||||
if (sorted)
|
||||
{
|
||||
goto end;
|
||||
}
|
||||
|
||||
uint32 offset = 0;
|
||||
for (int32 i = 0; i < RADIXSORT_HISTOGRAM_SIZE; ++i)
|
||||
{
|
||||
const uint32 cnt = histogram[i];
|
||||
histogram[i] = offset;
|
||||
offset += cnt;
|
||||
}
|
||||
|
||||
for (int32 i = 0; i < count; i++)
|
||||
{
|
||||
const T k = keys[i];
|
||||
const uint16 index = (k >> shift) & RADIXSORT_BIT_MASK;
|
||||
const uint32 dest = histogram[index]++;
|
||||
tempKeys[dest] = k;
|
||||
tempValues[dest] = values[i];
|
||||
}
|
||||
|
||||
T* const swapKeys = tempKeys;
|
||||
tempKeys = keys;
|
||||
keys = swapKeys;
|
||||
|
||||
U* const swapValues = tempValues;
|
||||
tempValues = values;
|
||||
values = swapValues;
|
||||
|
||||
shift += RADIXSORT_BITS;
|
||||
}
|
||||
|
||||
end:
|
||||
if (pass & 1)
|
||||
{
|
||||
// Use temporary keys and values as a result
|
||||
inputKeys = tmpKeys;
|
||||
inputValues = tmpValues;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@@ -98,6 +98,16 @@ public:
|
||||
/// <param name="renderContext">The rendering context.</param>
|
||||
/// <returns>The zero-based LOD index. Returns -1 if model should not be rendered.</returns>
|
||||
API_FUNCTION() static int32 ComputeSkinnedModelLOD(const SkinnedModel* model, API_PARAM(Ref) const Vector3& origin, float radius, API_PARAM(Ref) const RenderContext& renderContext);
|
||||
|
||||
/// <summary>
|
||||
/// Computes the sorting key for depth value (quantized)
|
||||
/// Reference: http://aras-p.info/blog/2014/01/16/rough-sorting-by-depth/
|
||||
/// </summary>
|
||||
FORCE_INLINE static uint32 ComputeDistanceSortKey(float distance)
|
||||
{
|
||||
const uint32 distanceI = *((uint32*)&distance);
|
||||
return ((uint32)(-(int32)(distanceI >> 31)) | 0x80000000) ^ distanceI;
|
||||
}
|
||||
};
|
||||
|
||||
// Get texture memory usage
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
// Copyright (c) 2012-2021 Wojciech Figat. All rights reserved.
|
||||
|
||||
#include "Scene.h"
|
||||
#include "SceneAsset.h"
|
||||
#include "Engine/Level/Level.h"
|
||||
#include "Engine/Content/AssetInfo.h"
|
||||
#include "Engine/Content/Content.h"
|
||||
@@ -24,6 +25,11 @@ SceneAsset::SceneAsset(const SpawnParams& params, const AssetInfo* info)
|
||||
{
|
||||
}
|
||||
|
||||
bool SceneAsset::IsInternalType() const
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
#define CSG_COLLIDER_NAME TEXT("CSG.Collider")
|
||||
#define CSG_MODEL_NAME TEXT("CSG.Model")
|
||||
|
||||
@@ -235,6 +241,18 @@ void Scene::OnCsgModelChanged()
|
||||
}
|
||||
}
|
||||
|
||||
#if COMPILE_WITH_CSG_BUILDER
|
||||
|
||||
void Scene::OnCSGBuildEnd()
|
||||
{
|
||||
if (CSGData.CollisionData && TryGetCsgCollider() == nullptr)
|
||||
CreateCsgCollider();
|
||||
if (CSGData.Model && TryGetCsgModel() == nullptr)
|
||||
CreateCsgModel();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
void Scene::Serialize(SerializeStream& stream, const void* otherObj)
|
||||
{
|
||||
// Base
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
|
||||
#include "../Actor.h"
|
||||
#include "../SceneInfo.h"
|
||||
#include "Engine/Content/JsonAsset.h"
|
||||
#include "SceneLightmapsData.h"
|
||||
#include "SceneCSGData.h"
|
||||
#include "SceneRendering.h"
|
||||
@@ -150,13 +149,7 @@ private:
|
||||
void OnCsgCollisionDataChanged();
|
||||
void OnCsgModelChanged();
|
||||
#if COMPILE_WITH_CSG_BUILDER
|
||||
void OnCSGBuildEnd()
|
||||
{
|
||||
if (CSGData.CollisionData && TryGetCsgCollider() == nullptr)
|
||||
CreateCsgCollider();
|
||||
if (CSGData.Model && TryGetCsgModel() == nullptr)
|
||||
CreateCsgModel();
|
||||
}
|
||||
void OnCSGBuildEnd();
|
||||
#endif
|
||||
|
||||
public:
|
||||
@@ -175,16 +168,3 @@ protected:
|
||||
void BeginPlay(SceneBeginData* data) override;
|
||||
void OnTransformChanged() override;
|
||||
};
|
||||
|
||||
/// <summary>
|
||||
/// The scene asset.
|
||||
/// </summary>
|
||||
API_CLASS(NoSpawn) class SceneAsset : public JsonAsset
|
||||
{
|
||||
DECLARE_ASSET_HEADER(SceneAsset);
|
||||
protected:
|
||||
bool IsInternalType() const override
|
||||
{
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
15
Source/Engine/Level/Scene/SceneAsset.h
Normal file
15
Source/Engine/Level/Scene/SceneAsset.h
Normal file
@@ -0,0 +1,15 @@
|
||||
// Copyright (c) 2012-2021 Wojciech Figat. All rights reserved.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Engine/Content/JsonAsset.h"
|
||||
|
||||
/// <summary>
|
||||
/// The scene asset.
|
||||
/// </summary>
|
||||
API_CLASS(NoSpawn) class SceneAsset : public JsonAsset
|
||||
{
|
||||
DECLARE_ASSET_HEADER(SceneAsset);
|
||||
protected:
|
||||
bool IsInternalType() const override;
|
||||
};
|
||||
@@ -12,6 +12,7 @@
|
||||
#include "Engine/Graphics/GPUPipelineStatePermutations.h"
|
||||
#include "Engine/Graphics/RenderTask.h"
|
||||
#include "Engine/Graphics/DynamicBuffer.h"
|
||||
#include "Engine/Graphics/RenderTools.h"
|
||||
#include "Engine/Profiler/ProfilerCPU.h"
|
||||
#include "Engine/Renderer/DrawCall.h"
|
||||
#include "Engine/Renderer/RenderList.h"
|
||||
@@ -57,15 +58,7 @@ public:
|
||||
{ +0.5f, +0.5f, 1.0f, 1.0f },
|
||||
{ -0.5f, +0.5f, 0.0f, 1.0f },
|
||||
};
|
||||
static uint16 indexBuffer[] =
|
||||
{
|
||||
0,
|
||||
1,
|
||||
2,
|
||||
0,
|
||||
2,
|
||||
3,
|
||||
};
|
||||
static uint16 indexBuffer[] = { 0, 1, 2, 0, 2, 3, };
|
||||
return VB->Init(GPUBufferDescription::Vertex(sizeof(SpriteParticleVertex), VertexCount, vertexBuffer)) || IB->Init(GPUBufferDescription::Index(sizeof(uint16), IndexCount, indexBuffer));
|
||||
}
|
||||
|
||||
@@ -117,24 +110,9 @@ SpriteParticleRenderer SpriteRenderer;
|
||||
|
||||
namespace ParticlesDrawCPU
|
||||
{
|
||||
struct ParticleSortKey
|
||||
{
|
||||
uint32 Index;
|
||||
float Order;
|
||||
|
||||
FORCE_INLINE static bool SortAscending(const ParticleSortKey& a, const ParticleSortKey& b)
|
||||
{
|
||||
return a.Order < b.Order;
|
||||
};
|
||||
|
||||
FORCE_INLINE static bool SortDescending(const ParticleSortKey& a, const ParticleSortKey& b)
|
||||
{
|
||||
return b.Order < a.Order;
|
||||
};
|
||||
};
|
||||
|
||||
Array<uint32> SortedIndices;
|
||||
Array<ParticleSortKey> ParticlesOrder;
|
||||
Array<uint32> SortingKeys[2];
|
||||
Array<int32> SortingIndices;
|
||||
Array<int32> SortedIndices;
|
||||
Array<float> RibbonTotalDistances;
|
||||
}
|
||||
|
||||
@@ -192,12 +170,6 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa
|
||||
// Prepare sorting data
|
||||
if (!buffer->GPU.SortedIndices)
|
||||
buffer->AllocateSortBuffer();
|
||||
auto& particlesOrder = ParticlesDrawCPU::ParticlesOrder;
|
||||
particlesOrder.Clear();
|
||||
particlesOrder.Resize(buffer->CPU.Count);
|
||||
auto& sortedIndices = ParticlesDrawCPU::SortedIndices;
|
||||
sortedIndices.Clear();
|
||||
sortedIndices.Resize(buffer->Capacity * emitter->Graph.SortModules.Count());
|
||||
|
||||
// Execute all sorting modules
|
||||
for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.SortModules.Count(); moduleIndex++)
|
||||
@@ -205,24 +177,27 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa
|
||||
auto module = emitter->Graph.SortModules[moduleIndex];
|
||||
const int32 sortedIndicesOffset = module->SortedIndicesOffset;
|
||||
const auto sortMode = static_cast<ParticleSortMode>(module->Values[2].AsInt);
|
||||
if (sortedIndicesOffset >= sortedIndices.Count())
|
||||
continue;
|
||||
|
||||
const int32 stride = buffer->Stride;
|
||||
const int32 listSize = buffer->CPU.Count;
|
||||
#define PREPARE_CACHE(list) (ParticlesDrawCPU::list).Clear(); (ParticlesDrawCPU::list).Resize(listSize)
|
||||
PREPARE_CACHE(SortingKeys[0]);
|
||||
PREPARE_CACHE(SortingKeys[1]);
|
||||
PREPARE_CACHE(SortingIndices);
|
||||
#undef PREPARE_CACHE
|
||||
uint32* sortedKeys = ParticlesDrawCPU::SortingKeys[0].Get();
|
||||
const uint32 sortKeyXor = sortMode != ParticleSortMode::CustomAscending ? MAX_uint32 : 0;
|
||||
switch (sortMode)
|
||||
{
|
||||
case ParticleSortMode::ViewDepth:
|
||||
{
|
||||
const Matrix viewProjection = renderContext.View.ViewProjection();
|
||||
const int32 stride = buffer->Stride;
|
||||
byte* positionPtr = buffer->CPU.Buffer.Get() + emitter->Graph.GetPositionAttributeOffset();
|
||||
|
||||
if (emitter->SimulationSpace == ParticlesSimulationSpace::Local)
|
||||
{
|
||||
for (int32 i = 0; i < buffer->CPU.Count; i++)
|
||||
{
|
||||
Vector3 position = *(Vector3*)positionPtr;
|
||||
particlesOrder[i].Index = i;
|
||||
particlesOrder[i].Order = Matrix::TransformPosition(viewProjection, Matrix::TransformPosition(drawCall.World, position)).W;
|
||||
// TODO: use SIMD
|
||||
sortedKeys[i] = RenderTools::ComputeDistanceSortKey(Matrix::TransformPosition(viewProjection, Matrix::TransformPosition(drawCall.World, *(Vector3*)positionPtr)).W) ^ sortKeyXor;
|
||||
positionPtr += stride;
|
||||
}
|
||||
}
|
||||
@@ -230,29 +205,22 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa
|
||||
{
|
||||
for (int32 i = 0; i < buffer->CPU.Count; i++)
|
||||
{
|
||||
Vector3 position = *(Vector3*)positionPtr;
|
||||
particlesOrder[i].Index = i;
|
||||
particlesOrder[i].Order = Matrix::TransformPosition(viewProjection, position).W;
|
||||
sortedKeys[i] = RenderTools::ComputeDistanceSortKey(Matrix::TransformPosition(viewProjection, *(Vector3*)positionPtr).W) ^ sortKeyXor;
|
||||
positionPtr += stride;
|
||||
}
|
||||
}
|
||||
|
||||
Sorting::QuickSort(particlesOrder.Get(), particlesOrder.Count(), &ParticlesDrawCPU::ParticleSortKey::SortDescending);
|
||||
break;
|
||||
}
|
||||
case ParticleSortMode::ViewDistance:
|
||||
{
|
||||
const Vector3 viewPosition = renderContext.View.Position;
|
||||
const int32 stride = buffer->Stride;
|
||||
byte* positionPtr = buffer->CPU.Buffer.Get() + emitter->Graph.GetPositionAttributeOffset();
|
||||
|
||||
if (emitter->SimulationSpace == ParticlesSimulationSpace::Local)
|
||||
{
|
||||
for (int32 i = 0; i < buffer->CPU.Count; i++)
|
||||
{
|
||||
Vector3 position = *(Vector3*)positionPtr;
|
||||
particlesOrder[i].Index = i;
|
||||
particlesOrder[i].Order = (viewPosition - Vector3::Transform(position, drawCall.World)).LengthSquared();
|
||||
// TODO: use SIMD
|
||||
sortedKeys[i] = RenderTools::ComputeDistanceSortKey((viewPosition - Vector3::Transform(*(Vector3*)positionPtr, drawCall.World)).LengthSquared()) ^ sortKeyXor;
|
||||
positionPtr += stride;
|
||||
}
|
||||
}
|
||||
@@ -260,14 +228,11 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa
|
||||
{
|
||||
for (int32 i = 0; i < buffer->CPU.Count; i++)
|
||||
{
|
||||
Vector3 position = *(Vector3*)positionPtr;
|
||||
particlesOrder[i].Index = i;
|
||||
particlesOrder[i].Order = (viewPosition - position).LengthSquared();
|
||||
// TODO: use SIMD
|
||||
sortedKeys[i] = RenderTools::ComputeDistanceSortKey((viewPosition - *(Vector3*)positionPtr).LengthSquared()) ^ sortKeyXor;
|
||||
positionPtr += stride;
|
||||
}
|
||||
}
|
||||
|
||||
Sorting::QuickSort(particlesOrder.Get(), particlesOrder.Count(), &ParticlesDrawCPU::ParticleSortKey::SortDescending);
|
||||
break;
|
||||
}
|
||||
case ParticleSortMode::CustomAscending:
|
||||
@@ -276,20 +241,12 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa
|
||||
int32 attributeIdx = module->Attributes[0];
|
||||
if (attributeIdx == -1)
|
||||
break;
|
||||
const int32 stride = buffer->Stride;
|
||||
byte* attributePtr = buffer->CPU.Buffer.Get() + emitter->Graph.Layout.Attributes[attributeIdx].Offset;
|
||||
|
||||
for (int32 i = 0; i < buffer->CPU.Count; i++)
|
||||
{
|
||||
particlesOrder[i].Index = i;
|
||||
particlesOrder[i].Order = *(float*)attributePtr;
|
||||
sortedKeys[i] = RenderTools::ComputeDistanceSortKey(*(float*)attributePtr) ^ sortKeyXor;
|
||||
attributePtr += stride;
|
||||
}
|
||||
|
||||
if (sortMode == ParticleSortMode::CustomAscending)
|
||||
Sorting::QuickSort(particlesOrder.Get(), particlesOrder.Count(), &ParticlesDrawCPU::ParticleSortKey::SortAscending);
|
||||
else
|
||||
Sorting::QuickSort(particlesOrder.Get(), particlesOrder.Count(), &ParticlesDrawCPU::ParticleSortKey::SortDescending);
|
||||
break;
|
||||
}
|
||||
#if !BUILD_RELEASE
|
||||
@@ -298,17 +255,31 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa
|
||||
#endif
|
||||
}
|
||||
|
||||
// Copy sorted indices
|
||||
for (int32 k = 0; k < buffer->CPU.Count; k++)
|
||||
sortedIndices[sortedIndicesOffset + k] = particlesOrder[k].Index;
|
||||
}
|
||||
// Generate sorting indices
|
||||
int32* sortedIndices;
|
||||
{
|
||||
ParticlesDrawCPU::SortedIndices.Resize(listSize);
|
||||
sortedIndices = ParticlesDrawCPU::SortedIndices.Get();
|
||||
for (int i = 0; i < listSize; i++)
|
||||
sortedIndices[i] = i;
|
||||
}
|
||||
|
||||
// Upload CPU particles indices
|
||||
context->UpdateBuffer(buffer->GPU.SortedIndices, sortedIndices.Get(), sortedIndices.Count() * sizeof(int32));
|
||||
// Sort keys with indices
|
||||
{
|
||||
Sorting::RadixSort(sortedKeys, sortedIndices, ParticlesDrawCPU::SortingKeys[1].Get(), ParticlesDrawCPU::SortingIndices.Get(), listSize);
|
||||
}
|
||||
|
||||
// Upload CPU particles indices
|
||||
{
|
||||
context->UpdateBuffer(buffer->GPU.SortedIndices, sortedIndices, listSize * sizeof(int32), sortedIndicesOffset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Upload CPU particles data to GPU
|
||||
context->UpdateBuffer(buffer->GPU.Buffer, buffer->CPU.Buffer.Get(), buffer->CPU.Count * buffer->Stride);
|
||||
{
|
||||
context->UpdateBuffer(buffer->GPU.Buffer, buffer->CPU.Buffer.Get(), buffer->CPU.Count * buffer->Stride);
|
||||
}
|
||||
|
||||
// Check if need to setup ribbon modules
|
||||
int32 ribbonModuleIndex = 0;
|
||||
@@ -409,7 +380,6 @@ void DrawEmitterCPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCa
|
||||
ribbonSegmentDistancesBuffer = GPUDevice::Instance->CreateBuffer(TEXT("RibbonSegmentDistances"));
|
||||
ribbonSegmentDistancesBuffer->Init(GPUBufferDescription::Typed(buffer->Capacity, PixelFormat::R32_Float, false, GPUResourceUsage::Dynamic));
|
||||
}
|
||||
|
||||
context->UpdateBuffer(ribbonSegmentDistancesBuffer, totalDistances.Get(), totalDistances.Count() * sizeof(float));
|
||||
}
|
||||
|
||||
@@ -1195,7 +1165,9 @@ void ParticleManagerService::Dispose()
|
||||
}
|
||||
CleanupGPUParticlesSorting();
|
||||
#endif
|
||||
ParticlesDrawCPU::ParticlesOrder.SetCapacity(0);
|
||||
ParticlesDrawCPU::SortingKeys[0].SetCapacity(0);
|
||||
ParticlesDrawCPU::SortingKeys[1].SetCapacity(0);
|
||||
ParticlesDrawCPU::SortingIndices.SetCapacity(0);
|
||||
ParticlesDrawCPU::SortedIndices.SetCapacity(0);
|
||||
ParticlesDrawCPU::RibbonTotalDistances.SetCapacity(0);
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include "Engine/Graphics/PostProcessBase.h"
|
||||
#include "Engine/Graphics/GPULimits.h"
|
||||
#include "Engine/Graphics/RenderTargetPool.h"
|
||||
#include "Engine/Graphics/RenderTools.h"
|
||||
#include "Engine/Profiler/Profiler.h"
|
||||
#include "Engine/Content/Assets/CubeTexture.h"
|
||||
#include "Engine/Level/Scene/Lightmap.h"
|
||||
@@ -32,8 +33,6 @@ namespace
|
||||
Array<RenderList*> FreeRenderList;
|
||||
}
|
||||
|
||||
#define PREPARE_CACHE(list) (list).Clear(); (list).Resize(listSize)
|
||||
|
||||
void RendererDirectionalLightData::SetupLightData(LightData* data, const RenderView& view, bool useShadow) const
|
||||
{
|
||||
data->SpotAngles.X = -2.0f;
|
||||
@@ -399,108 +398,6 @@ void RenderList::AddDrawCall(DrawPass drawModes, StaticFlags staticFlags, DrawCa
|
||||
}
|
||||
}
|
||||
|
||||
// Converts a distance value into an unsigned integer sorting key (http://aras-p.info/blog/2014/01/16/rough-sorting-by-depth/).
// The raw float bits are remapped: the sign bit is flipped when it is clear and all bits are flipped when it is set.
uint32 ComputeDistance(float distance)
{
    // NOTE(review): reads the float through a uint32 pointer (type punning) - confirm this is fine for all supported compilers
    const uint32 bits = *((uint32*)&distance);

    // Sign bit set -> flip every bit, sign bit clear -> flip only the sign bit
    const uint32 flipMask = (bits & 0x80000000) != 0 ? 0xFFFFFFFF : 0x80000000;
    return bits ^ flipMask;
}
|
||||
|
||||
/// <summary>
|
||||
/// Sorts the linear data array using Radix Sort algorithm (uses temporary keys collection).
|
||||
/// </summary>
|
||||
/// <param name="inputKeys">The data pointer to the input sorting keys array. When this method completes it contains a pointer to the original data or the temporary depending on the algorithm passes count. Use it as a results container.</param>
|
||||
/// <param name="inputValues">The data pointer to the input values array. When this method completes it contains a pointer to the original data or the temporary depending on the algorithm passes count. Use it as a results container.</param>
|
||||
/// <param name="tmpKeys">The data pointer to the temporary sorting keys array.</param>
|
||||
/// <param name="tmpValues">The data pointer to the temporary values array.</param>
|
||||
/// <param name="count">The elements count.</param>
|
||||
template<typename T, typename U>
|
||||
static void RadixSort(T*& inputKeys, U* inputValues, T* tmpKeys, U* tmpValues, int32 count)
|
||||
{
|
||||
// Based on: https://github.com/bkaradzic/bx/blob/master/include/bx/inline/sort.inl
|
||||
enum
|
||||
{
|
||||
RADIXSORT_BITS = 11,
|
||||
RADIXSORT_HISTOGRAM_SIZE = 1 << RADIXSORT_BITS,
|
||||
RADIXSORT_BIT_MASK = RADIXSORT_HISTOGRAM_SIZE - 1
|
||||
};
|
||||
|
||||
if (count < 2)
|
||||
return;
|
||||
|
||||
T* keys = inputKeys;
|
||||
T* tempKeys = tmpKeys;
|
||||
U* values = inputValues;
|
||||
U* tempValues = tmpValues;
|
||||
|
||||
uint32 histogram[RADIXSORT_HISTOGRAM_SIZE];
|
||||
uint16 shift = 0;
|
||||
int32 pass = 0;
|
||||
for (; pass < 6; pass++)
|
||||
{
|
||||
Platform::MemoryClear(histogram, sizeof(uint32) * RADIXSORT_HISTOGRAM_SIZE);
|
||||
|
||||
bool sorted = true;
|
||||
T key = keys[0];
|
||||
T prevKey = key;
|
||||
for (int32 i = 0; i < count; i++)
|
||||
{
|
||||
key = keys[i];
|
||||
const uint16 index = (key >> shift) & RADIXSORT_BIT_MASK;
|
||||
++histogram[index];
|
||||
sorted &= prevKey <= key;
|
||||
prevKey = key;
|
||||
}
|
||||
|
||||
if (sorted)
|
||||
{
|
||||
goto end;
|
||||
}
|
||||
|
||||
uint32 offset = 0;
|
||||
for (int32 i = 0; i < RADIXSORT_HISTOGRAM_SIZE; ++i)
|
||||
{
|
||||
const uint32 cnt = histogram[i];
|
||||
histogram[i] = offset;
|
||||
offset += cnt;
|
||||
}
|
||||
|
||||
for (int32 i = 0; i < count; i++)
|
||||
{
|
||||
const T k = keys[i];
|
||||
const uint16 index = (k >> shift) & RADIXSORT_BIT_MASK;
|
||||
const uint32 dest = histogram[index]++;
|
||||
tempKeys[dest] = k;
|
||||
tempValues[dest] = values[i];
|
||||
}
|
||||
|
||||
T* const swapKeys = tempKeys;
|
||||
tempKeys = keys;
|
||||
keys = swapKeys;
|
||||
|
||||
U* const swapValues = tempValues;
|
||||
tempValues = values;
|
||||
values = swapValues;
|
||||
|
||||
shift += RADIXSORT_BITS;
|
||||
}
|
||||
|
||||
end:
|
||||
if (pass & 1)
|
||||
{
|
||||
// Use temporary keys as a result
|
||||
inputKeys = tmpKeys;
|
||||
|
||||
#if 0
|
||||
// Use temporary values as a result
|
||||
inputValues = tmpValues;
|
||||
#else
|
||||
// Odd number of passes needs to do copy to the destination
|
||||
Platform::MemoryCopy(inputValues, tmpValues, sizeof(U) * count);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
/// <summary>
|
||||
@@ -530,9 +427,11 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD
|
||||
const Plane plane(renderContext.View.Position, renderContext.View.Direction);
|
||||
|
||||
// Peek shared memory
|
||||
#define PREPARE_CACHE(list) (list).Clear(); (list).Resize(listSize)
|
||||
PREPARE_CACHE(SortingKeys[0]);
|
||||
PREPARE_CACHE(SortingKeys[1]);
|
||||
PREPARE_CACHE(SortingIndices);
|
||||
#undef PREPARE_CACHE
|
||||
uint64* sortedKeys = SortingKeys[0].Get();
|
||||
|
||||
// Generate sort keys (by depth) and batch keys (higher bits)
|
||||
@@ -541,7 +440,7 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD
|
||||
{
|
||||
auto& drawCall = DrawCalls[list.Indices[i]];
|
||||
const auto distance = CollisionsHelper::DistancePlanePoint(plane, drawCall.ObjectPosition);
|
||||
const uint32 sortKey = ComputeDistance(distance) ^ sortKeyXor;
|
||||
const uint32 sortKey = RenderTools::ComputeDistanceSortKey(distance) ^ sortKeyXor;
|
||||
int32 batchKey = GetHash(drawCall.Geometry.IndexBuffer);
|
||||
batchKey = (batchKey * 397) ^ GetHash(drawCall.Geometry.VertexBuffers[0]);
|
||||
batchKey = (batchKey * 397) ^ GetHash(drawCall.Geometry.VertexBuffers[1]);
|
||||
@@ -560,7 +459,10 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD
|
||||
}
|
||||
|
||||
// Sort draw calls indices
|
||||
RadixSort(sortedKeys, list.Indices.Get(), SortingKeys[1].Get(), SortingIndices.Get(), listSize);
|
||||
int32* resultIndices = list.Indices.Get();
|
||||
Sorting::RadixSort(sortedKeys, resultIndices, SortingKeys[1].Get(), SortingIndices.Get(), listSize);
|
||||
if (resultIndices != list.Indices.Get())
|
||||
Platform::MemoryCopy(list.Indices.Get(), resultIndices, sizeof(int32) * listSize);
|
||||
|
||||
// Perform draw calls batching
|
||||
list.Batches.Clear();
|
||||
|
||||
Reference in New Issue
Block a user