diff --git a/Content/Shaders/BitonicSort.flax b/Content/Shaders/BitonicSort.flax
index 69d773379..4d388b3fc 100644
--- a/Content/Shaders/BitonicSort.flax
+++ b/Content/Shaders/BitonicSort.flax
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:07d45b7f2085a28938e3bef090e259c0698a1987f9cd69df952168524ce07193
-size 6877
+oid sha256:190867e40ef793168988f358edddeb92819cc4f972f4cf9ac34cc764a06eb6e3
+size 6824
diff --git a/Content/Shaders/GPUParticlesSorting.flax b/Content/Shaders/GPUParticlesSorting.flax
index 2045fd649..35cebf7b6 100644
--- a/Content/Shaders/GPUParticlesSorting.flax
+++ b/Content/Shaders/GPUParticlesSorting.flax
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:a16a973f4be075f8531a1b1551e33423b014da1e8b348f2672464ee21692e57a
-size 2556
+oid sha256:80ed5f51cd982ea521e3588708db54d79b905ee41e88cfd41eff976b9b50514a
+size 2518
diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp
index 2423feed2..dc2d9054d 100644
--- a/Source/Engine/Particles/Particles.cpp
+++ b/Source/Engine/Particles/Particles.cpp
@@ -848,81 +848,68 @@ void DrawEmittersGPU(RenderContextBatch& renderContextBatch)
context->BindCB(0, GPUParticlesSortingCB);
// Generate sort keys for each particle
- for (const GPUEmitterDraw& draw : GPUEmitterDraws)
{
- if (!draw.Sorting)
- continue;
- ASSERT(draw.Buffer->GPU.SortingKeysBuffer);
-
- // Generate sort keys for particles
- ParticleEmitter* emitter = draw.Buffer->Emitter;
- for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.SortModules.Count(); moduleIndex++)
+ PROFILE_GPU("Gen Sort Keys");
+ for (const GPUEmitterDraw& draw : GPUEmitterDraws)
{
- auto module = emitter->Graph.SortModules[moduleIndex];
- const auto sortMode = (ParticleSortMode)module->Values[2].AsInt;
-
- // Generate sorting keys based on sorting mode
- GPUParticlesSortingData data;
- data.ParticleCounterOffset = draw.Buffer->GPU.ParticleCounterOffset;
- data.ParticleStride = draw.Buffer->Stride;
- data.ParticleCapacity = draw.Buffer->Capacity;
- int32 permutationIndex;
- switch (sortMode)
+ if (!draw.Sorting)
+ continue;
+ ASSERT(draw.Buffer->GPU.SortingKeys);
+ ParticleEmitter* emitter = draw.Buffer->Emitter;
+ for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.SortModules.Count(); moduleIndex++)
{
- case ParticleSortMode::ViewDepth:
- {
- permutationIndex = 0;
- data.PositionOffset = emitter->Graph.GetPositionAttributeOffset();
- const Matrix viewProjection = renderContextBatch.GetMainContext().View.ViewProjection();
- if (emitter->SimulationSpace == ParticlesSimulationSpace::Local)
+ auto module = emitter->Graph.SortModules[moduleIndex];
+ // TODO: add support for module->SortedIndicesOffset (multiple sort modules)
+ const auto sortMode = (ParticleSortMode)module->Values[2].AsInt;
+ GPUParticlesSortingData data;
+ data.ParticleCounterOffset = draw.Buffer->GPU.ParticleCounterOffset;
+ data.ParticleStride = draw.Buffer->Stride;
+ data.ParticleCapacity = draw.Buffer->Capacity;
+ int32 permutationIndex;
+ switch (sortMode)
{
- Matrix matrix;
- Matrix::Multiply(draw.DrawCall.World, viewProjection, matrix);
- Matrix::Transpose(matrix, data.PositionTransform);
- }
- else
+ case ParticleSortMode::ViewDepth:
{
- Matrix::Transpose(viewProjection, data.PositionTransform);
- }
- break;
- }
- case ParticleSortMode::ViewDistance:
- {
- permutationIndex = 1;
- data.PositionOffset = emitter->Graph.GetPositionAttributeOffset();
- data.ViewPosition = renderContextBatch.GetMainContext().View.Position;
- if (emitter->SimulationSpace == ParticlesSimulationSpace::Local)
- {
- Matrix::Transpose(draw.DrawCall.World, data.PositionTransform);
- }
- else
- {
- Matrix::Transpose(Matrix::Identity, data.PositionTransform);
- }
- break;
- }
- case ParticleSortMode::CustomAscending:
- case ParticleSortMode::CustomDescending:
- {
- permutationIndex = 2;
- int32 attributeIdx = module->Attributes[0];
- if (attributeIdx == -1)
+ permutationIndex = 0;
+ data.PositionOffset = emitter->Graph.GetPositionAttributeOffset();
+ const Matrix viewProjection = renderContextBatch.GetMainContext().View.ViewProjection();
+ if (emitter->SimulationSpace == ParticlesSimulationSpace::Local)
+ Matrix::Transpose(draw.DrawCall.World * viewProjection, data.PositionTransform);
+ else
+ Matrix::Transpose(viewProjection, data.PositionTransform);
break;
- data.CustomOffset = emitter->Graph.Layout.Attributes[attributeIdx].Offset;
- break;
+ }
+ case ParticleSortMode::ViewDistance:
+ {
+ permutationIndex = 1;
+ data.PositionOffset = emitter->Graph.GetPositionAttributeOffset();
+ data.ViewPosition = renderContextBatch.GetMainContext().View.Position;
+ if (emitter->SimulationSpace == ParticlesSimulationSpace::Local)
+ Matrix::Transpose(draw.DrawCall.World, data.PositionTransform);
+ else
+ Matrix::Transpose(Matrix::Identity, data.PositionTransform);
+ break;
+ }
+ case ParticleSortMode::CustomAscending:
+ case ParticleSortMode::CustomDescending:
+ {
+ permutationIndex = 2;
+ int32 attributeIdx = module->Attributes[0];
+ if (attributeIdx == -1)
+ break;
+ data.CustomOffset = emitter->Graph.Layout.Attributes[attributeIdx].Offset;
+ break;
+ }
+ }
+ context->UpdateCB(GPUParticlesSortingCB, &data);
+ context->BindSR(0, draw.Buffer->GPU.Buffer->View());
+ context->BindUA(0, draw.Buffer->GPU.SortedIndices->View());
+ context->BindUA(1, draw.Buffer->GPU.SortingKeys->View());
+ const int32 threadGroupSize = 1024;
+ context->Dispatch(GPUParticlesSortingCS[permutationIndex], Math::DivideAndRoundUp(draw.Buffer->GPU.ParticlesCountMax, threadGroupSize), 1, 1);
}
-#if !BUILD_RELEASE
- default:
- CRASH;
- return;
-#endif
- }
- context->UpdateCB(GPUParticlesSortingCB, &data);
- context->BindSR(0, draw.Buffer->GPU.Buffer->View());
- context->BindUA(0, draw.Buffer->GPU.SortingKeysBuffer->View());
- const int32 threadGroupSize = 1024;
- context->Dispatch(GPUParticlesSortingCS[permutationIndex], Math::DivideAndRoundUp(draw.Buffer->GPU.ParticlesCountMax, threadGroupSize), 1, 1);
}
+ context->ResetUA();
}
// Run sorting
@@ -930,17 +917,18 @@ void DrawEmittersGPU(RenderContextBatch& renderContextBatch)
{
if (!draw.Sorting)
continue;
- ASSERT(draw.Buffer->GPU.SortingKeysBuffer);
// Execute all sorting modules
ParticleEmitter* emitter = draw.Buffer->Emitter;
for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.SortModules.Count(); moduleIndex++)
{
auto module = emitter->Graph.SortModules[moduleIndex];
+ // TODO: add support for module->SortedIndicesOffset (multiple sort modules)
const auto sortMode = (ParticleSortMode)module->Values[2].AsInt;
bool sortAscending = sortMode == ParticleSortMode::CustomAscending;
- BitonicSort::Instance()->Sort(context, draw.Buffer->GPU.SortingKeysBuffer, draw.Buffer->GPU.Buffer, draw.Buffer->GPU.ParticleCounterOffset, sortAscending, draw.Buffer->GPU.SortedIndices, draw.Buffer->GPU.ParticlesCountMax);
- // TODO: use args buffer from GPUIndirectArgsBuffer instead of internal from BitonicSort to get rid of UAV barrier (run all sorting in parallel)
+ BitonicSort::Instance()->Sort(context, draw.Buffer->GPU.SortedIndices, draw.Buffer->GPU.SortingKeys, draw.Buffer->GPU.Buffer, draw.Buffer->GPU.ParticleCounterOffset, sortAscending, draw.Buffer->GPU.ParticlesCountMax);
+ // TODO: use args buffer from GPUIndirectArgsBuffer instead of internal from BitonicSort to get rid of UAV barrier (all sorting in parallel)
+ // TODO: run sorting of small emitters (less than 2k particles) in a separate loop as a pass without UAV barriers (all sorting in parallel)
}
}
}
diff --git a/Source/Engine/Particles/ParticlesData.cpp b/Source/Engine/Particles/ParticlesData.cpp
index 074cc73d6..226d06d3d 100644
--- a/Source/Engine/Particles/ParticlesData.cpp
+++ b/Source/Engine/Particles/ParticlesData.cpp
@@ -98,7 +98,7 @@ ParticleBuffer::~ParticleBuffer()
{
SAFE_DELETE_GPU_RESOURCE(GPU.Buffer);
SAFE_DELETE_GPU_RESOURCE(GPU.BufferSecondary);
- SAFE_DELETE_GPU_RESOURCE(GPU.SortingKeysBuffer);
+ SAFE_DELETE_GPU_RESOURCE(GPU.SortingKeys);
SAFE_DELETE_GPU_RESOURCE(GPU.SortedIndices);
SAFE_DELETE(GPU.RibbonIndexBufferDynamic);
SAFE_DELETE(GPU.RibbonVertexBufferDynamic);
@@ -161,7 +161,7 @@ bool ParticleBuffer::Init(ParticleEmitter* emitter)
bool ParticleBuffer::AllocateSortBuffer()
{
- ASSERT(Emitter && GPU.SortedIndices == nullptr && GPU.SortingKeysBuffer == nullptr);
+ ASSERT(Emitter && GPU.SortedIndices == nullptr && GPU.SortingKeys == nullptr);
if (Emitter->Graph.SortModules.IsEmpty())
return false;
@@ -170,7 +170,7 @@ bool ParticleBuffer::AllocateSortBuffer()
case ParticlesSimulationMode::CPU:
{
const int32 sortedIndicesSize = Capacity * sizeof(uint32) * Emitter->Graph.SortModules.Count();
- GPU.SortedIndices = GPUDevice::Instance->CreateBuffer(TEXT("SortedIndices"));
+ GPU.SortedIndices = GPUDevice::Instance->CreateBuffer(TEXT("ParticleSortedIndices"));
if (GPU.SortedIndices->Init(GPUBufferDescription::Buffer(sortedIndicesSize, GPUBufferFlags::ShaderResource, PixelFormat::R32_UInt, nullptr, sizeof(uint32), GPUResourceUsage::Dynamic)))
return true;
break;
@@ -178,12 +178,12 @@ bool ParticleBuffer::AllocateSortBuffer()
#if COMPILE_WITH_GPU_PARTICLES
case ParticlesSimulationMode::GPU:
{
- const int32 sortedIndicesSize = Capacity * sizeof(uint32) * Emitter->Graph.SortModules.Count();
- GPU.SortingKeysBuffer = GPUDevice::Instance->CreateBuffer(TEXT("ParticleSortingKeysBuffer"));
- if (GPU.SortingKeysBuffer->Init(GPUBufferDescription::Structured(Capacity, sizeof(float) + sizeof(uint32), true)))
+ const int32 sortedIndicesCount = Capacity * Emitter->Graph.SortModules.Count();
+ GPU.SortingKeys = GPUDevice::Instance->CreateBuffer(TEXT("ParticleSortingKeys"));
+ if (GPU.SortingKeys->Init(GPUBufferDescription::Buffer(sortedIndicesCount * sizeof(float), GPUBufferFlags::UnorderedAccess, PixelFormat::R32_Float, nullptr, sizeof(float))))
return true;
- GPU.SortedIndices = GPUDevice::Instance->CreateBuffer(TEXT("SortedIndices"));
- if (GPU.SortedIndices->Init(GPUBufferDescription::Buffer(sortedIndicesSize, GPUBufferFlags::ShaderResource | GPUBufferFlags::UnorderedAccess, PixelFormat::R32_UInt, nullptr, sizeof(uint32))))
+ GPU.SortedIndices = GPUDevice::Instance->CreateBuffer(TEXT("ParticleSortedIndices"));
+ if (GPU.SortedIndices->Init(GPUBufferDescription::Buffer(sortedIndicesCount * sizeof(uint32), GPUBufferFlags::ShaderResource | GPUBufferFlags::UnorderedAccess, PixelFormat::R32_UInt, nullptr, sizeof(uint32))))
return true;
break;
}
diff --git a/Source/Engine/Particles/ParticlesData.h b/Source/Engine/Particles/ParticlesData.h
index 5a5ebcba4..521138f37 100644
--- a/Source/Engine/Particles/ParticlesData.h
+++ b/Source/Engine/Particles/ParticlesData.h
@@ -206,7 +206,7 @@ public:
///
/// The GPU particles sorting keys buffer. Contains the sorting key (float) for every particle; the matching particle indices are stored in SortedIndices. Used to sort particles.
///
- GPUBuffer* SortingKeysBuffer = nullptr;
+ GPUBuffer* SortingKeys = nullptr;
///
/// The particles indices buffer (GPU side).
diff --git a/Source/Engine/Renderer/Utils/BitonicSort.cpp b/Source/Engine/Renderer/Utils/BitonicSort.cpp
index ed7ece05b..cd0f627f5 100644
--- a/Source/Engine/Renderer/Utils/BitonicSort.cpp
+++ b/Source/Engine/Renderer/Utils/BitonicSort.cpp
@@ -8,7 +8,7 @@
GPU_CB_STRUCT(Data {
float NullItemKey;
- uint32 NullItemValue;
+ uint32 NullItemIndex;
uint32 CounterOffset;
uint32 MaxIterations;
uint32 LoopK;
@@ -47,7 +47,6 @@ bool BitonicSort::Init()
bool BitonicSort::setupResources()
{
- // Check if shader has not been loaded
if (!_shader->IsLoaded())
return true;
const auto shader = _shader->GetShader();
@@ -59,14 +58,12 @@ bool BitonicSort::setupResources()
_preSortCS.Get(shader, "CS_PreSort");
_innerSortCS = shader->GetCS("CS_InnerSort");
_outerSortCS = shader->GetCS("CS_OuterSort");
- _copyIndicesCS = shader->GetCS("CS_CopyIndices");
return false;
}
void BitonicSort::Dispose()
{
- // Base
RendererPass::Dispose();
// Cleanup
@@ -76,17 +73,16 @@ void BitonicSort::Dispose()
_preSortCS.Clear();
_innerSortCS = nullptr;
_outerSortCS = nullptr;
- _copyIndicesCS = nullptr;
_shader = nullptr;
}
-void BitonicSort::Sort(GPUContext* context, GPUBuffer* sortingKeysBuffer, GPUBuffer* countBuffer, uint32 counterOffset, bool sortAscending, GPUBuffer* sortedIndicesBuffer, uint32 maxElements)
+void BitonicSort::Sort(GPUContext* context, GPUBuffer* indicesBuffer, GPUBuffer* keysBuffer, GPUBuffer* countBuffer, uint32 counterOffset, bool sortAscending, int32 maxElements)
{
- ASSERT(context && sortingKeysBuffer && countBuffer);
+ ASSERT(context && indicesBuffer && keysBuffer && countBuffer);
if (checkIfSkipPass())
return;
PROFILE_GPU_CPU("Bitonic Sort");
- uint32 maxNumElements = sortingKeysBuffer->GetSize() / sizeof(uint64);
+ uint32 maxNumElements = indicesBuffer->GetElementsCount();
if (maxElements > 0 && maxElements < maxNumElements)
maxNumElements = maxElements;
const uint32 alignedMaxNumElements = Math::RoundUpToPowerOf2(maxNumElements);
@@ -96,7 +92,7 @@ void BitonicSort::Sort(GPUContext* context, GPUBuffer* sortingKeysBuffer, GPUBuf
Data data;
data.CounterOffset = counterOffset;
data.NullItemKey = sortAscending ? MAX_float : -MAX_float;
- data.NullItemValue = 0;
+ data.NullItemIndex = 0;
data.KeySign = sortAscending ? -1.0f : 1.0f;
data.MaxIterations = maxIterations;
data.LoopK = 0;
@@ -110,7 +106,8 @@ void BitonicSort::Sort(GPUContext* context, GPUBuffer* sortingKeysBuffer, GPUBuf
{
// Use pre-sort with smaller thread group size (eg. for small particle emitters sorting)
const int32 permutation = maxNumElements < 128 ? 1 : 0;
- context->BindUA(0, sortingKeysBuffer->View());
+ context->BindUA(0, indicesBuffer->View());
+ context->BindUA(1, keysBuffer->View());
context->Dispatch(_preSortCS.Get(permutation), 1, 1, 1);
}
else
@@ -120,7 +117,8 @@ void BitonicSort::Sort(GPUContext* context, GPUBuffer* sortingKeysBuffer, GPUBuf
context->Dispatch(_indirectArgsCS, 1, 1, 1);
// Pre-Sort the buffer up to k = 2048 (this also pads the list with invalid indices that will drift to the end of the sorted list)
- context->BindUA(0, sortingKeysBuffer->View());
+ context->BindUA(0, indicesBuffer->View());
+ context->BindUA(1, keysBuffer->View());
context->DispatchIndirect(_preSortCS.Get(0), _dispatchArgsBuffer, 0);
// We have already pre-sorted up through k = 2048 when first writing our list, so we continue sorting with k = 4096
@@ -144,27 +142,4 @@ void BitonicSort::Sort(GPUContext* context, GPUBuffer* sortingKeysBuffer, GPUBuf
}
context->ResetUA();
-
- if (sortedIndicesBuffer)
- {
- // Copy indices to another buffer
-#if !BUILD_RELEASE
- switch (sortedIndicesBuffer->GetDescription().Format)
- {
- case PixelFormat::R32_UInt:
- case PixelFormat::R16_UInt:
- case PixelFormat::R8_UInt:
- break;
- default:
- LOG(Warning, "Invalid format {0} of sortedIndicesBuffer for BitonicSort. It needs to be UInt type.", (int32)sortedIndicesBuffer->GetDescription().Format);
- }
-#endif
- context->BindSR(1, sortingKeysBuffer->View());
- context->BindUA(0, sortedIndicesBuffer->View());
- // TODO: use indirect dispatch to match the items count for copy
- context->Dispatch(_copyIndicesCS, (alignedMaxNumElements + 1023) / 1024, 1, 1);
- }
-
- context->ResetUA();
- context->ResetSR();
}
diff --git a/Source/Engine/Renderer/Utils/BitonicSort.h b/Source/Engine/Renderer/Utils/BitonicSort.h
index 4280d5965..1fd5d50cd 100644
--- a/Source/Engine/Renderer/Utils/BitonicSort.h
+++ b/Source/Engine/Renderer/Utils/BitonicSort.h
@@ -26,16 +26,16 @@ private:
public:
/// <summary>
- /// Sorts the specified buffer of index-key pairs.
+ /// Sorts the specified buffers of index-key pairs.
/// </summary>
/// <param name="context">The GPU context.</param>
- /// <param name="sortingKeysBuffer">The sorting keys buffer. Used as a structured buffer of type Item (see above).</param>
+ /// <param name="indicesBuffer">The sorting indices buffer with an index for each item (sequence of: 0, 1, 2, 3...). After sorting, it represents the actual order of items based on their keys. Valid for uint value types - used as RWBuffer.</param>
+ /// <param name="keysBuffer">The sorting keys buffer with a sort value for each item (must match order of items in indicesBuffer). Valid for float value types - used as RWBuffer.</param>
/// <param name="countBuffer">The buffer that contains an items counter value.</param>
/// <param name="counterOffset">The offset into counter buffer to find count for this list. Must be a multiple of 4 bytes.</param>
/// <param name="sortAscending">True to sort in ascending order (smallest to largest), otherwise false to sort in descending order.</param>
- /// <param name="sortedIndicesBuffer">The output buffer for sorted values extracted from the sorted sortingKeysBuffer after algorithm run. Valid for uint value types - used as RWBuffer.</param>
/// <param name="maxElements">Optional upper limit of elements to sort. Can be used to optimize indirect dispatches allocation. If zero, then it gets calculated based on the input item buffer size.</param>
- void Sort(GPUContext* context, GPUBuffer* sortingKeysBuffer, GPUBuffer* countBuffer, uint32 counterOffset, bool sortAscending, GPUBuffer* sortedIndicesBuffer, uint32 maxElements = 0);
+ void Sort(GPUContext* context, GPUBuffer* indicesBuffer, GPUBuffer* keysBuffer, GPUBuffer* countBuffer, uint32 counterOffset, bool sortAscending, int32 maxElements = 0);
public:
diff --git a/Source/Shaders/BitonicSort.shader b/Source/Shaders/BitonicSort.shader
index 0f9d5e656..6538ff7ff 100644
--- a/Source/Shaders/BitonicSort.shader
+++ b/Source/Shaders/BitonicSort.shader
@@ -10,12 +10,12 @@
struct Item
{
float Key;
- uint Value;
+ uint Index;
};
META_CB_BEGIN(0, Data)
float NullItemKey;
-uint NullItemValue;
+uint NullItemIndex;
uint CounterOffset;
uint MaxIterations;
uint LoopK;
@@ -40,12 +40,12 @@ uint InsertOneBit(uint value, uint oneBitMask)
// (effectively a negation) or leave the value alone. When the KeySign is
// 1, we are sorting descending, so when A < B, they should swap. For an
// ascending sort, -A < -B should swap.
-bool ShouldSwap(Item a, Item b)
+bool ShouldSwap(float a, float b)
{
//return (a ^ NullItem) < (b ^ NullItem);
- //return (a.Key) < (b.Key);
- return (a.Key * KeySign) < (b.Key * KeySign);
+ //return (a) < (b);
+ return (a * KeySign) < (b * KeySign);
//return asfloat(a) < asfloat(b);
//return (asfloat(a) * KeySign) < (asfloat(b) * KeySign);
}
@@ -93,7 +93,8 @@ void CS_IndirectArgs(uint groupIndex : SV_GroupIndex)
#if defined(_CS_PreSort) || defined(_CS_InnerSort)
-RWStructuredBuffer<Item> SortBuffer : register(u0);
+RWBuffer<uint> SortedIndices : register(u0);
+RWBuffer<float> SortingKeys : register(u1);
groupshared Item SortData[THREAD_GROUP_SIZE * 2];
@@ -103,12 +104,13 @@ void LoadItem(uint element, uint count)
Item item;
if (element < count)
{
- item = SortBuffer[element];
+ item.Key = SortingKeys[element];
+ item.Index = SortedIndices[element];
}
else
{
item.Key = NullItemKey;
- item.Value = NullItemValue;
+ item.Index = NullItemIndex;
}
SortData[element & (THREAD_GROUP_SIZE * 2 - 1)] = item;
}
@@ -117,7 +119,9 @@ void StoreItem(uint element, uint count)
{
if (element < count)
{
- SortBuffer[element] = SortData[element & 2047];
+ Item item = SortData[element & ((THREAD_GROUP_SIZE * 2 - 1))];
+ SortingKeys[element] = item.Key;
+ SortedIndices[element] = item.Index;
}
}
@@ -153,7 +157,7 @@ void CS_PreSort(uint3 groupID : SV_GroupID, uint groupIndex : SV_GroupIndex)
Item a = SortData[index1];
Item b = SortData[index2];
- if (ShouldSwap(a, b))
+ if (ShouldSwap(a.Key, b.Key))
{
// Swap the items
SortData[index1] = b;
@@ -197,7 +201,7 @@ void CS_InnerSort(uint3 groupID : SV_GroupID, uint groupIndex : SV_GroupIndex)
Item a = SortData[index1];
Item b = SortData[index2];
- if (ShouldSwap(a, b))
+ if (ShouldSwap(a.Key, b.Key))
{
// Swap the items
SortData[index1] = b;
@@ -215,7 +219,8 @@ void CS_InnerSort(uint3 groupID : SV_GroupID, uint groupIndex : SV_GroupIndex)
#ifdef _CS_OuterSort
-RWStructuredBuffer<Item> SortBuffer : register(u0);
+RWBuffer<uint> SortedIndices : register(u0);
+RWBuffer<float> SortingKeys : register(u1);
META_CS(true, FEATURE_LEVEL_SM5)
[numthreads(1024, 1, 1)]
@@ -230,35 +235,19 @@ void CS_OuterSort(uint3 dispatchThreadId : SV_DispatchThreadID)
if (index2 >= count)
return;
- Item a = SortBuffer[index1];
- Item b = SortBuffer[index2];
+ float aKey = SortingKeys[index1];
+ float bKey = SortingKeys[index2];
- if (ShouldSwap(a, b))
+ if (ShouldSwap(aKey, bKey))
{
// Swap the items
- SortBuffer[index1] = b;
- SortBuffer[index2] = a;
+ SortingKeys[index1] = bKey;
+ SortingKeys[index2] = aKey;
+ uint aIndex = SortedIndices[index1];
+ uint bIndex = SortedIndices[index2];
+ SortedIndices[index1] = bIndex;
+ SortedIndices[index2] = aIndex;
}
}
#endif
-
-#ifdef _CS_CopyIndices
-
-StructuredBuffer<Item> SortBuffer : register(t1);
-RWBuffer<uint> SortedIndices : register(u0);
-
-META_CS(true, FEATURE_LEVEL_SM5)
-[numthreads(1024, 1, 1)]
-void CS_CopyIndices(uint3 dispatchThreadId : SV_DispatchThreadID)
-{
- const uint count = CounterBuffer.Load(CounterOffset);
- uint index = dispatchThreadId.x;
- if (index >= count)
- return;
-
- Item element = SortBuffer[index];
- SortedIndices[index] = element.Value;
-}
-
-#endif
diff --git a/Source/Shaders/GPUParticlesSorting.shader b/Source/Shaders/GPUParticlesSorting.shader
index 395172327..113096421 100644
--- a/Source/Shaders/GPUParticlesSorting.shader
+++ b/Source/Shaders/GPUParticlesSorting.shader
@@ -20,13 +20,9 @@ META_CB_END
// Particles data buffer
ByteAddressBuffer ParticlesData : register(t0);
-// Output sorting keys buffer (index + key)
-struct Item
-{
- float Key;
- uint Value;
-};
-RWStructuredBuffer<Item> SortingKeys : register(u0);
+// Sorting data (per-particle)
+RWBuffer<uint> SortedIndices : register(u0);
+RWBuffer<float> SortingKeys : register(u1);
float GetParticleFloat(uint particleIndex, int offset)
{
@@ -78,8 +74,6 @@ void CS_Sort(uint3 dispatchThreadId : SV_DispatchThreadID)
#endif
// Write sorting index-key pair
- Item item;
- item.Key = sortKey;
- item.Value = index;
- SortingKeys[index] = item;
+ SortedIndices[index] = index;
+ SortingKeys[index] = sortKey;
}