diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index dc2d9054d..0ab1e0792 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -208,26 +208,27 @@ void DrawEmitterCPU(RenderContextBatch& renderContextBatch, ParticleBuffer* buff const auto sortMode = static_cast(module->Values[2].AsInt); const int32 stride = buffer->Stride; const int32 listSize = buffer->CPU.Count; + const int32 indicesByteSize = listSize * buffer->GPU.SortedIndices->GetStride(); Array sortingKeysList[4]; - Array sortingIndicesList[2]; + Array sortingIndicesList[2]; uint32* sortingKeys[2]; - int32* sortingIndices[2]; + void* sortingIndices[2]; if (listSize < 500) { // Use fast stack allocator from RenderList auto& memory = renderContextBatch.GetMainContext().List->Memory; sortingKeys[0] = memory.Allocate(listSize); sortingKeys[1] = memory.Allocate(listSize); - sortingIndices[0] = memory.Allocate(listSize); - sortingIndices[1] = memory.Allocate(listSize); + sortingIndices[0] = memory.Allocate(indicesByteSize, GPU_SHADER_DATA_ALIGNMENT); + sortingIndices[1] = memory.Allocate(indicesByteSize, GPU_SHADER_DATA_ALIGNMENT); } else { // Use shared pooled memory from RendererAllocation sortingKeysList[0].Resize(listSize); sortingKeysList[1].Resize(listSize); - sortingIndicesList[0].Resize(listSize); - sortingIndicesList[1].Resize(listSize); + sortingIndicesList[0].Resize(indicesByteSize); + sortingIndicesList[1].Resize(indicesByteSize); sortingKeys[0] = sortingKeysList[0].Get(); sortingKeys[1] = sortingKeysList[1].Get(); sortingIndices[0] = sortingIndicesList[0].Get(); @@ -314,21 +315,42 @@ void DrawEmitterCPU(RenderContextBatch& renderContextBatch, ParticleBuffer* buff } // Generate sorting indices - int32* sortedIndices = sortingIndices[0]; + void* sortedIndices = sortingIndices[0]; + switch (buffer->GPU.SortedIndices->GetFormat()) { + case PixelFormat::R16_UInt: for (int32 i = 0; i < listSize; i++) - sortedIndices[i] = i; + ((uint16*)sortedIndices)[i] = i; + break; + case PixelFormat::R32_UInt: + for (int32 i = 0; i < listSize; i++) + ((uint32*)sortedIndices)[i] = i; + break; } // Sort keys with indices + switch (buffer->GPU.SortedIndices->GetFormat()) { - Sorting::RadixSort(sortedKeys, sortedIndices, sortingKeys[1], sortingIndices[1], listSize); + case PixelFormat::R16_UInt: + { + uint16* sortedIndicesTyped = (uint16*)sortedIndices; + Sorting::RadixSort(sortedKeys, sortedIndicesTyped, sortingKeys[1], (uint16*)sortingIndices[1], listSize); + sortedIndices = sortedIndicesTyped; + break; + } + case PixelFormat::R32_UInt: + { + uint32* sortedIndicesTyped = (uint32*)sortedIndices; + Sorting::RadixSort(sortedKeys, sortedIndicesTyped, sortingKeys[1], (uint32*)sortingIndices[1], listSize); + sortedIndices = sortedIndicesTyped; + break; + } } // Upload CPU particles indices { RenderContext::GPULocker.Lock(); - context->UpdateBuffer(buffer->GPU.SortedIndices, sortedIndices, listSize * sizeof(uint32), sortedIndicesOffset); + context->UpdateBuffer(buffer->GPU.SortedIndices, sortedIndices, indicesByteSize, sortedIndicesOffset); RenderContext::GPULocker.Unlock(); } } @@ -1312,7 +1334,7 @@ void UpdateGPU(RenderTask* task, GPUContext* context) // Pre-pass with buffers setup { - PROFILE_GPU_CPU("Sim"); + PROFILE_GPU_CPU_NAMED("Sim"); for (GPUSim& sim : sims) { sim.Emitter->GPU.Sim(context, sim.Emitter, sim.Effect, sim.EmitterIndex, sim.Data); diff --git a/Source/Engine/Particles/ParticlesData.cpp b/Source/Engine/Particles/ParticlesData.cpp index 226d06d3d..287a97b0b 100644 --- a/Source/Engine/Particles/ParticlesData.cpp +++ b/Source/Engine/Particles/ParticlesData.cpp @@ -164,26 +164,33 @@ bool ParticleBuffer::AllocateSortBuffer() ASSERT(Emitter && GPU.SortedIndices == nullptr && GPU.SortingKeys == nullptr); if (Emitter->Graph.SortModules.IsEmpty()) return false; + const int32 sortedIndicesCount = Capacity * Emitter->Graph.SortModules.Count(); + uint32 indexSize = sizeof(uint32); + PixelFormat indexFormat = PixelFormat::R32_UInt; + if (Capacity <= MAX_uint16) + { + // 16-bit indices + indexSize = sizeof(uint16); + indexFormat = PixelFormat::R16_UInt; + } switch (Mode) { case ParticlesSimulationMode::CPU: { - const int32 sortedIndicesSize = Capacity * sizeof(uint32) * Emitter->Graph.SortModules.Count(); GPU.SortedIndices = GPUDevice::Instance->CreateBuffer(TEXT("ParticleSortedIndices")); - if (GPU.SortedIndices->Init(GPUBufferDescription::Buffer(sortedIndicesSize, GPUBufferFlags::ShaderResource, PixelFormat::R32_UInt, nullptr, sizeof(uint32), GPUResourceUsage::Dynamic))) + if (GPU.SortedIndices->Init(GPUBufferDescription::Buffer(sortedIndicesCount * indexSize, GPUBufferFlags::ShaderResource, indexFormat, nullptr, indexSize, GPUResourceUsage::Dynamic))) return true; break; } #if COMPILE_WITH_GPU_PARTICLES case ParticlesSimulationMode::GPU: { - const int32 sortedIndicesCount = Capacity * Emitter->Graph.SortModules.Count(); GPU.SortingKeys = GPUDevice::Instance->CreateBuffer(TEXT("ParticleSortingKeys")); - if (GPU.SortingKeys->Init(GPUBufferDescription::Buffer(sortedIndicesCount * sizeof(float), GPUBufferFlags::UnorderedAccess, PixelFormat::R32_Float, nullptr, sizeof(float)))) + if (GPU.SortingKeys->Init(GPUBufferDescription::Buffer(sortedIndicesCount * sizeof(float), GPUBufferFlags::ShaderResource | GPUBufferFlags::UnorderedAccess, PixelFormat::R32_Float, nullptr, sizeof(float)))) return true; GPU.SortedIndices = GPUDevice::Instance->CreateBuffer(TEXT("ParticleSortedIndices")); - if (GPU.SortedIndices->Init(GPUBufferDescription::Buffer(sortedIndicesCount * sizeof(uint32), GPUBufferFlags::ShaderResource | GPUBufferFlags::UnorderedAccess, PixelFormat::R32_UInt, nullptr, sizeof(uint32)))) + if (GPU.SortedIndices->Init(GPUBufferDescription::Buffer(sortedIndicesCount * indexSize, GPUBufferFlags::ShaderResource | GPUBufferFlags::UnorderedAccess, indexFormat, nullptr, indexSize))) return true; break; }