Optimize GPU particles drawing with batched indirect args setup before sorting

This commit is contained in:
Wojtek Figat
2025-08-07 18:41:58 +02:00
parent 3ffb067e55
commit d4355e31d8
4 changed files with 350 additions and 271 deletions

View File

@@ -635,6 +635,22 @@ AssetReference<Shader> GPUParticlesSorting;
GPUConstantBuffer* GPUParticlesSortingCB; GPUConstantBuffer* GPUParticlesSortingCB;
GPUShaderProgramCS* GPUParticlesSortingCS[3]; GPUShaderProgramCS* GPUParticlesSortingCS[3];
// GPU emitters drawing is batched for efficiency: every emitter draw requested via DrawEmitterGPU is recorded here first and all of them are processed together by DrawEmittersGPU
struct GPUEmitterDraw
{
// Particle buffer of the emitter instance to draw (gives access to the emitter graph and to the GPU-side particle data)
ParticleBuffer* Buffer;
// Copy of the draw call taken when the draw was queued; patched per render module (module, material, indirect args buffer and offset) right before submission
DrawCall DrawCall;
// Draw passes requested by the caller; masked with the module and material draw modes for each render module
DrawPass DrawModes;
// Static flags forwarded to the render list when the draw call is submitted
StaticFlags StaticFlags;
// Bounds forwarded to the render list when the draw call is submitted
BoundingSphere Bounds;
// Bit mask of the render modules to draw (bit N selects Graph.RenderModules[N] of the emitter)
uint32 RenderModulesIndices;
// Size (in bytes) of the indirect draw arguments counted for this draw; summed over all queued draws to size the shared arguments buffer
uint32 IndirectArgsSize;
// Sort order forwarded to the render list when the draw call is submitted
int8 SortOrder;
// True if the sort modules of the emitter have to be executed for this draw
bool Sorting;
};
// Draws queued by DrawEmitterGPU (added while holding RenderContext::GPULocker), consumed and then cleared by DrawEmittersGPU
Array<GPUEmitterDraw> GPUEmitterDraws;
// Indirect draw arguments buffer shared by all batched draws; created on first use in DrawEmittersGPU and released in CleanupGPUParticlesSorting
GPUBuffer* GPUIndirectArgsBuffer = nullptr;
#if COMPILE_WITH_DEV_ENV #if COMPILE_WITH_DEV_ENV
void OnShaderReloading(Asset* obj) void OnShaderReloading(Asset* obj)
@@ -648,44 +664,41 @@ void OnShaderReloading(Asset* obj)
void CleanupGPUParticlesSorting() void CleanupGPUParticlesSorting()
{ {
GPUParticlesSorting = nullptr; GPUParticlesSorting = nullptr;
GPUEmitterDraws.Resize(0);
SAFE_DELETE_GPU_RESOURCE(GPUIndirectArgsBuffer);
} }
void DrawEmitterGPU(RenderContextBatch& renderContextBatch, ParticleBuffer* buffer, DrawCall& drawCall, DrawPass drawModes, StaticFlags staticFlags, const BoundingSphere& bounds, uint32 renderModulesIndices, int8 sortOrder) void DrawEmittersGPU(RenderContextBatch& renderContextBatch)
{ {
if (!IsInMainThread()) PROFILE_GPU_CPU_NAMED("DrawEmittersGPU");
{ ConcurrentSystemLocker::ReadScope systemScope(Particles::SystemLocker);
// Clone draw call data the hard way GPUContext* context = GPUDevice::Instance->GetMainContext();
byte drawCallCopy[sizeof(DrawCall)];
Platform::MemoryCopy(&drawCallCopy, &drawCall, sizeof(DrawCall));
// When rendering in async, delay GPU particles drawing to be in sync by moving drawing into delayed callback post scene drawing to use GPUContext safely // Count draws and sorting passes needed for resources allocation
// Move drawing into delayed callback post scene drawing to use GPUContext safely uint32 indirectArgsSize = 0;
renderContextBatch.GetMainContext().List->AddDelayedDraw([&renderContextBatch, buffer, drawCallCopy, drawModes, staticFlags, bounds, renderModulesIndices, sortOrder](RenderContext& renderContext) bool sorting = false;
for (const GPUEmitterDraw& draw : GPUEmitterDraws)
{ {
DrawEmitterGPU(renderContextBatch, buffer, *(DrawCall*)drawCallCopy, drawModes, staticFlags, bounds, renderModulesIndices, sortOrder); indirectArgsSize += draw.IndirectArgsSize;
}); sorting |= draw.Sorting;
return;
} }
const auto context = GPUDevice::Instance->GetMainContext();
auto emitter = buffer->Emitter;
// Check if need to perform any particles sorting
if (emitter->Graph.SortModules.HasItems() && renderContextBatch.GetMainContext().View.Pass != DrawPass::Depth && buffer->GPU.ParticlesCountMax != 0)
{
PROFILE_GPU_CPU_NAMED("Sort Particles");
// Prepare pipeline // Prepare pipeline
if (GPUParticlesSorting == nullptr) if (sorting && GPUParticlesSorting == nullptr)
{ {
// TODO: preload shader if platform supports GPU particles // TODO: preload shader if platform supports GPU particles (eg. inside ParticleEmitter::load if it's GPU sim with any sort module)
GPUParticlesSorting = Content::LoadAsyncInternal<Shader>(TEXT("Shaders/GPUParticlesSorting")); GPUParticlesSorting = Content::LoadAsyncInternal<Shader>(TEXT("Shaders/GPUParticlesSorting"));
if (GPUParticlesSorting == nullptr || GPUParticlesSorting->WaitForLoaded())
return;
#if COMPILE_WITH_DEV_ENV #if COMPILE_WITH_DEV_ENV
if (GPUParticlesSorting)
GPUParticlesSorting.Get()->OnReloading.Bind<OnShaderReloading>(); GPUParticlesSorting.Get()->OnReloading.Bind<OnShaderReloading>();
#endif #endif
} }
if (!GPUParticlesSortingCB) if (GPUParticlesSorting == nullptr || !GPUParticlesSorting->IsLoaded())
{
// Skip sorting until shader is ready
sorting = false;
}
else if (!GPUParticlesSortingCB)
{ {
const auto shader = GPUParticlesSorting->GetShader(); const auto shader = GPUParticlesSorting->GetShader();
const StringAnsiView CS_Sort("CS_Sort"); const StringAnsiView CS_Sort("CS_Sort");
@@ -693,25 +706,108 @@ void DrawEmitterGPU(RenderContextBatch& renderContextBatch, ParticleBuffer* buff
GPUParticlesSortingCS[1] = shader->GetCS(CS_Sort, 1); GPUParticlesSortingCS[1] = shader->GetCS(CS_Sort, 1);
GPUParticlesSortingCS[2] = shader->GetCS(CS_Sort, 2); GPUParticlesSortingCS[2] = shader->GetCS(CS_Sort, 2);
GPUParticlesSortingCB = shader->GetCB(0); GPUParticlesSortingCB = shader->GetCB(0);
ASSERT(GPUParticlesSortingCB); ASSERT_LOW_LAYER(GPUParticlesSortingCB);
} }
const uint32 indirectArgsCapacity = Math::RoundUpToPowerOf2(indirectArgsSize);
if (GPUIndirectArgsBuffer == nullptr)
GPUIndirectArgsBuffer = GPUDevice::Instance->CreateBuffer(TEXT("ParticleIndirectDrawArgsBuffer"));
if (GPUIndirectArgsBuffer->GetSize() < indirectArgsCapacity)
GPUIndirectArgsBuffer->Init(GPUBufferDescription::Argument(indirectArgsCapacity));
// Prepare sorting data // Build indirect arguments
if (!buffer->GPU.SortedIndices) uint32 indirectArgsOffset = 0;
buffer->AllocateSortBuffer(); for (GPUEmitterDraw& draw : GPUEmitterDraws)
ASSERT(buffer->GPU.SortingKeysBuffer); {
ParticleEmitter* emitter = draw.Buffer->Emitter;
for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.RenderModules.Count(); moduleIndex++)
{
if ((draw.RenderModulesIndices & (1u << moduleIndex)) == 0)
continue;
auto module = emitter->Graph.RenderModules.Get()[moduleIndex];
draw.DrawCall.Particle.Module = module;
switch (module->TypeID)
{
// Sprite Rendering
case 400:
{
const auto material = (MaterialBase*)module->Assets[0].Get();
const auto moduleDrawModes = module->Values.Count() > 3 ? (DrawPass)module->Values[3].AsInt : DrawPass::Default;
auto dp = draw.DrawModes & moduleDrawModes & material->GetDrawModes();
if (dp == DrawPass::None || SpriteRenderer.Init())
break;
// Draw sprite for each particle
GPUDrawIndexedIndirectArgs args { SpriteParticleRenderer::IndexCount, 1, 0, 0, 0 };
context->UpdateBuffer(GPUIndirectArgsBuffer, &args, sizeof(args), indirectArgsOffset);
context->CopyBuffer(GPUIndirectArgsBuffer, draw.Buffer->GPU.Buffer, 4, indirectArgsOffset + 4, draw.Buffer->GPU.ParticleCounterOffset);
indirectArgsOffset += sizeof(GPUDrawIndexedIndirectArgs);
break;
}
// Model Rendering
case 403:
{
const auto model = (Model*)module->Assets[0].Get();
const auto material = (MaterialBase*)module->Assets[1].Get();
const auto moduleDrawModes = module->Values.Count() > 4 ? (DrawPass)module->Values[4].AsInt : DrawPass::Default;
auto dp = draw.DrawModes & moduleDrawModes & material->GetDrawModes();
if (dp == DrawPass::None)
break;
// TODO: model LOD picking for particles?
int32 lodIndex = 0;
ModelLOD& lod = model->LODs[lodIndex];
for (int32 meshIndex = 0; meshIndex < lod.Meshes.Count(); meshIndex++)
{
Mesh& mesh = lod.Meshes[meshIndex];
if (!mesh.IsInitialized())
continue;
// Draw mesh for each particle
GPUDrawIndexedIndirectArgs args { (uint32)mesh.GetTriangleCount() * 3, 1, 0, 0, 0 };
context->UpdateBuffer(GPUIndirectArgsBuffer, &args, sizeof(args), indirectArgsOffset);
context->CopyBuffer(GPUIndirectArgsBuffer, draw.Buffer->GPU.Buffer, 4, indirectArgsOffset + 4, draw.Buffer->GPU.ParticleCounterOffset);
indirectArgsOffset += sizeof(GPUDrawIndexedIndirectArgs);
}
break;
}
// Ribbon Rendering
case 404:
{
// Not supported
break;
}
// Volumetric Fog Rendering
case 405:
{
// Not supported
break;
}
}
}
}
indirectArgsOffset = 0;
// Sort particles
if (sorting)
{
PROFILE_GPU_CPU_NAMED("Sort Particles");
for (const GPUEmitterDraw& draw : GPUEmitterDraws)
{
if (!draw.Sorting)
continue;
ASSERT(draw.Buffer->GPU.SortingKeysBuffer);
// Execute all sorting modules // Execute all sorting modules
ParticleEmitter* emitter = draw.Buffer->Emitter;
for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.SortModules.Count(); moduleIndex++) for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.SortModules.Count(); moduleIndex++)
{ {
auto module = emitter->Graph.SortModules[moduleIndex]; auto module = emitter->Graph.SortModules[moduleIndex];
const auto sortMode = static_cast<ParticleSortMode>(module->Values[2].AsInt); const auto sortMode = (ParticleSortMode)module->Values[2].AsInt;
// Generate sorting keys based on sorting mode // Generate sorting keys based on sorting mode
GPUParticlesSortingData data; GPUParticlesSortingData data;
data.ParticleCounterOffset = buffer->GPU.ParticleCounterOffset; data.ParticleCounterOffset = draw.Buffer->GPU.ParticleCounterOffset;
data.ParticleStride = buffer->Stride; data.ParticleStride = draw.Buffer->Stride;
data.ParticleCapacity = buffer->Capacity; data.ParticleCapacity = draw.Buffer->Capacity;
int32 permutationIndex; int32 permutationIndex;
bool sortAscending; bool sortAscending;
switch (sortMode) switch (sortMode)
@@ -725,7 +821,7 @@ void DrawEmitterGPU(RenderContextBatch& renderContextBatch, ParticleBuffer* buff
if (emitter->SimulationSpace == ParticlesSimulationSpace::Local) if (emitter->SimulationSpace == ParticlesSimulationSpace::Local)
{ {
Matrix matrix; Matrix matrix;
Matrix::Multiply(drawCall.World, viewProjection, matrix); Matrix::Multiply(draw.DrawCall.World, viewProjection, matrix);
Matrix::Transpose(matrix, data.PositionTransform); Matrix::Transpose(matrix, data.PositionTransform);
} }
else else
@@ -742,7 +838,7 @@ void DrawEmitterGPU(RenderContextBatch& renderContextBatch, ParticleBuffer* buff
data.ViewPosition = renderContextBatch.GetMainContext().View.Position; data.ViewPosition = renderContextBatch.GetMainContext().View.Position;
if (emitter->SimulationSpace == ParticlesSimulationSpace::Local) if (emitter->SimulationSpace == ParticlesSimulationSpace::Local)
{ {
Matrix::Transpose(drawCall.World, data.PositionTransform); Matrix::Transpose(draw.DrawCall.World, data.PositionTransform);
} }
else else
{ {
@@ -769,80 +865,28 @@ void DrawEmitterGPU(RenderContextBatch& renderContextBatch, ParticleBuffer* buff
} }
context->UpdateCB(GPUParticlesSortingCB, &data); context->UpdateCB(GPUParticlesSortingCB, &data);
context->BindCB(0, GPUParticlesSortingCB); context->BindCB(0, GPUParticlesSortingCB);
context->BindSR(0, buffer->GPU.Buffer->View()); context->BindSR(0, draw.Buffer->GPU.Buffer->View());
context->BindUA(0, buffer->GPU.SortingKeysBuffer->View()); context->BindUA(0, draw.Buffer->GPU.SortingKeysBuffer->View());
const int32 threadGroupSize = 1024; const int32 threadGroupSize = 1024;
context->Dispatch(GPUParticlesSortingCS[permutationIndex], Math::DivideAndRoundUp(buffer->GPU.ParticlesCountMax, threadGroupSize), 1, 1); context->Dispatch(GPUParticlesSortingCS[permutationIndex], Math::DivideAndRoundUp(draw.Buffer->GPU.ParticlesCountMax, threadGroupSize), 1, 1);
// Perform sorting // Perform sorting
BitonicSort::Instance()->Sort(context, buffer->GPU.SortingKeysBuffer, buffer->GPU.Buffer, data.ParticleCounterOffset, sortAscending, buffer->GPU.SortedIndices); BitonicSort::Instance()->Sort(context, draw.Buffer->GPU.SortingKeysBuffer, draw.Buffer->GPU.Buffer, data.ParticleCounterOffset, sortAscending, draw.Buffer->GPU.SortedIndices);
}
} }
} }
// Count draw calls to perform during this emitter rendering // Submit draw calls
int32 drawCalls = 0; for (GPUEmitterDraw& draw : GPUEmitterDraws)
for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.RenderModules.Count(); moduleIndex++)
{ {
if ((renderModulesIndices & (1u << moduleIndex)) == 0)
continue;
auto module = emitter->Graph.RenderModules.Get()[moduleIndex];
switch (module->TypeID)
{
// Sprite Rendering
case 400:
{
drawCalls++;
break;
}
// Model Rendering
case 403:
{
const auto model = (Model*)module->Assets[0].Get();
// TODO: model LOD picking for particles?
int32 lodIndex = 0;
ModelLOD& lod = model->LODs[lodIndex];
for (int32 meshIndex = 0; meshIndex < lod.Meshes.Count(); meshIndex++)
{
Mesh& mesh = lod.Meshes[meshIndex];
if (!mesh.IsInitialized())
continue;
drawCalls++;
}
break;
}
// Ribbon Rendering
case 404:
{
// Not supported
break;
}
// Volumetric Fog Rendering
case 405:
{
// Not supported
break;
}
}
}
if (drawCalls == 0)
return;
// Ensure to have enough space for indirect draw arguments
const uint32 minSize = drawCalls * sizeof(GPUDrawIndexedIndirectArgs);
if (buffer->GPU.IndirectDrawArgsBuffer->GetSize() < minSize)
buffer->GPU.IndirectDrawArgsBuffer->Init(GPUBufferDescription::Argument(minSize));
// Execute all rendering modules using indirect draw arguments // Execute all rendering modules using indirect draw arguments
int32 indirectDrawCallIndex = 0; ParticleEmitter* emitter = draw.Buffer->Emitter;
for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.RenderModules.Count(); moduleIndex++) for (int32 moduleIndex = 0; moduleIndex < emitter->Graph.RenderModules.Count(); moduleIndex++)
{ {
if ((renderModulesIndices & (1u << moduleIndex)) == 0) if ((draw.RenderModulesIndices & (1u << moduleIndex)) == 0)
continue; continue;
auto module = emitter->Graph.RenderModules.Get()[moduleIndex]; auto module = emitter->Graph.RenderModules.Get()[moduleIndex];
drawCall.Particle.Module = module; draw.DrawCall.Particle.Module = module;
switch (module->TypeID) switch (module->TypeID)
{ {
// Sprite Rendering // Sprite Rendering
@@ -850,24 +894,18 @@ void DrawEmitterGPU(RenderContextBatch& renderContextBatch, ParticleBuffer* buff
{ {
const auto material = (MaterialBase*)module->Assets[0].Get(); const auto material = (MaterialBase*)module->Assets[0].Get();
const auto moduleDrawModes = module->Values.Count() > 3 ? (DrawPass)module->Values[3].AsInt : DrawPass::Default; const auto moduleDrawModes = module->Values.Count() > 3 ? (DrawPass)module->Values[3].AsInt : DrawPass::Default;
auto dp = drawModes & moduleDrawModes & material->GetDrawModes(); auto dp = draw.DrawModes & moduleDrawModes & material->GetDrawModes();
if (dp == DrawPass::None || SpriteRenderer.Init()) if (dp == DrawPass::None || SpriteRenderer.Init())
break; break;
drawCall.Material = material; draw.DrawCall.Material = material;
// Initialize indirect draw arguments
GPUDrawIndexedIndirectArgs args { SpriteParticleRenderer::IndexCount, 1, 0, 0, 0 };
const uint32 argsOffset = indirectDrawCallIndex * sizeof(GPUDrawIndexedIndirectArgs);
context->UpdateBuffer(buffer->GPU.IndirectDrawArgsBuffer, &args, sizeof(args), argsOffset);
context->CopyBuffer(buffer->GPU.IndirectDrawArgsBuffer, buffer->GPU.Buffer, 4, argsOffset + 4, buffer->GPU.ParticleCounterOffset);
// Submit draw call // Submit draw call
SpriteRenderer.SetupDrawCall(drawCall); SpriteRenderer.SetupDrawCall(draw.DrawCall);
drawCall.InstanceCount = 0; draw.DrawCall.InstanceCount = 0;
drawCall.Draw.IndirectArgsBuffer = buffer->GPU.IndirectDrawArgsBuffer; draw.DrawCall.Draw.IndirectArgsBuffer = GPUIndirectArgsBuffer;
drawCall.Draw.IndirectArgsOffset = indirectDrawCallIndex * sizeof(GPUDrawIndexedIndirectArgs); draw.DrawCall.Draw.IndirectArgsOffset = indirectArgsOffset;
renderContextBatch.GetMainContext().List->AddDrawCall(renderContextBatch, dp, staticFlags, ShadowsCastingMode::DynamicOnly, bounds, drawCall, false, sortOrder); renderContextBatch.GetMainContext().List->AddDrawCall(renderContextBatch, dp, draw.StaticFlags, ShadowsCastingMode::DynamicOnly, draw.Bounds, draw.DrawCall, false, draw.SortOrder);
indirectDrawCallIndex++; indirectArgsOffset += sizeof(GPUDrawIndexedIndirectArgs);
break; break;
} }
// Model Rendering // Model Rendering
@@ -876,8 +914,10 @@ void DrawEmitterGPU(RenderContextBatch& renderContextBatch, ParticleBuffer* buff
const auto model = (Model*)module->Assets[0].Get(); const auto model = (Model*)module->Assets[0].Get();
const auto material = (MaterialBase*)module->Assets[1].Get(); const auto material = (MaterialBase*)module->Assets[1].Get();
const auto moduleDrawModes = module->Values.Count() > 4 ? (DrawPass)module->Values[4].AsInt : DrawPass::Default; const auto moduleDrawModes = module->Values.Count() > 4 ? (DrawPass)module->Values[4].AsInt : DrawPass::Default;
auto dp = drawModes & moduleDrawModes & material->GetDrawModes(); auto dp = draw.DrawModes & moduleDrawModes & material->GetDrawModes();
drawCall.Material = material; if (dp == DrawPass::None)
break;
draw.DrawCall.Material = material;
// TODO: model LOD picking for particles? // TODO: model LOD picking for particles?
int32 lodIndex = 0; int32 lodIndex = 0;
@@ -889,19 +929,13 @@ void DrawEmitterGPU(RenderContextBatch& renderContextBatch, ParticleBuffer* buff
continue; continue;
// TODO: include mesh entry transformation, visibility and shadows mode? // TODO: include mesh entry transformation, visibility and shadows mode?
// Initialize indirect draw arguments
GPUDrawIndexedIndirectArgs args = { (uint32)mesh.GetTriangleCount() * 3, 1, 0, 0, 0 };
const uint32 argsOffset = indirectDrawCallIndex * sizeof(GPUDrawIndexedIndirectArgs);
context->UpdateBuffer(buffer->GPU.IndirectDrawArgsBuffer, &args, sizeof(args), argsOffset);
context->CopyBuffer(buffer->GPU.IndirectDrawArgsBuffer, buffer->GPU.Buffer, 4, argsOffset + 4, buffer->GPU.ParticleCounterOffset);
// Execute draw call // Execute draw call
mesh.GetDrawCallGeometry(drawCall); mesh.GetDrawCallGeometry(draw.DrawCall);
drawCall.InstanceCount = 0; draw.DrawCall.InstanceCount = 0;
drawCall.Draw.IndirectArgsBuffer = buffer->GPU.IndirectDrawArgsBuffer; draw.DrawCall.Draw.IndirectArgsBuffer = GPUIndirectArgsBuffer;
drawCall.Draw.IndirectArgsOffset = indirectDrawCallIndex * sizeof(GPUDrawIndexedIndirectArgs); draw.DrawCall.Draw.IndirectArgsOffset = indirectArgsOffset;
renderContextBatch.GetMainContext().List->AddDrawCall(renderContextBatch, dp, staticFlags, ShadowsCastingMode::DynamicOnly, bounds, drawCall, false, sortOrder); renderContextBatch.GetMainContext().List->AddDrawCall(renderContextBatch, dp, draw.StaticFlags, ShadowsCastingMode::DynamicOnly, draw.Bounds, draw.DrawCall, false, draw.SortOrder);
indirectDrawCallIndex++; indirectArgsOffset += sizeof(GPUDrawIndexedIndirectArgs);
} }
break; break;
} }
@@ -919,6 +953,58 @@ void DrawEmitterGPU(RenderContextBatch& renderContextBatch, ParticleBuffer* buff
} }
} }
} }
}
GPUEmitterDraws.Clear();
}
// Queues a GPU emitter for batched drawing.
// Nothing is rendered here: the request is recorded in GPUEmitterDraws and the first request of a batch also schedules a delayed draw that calls DrawEmittersGPU to process all queued emitters at once.
// The call is a no-op when none of the selected render modules is a sprite (400) or model (403) module.
void DrawEmitterGPU(RenderContextBatch& renderContextBatch, ParticleBuffer* buffer, DrawCall& drawCall, DrawPass drawModes, StaticFlags staticFlags, const BoundingSphere& bounds, uint32 renderModulesIndices, int8 sortOrder)
{
    // Sum up the indirect arguments storage needed by the selected render modules
    ParticleEmitter* emitter = buffer->Emitter;
    const int32 modulesCount = emitter->Graph.RenderModules.Count();
    uint32 argsBytes = 0;
    for (int32 i = 0; i < modulesCount; i++)
    {
        const bool selected = (renderModulesIndices & (1u << i)) != 0;
        if (!selected)
            continue;
        auto module = emitter->Graph.RenderModules.Get()[i];
        if (module->TypeID == 400)
        {
            // Sprite Rendering: a single indirect draw
            argsBytes += sizeof(GPUDrawIndexedIndirectArgs);
        }
        else if (module->TypeID == 403)
        {
            // Model Rendering: one indirect draw entry per mesh of the used LOD
            const auto model = (Model*)module->Assets[0].Get();

            // TODO: model LOD picking for particles?
            int32 lodIndex = 0;
            ModelLOD& lod = model->LODs[lodIndex];
            argsBytes += sizeof(GPUDrawIndexedIndirectArgs) * lod.Meshes.Count();
        }
        // Other module types don't use indirect draw arguments here
    }

    // Nothing to draw for this emitter
    if (argsBytes == 0)
        return;

    // Sorting is needed only for emitters with sort modules and any particles, and never for the depth pass
    const bool needsSorting = emitter->Graph.SortModules.HasItems()
        && renderContextBatch.GetMainContext().View.Pass != DrawPass::Depth
        && buffer->GPU.ParticlesCountMax != 0;
    if (needsSorting && !buffer->GPU.SortedIndices)
    {
        // Sorting buffers are created on demand
        buffer->AllocateSortBuffer();
    }

    // When rendering in async, delay GPU particles drawing to be in sync by moving drawing into delayed callback post scene drawing to use GPUContext safely
    // Also, batch rendering all GPU emitters together for more efficient usage of GPU memory barriers and indirect arguments buffers allocation
    RenderContext::GPULocker.Lock();
    const bool firstInBatch = GPUEmitterDraws.Count() == 0;
    if (firstInBatch)
    {
        // Only the emitter that opens the batch schedules the drawing of all batched draws
        renderContextBatch.GetMainContext().List->AddDelayedDraw([&renderContextBatch](RenderContext&)
        {
            DrawEmittersGPU(renderContextBatch);
        });
    }
    GPUEmitterDraws.Add({ buffer, drawCall, drawModes, staticFlags, bounds, renderModulesIndices, argsBytes, sortOrder, needsSorting });
    RenderContext::GPULocker.Unlock();
}
#endif #endif
@@ -1119,6 +1205,7 @@ void UpdateGPU(RenderTask* task, GPUContext* context)
PROFILE_CPU_NAMED("GPUParticles"); PROFILE_CPU_NAMED("GPUParticles");
PROFILE_GPU("GPU Particles"); PROFILE_GPU("GPU Particles");
PROFILE_MEM(Particles); PROFILE_MEM(Particles);
ConcurrentSystemLocker::ReadScope systemScope(Particles::SystemLocker);
for (ParticleEffect* effect : GpuUpdateList) for (ParticleEffect* effect : GpuUpdateList)
{ {

View File

@@ -98,7 +98,6 @@ ParticleBuffer::~ParticleBuffer()
{ {
SAFE_DELETE_GPU_RESOURCE(GPU.Buffer); SAFE_DELETE_GPU_RESOURCE(GPU.Buffer);
SAFE_DELETE_GPU_RESOURCE(GPU.BufferSecondary); SAFE_DELETE_GPU_RESOURCE(GPU.BufferSecondary);
SAFE_DELETE_GPU_RESOURCE(GPU.IndirectDrawArgsBuffer);
SAFE_DELETE_GPU_RESOURCE(GPU.SortingKeysBuffer); SAFE_DELETE_GPU_RESOURCE(GPU.SortingKeysBuffer);
SAFE_DELETE_GPU_RESOURCE(GPU.SortedIndices); SAFE_DELETE_GPU_RESOURCE(GPU.SortedIndices);
SAFE_DELETE(GPU.RibbonIndexBufferDynamic); SAFE_DELETE(GPU.RibbonIndexBufferDynamic);
@@ -146,7 +145,6 @@ bool ParticleBuffer::Init(ParticleEmitter* emitter)
GPU.BufferSecondary = GPUDevice::Instance->CreateBuffer(TEXT("ParticleBuffer B")); GPU.BufferSecondary = GPUDevice::Instance->CreateBuffer(TEXT("ParticleBuffer B"));
if (GPU.BufferSecondary->Init(GPU.Buffer->GetDescription())) if (GPU.BufferSecondary->Init(GPU.Buffer->GetDescription()))
return true; return true;
GPU.IndirectDrawArgsBuffer = GPUDevice::Instance->CreateBuffer(TEXT("ParticleIndirectDrawArgsBuffer"));
GPU.PendingClear = true; GPU.PendingClear = true;
GPU.HasValidCount = false; GPU.HasValidCount = false;
GPU.ParticleCounterOffset = size; GPU.ParticleCounterOffset = size;

View File

@@ -203,11 +203,6 @@ public:
/// </summary> /// </summary>
GPUBuffer* BufferSecondary = nullptr; GPUBuffer* BufferSecondary = nullptr;
/// <summary>
/// The indirect draw command arguments buffer used by the GPU particles to invoke drawing on a GPU based on the particles amount (instances count).
/// </summary>
GPUBuffer* IndirectDrawArgsBuffer = nullptr;
/// <summary> /// <summary>
/// The GPU particles sorting buffer. Contains structure of particle index and the sorting key for every particle. Used to sort particles. /// The GPU particles sorting buffer. Contains structure of particle index and the sorting key for every particle. Used to sort particles.
/// </summary> /// </summary>

View File

@@ -266,7 +266,6 @@ void RenderList::DrainDelayedDraws(RenderContext& renderContext)
{ {
if (_delayedDraws.IsEmpty()) if (_delayedDraws.IsEmpty())
return; return;
PROFILE_GPU_CPU_NAMED("DelayedDraws");
for (DelayedDraw& e : _delayedDraws) for (DelayedDraw& e : _delayedDraws)
e(renderContext); e(renderContext);
_delayedDraws.SetCapacity(0); _delayedDraws.SetCapacity(0);