From 0ea555b041f1df5bc8e1a9d0565ec6700e8af0c8 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 8 Aug 2025 16:25:09 +0200 Subject: [PATCH] Optimize GPU Particles simulation to perform memory buffer copies/updates before dispatch --- .../Particles/Graph/GPU/GPUParticles.cpp | 36 ++++++++++----- .../Engine/Particles/Graph/GPU/GPUParticles.h | 13 ++---- Source/Engine/Particles/Particles.cpp | 44 ++++++++++++++++--- 3 files changed, 68 insertions(+), 25 deletions(-) diff --git a/Source/Engine/Particles/Graph/GPU/GPUParticles.cpp b/Source/Engine/Particles/Graph/GPU/GPUParticles.cpp index e2dc78d59..54fbc254f 100644 --- a/Source/Engine/Particles/Graph/GPU/GPUParticles.cpp +++ b/Source/Engine/Particles/Graph/GPU/GPUParticles.cpp @@ -130,14 +130,20 @@ void GPUParticles::CopyParticlesCount(GPUContext* context, ParticleEmitter* emit } } -void GPUParticles::Execute(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data) +bool GPUParticles::CanSim(const ParticleEmitter* emitter, const ParticleEmitterInstance& data) const +{ + const int32 threads = data.Buffer->GPU.ParticlesCountMax + data.GPU.SpawnCount; + return data.GPU.DeltaTime > 0.0f && + emitter->Graph.Version == data.Version && + emitter->Graph.Version == data.Buffer->Version && + threads != 0 && + _mainCS; +} + +void GPUParticles::PreSim(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data) { - PROFILE_CPU_ASSET(emitter); - ASSERT(emitter->Graph.Version == data.Version); - ASSERT(emitter->Graph.Version == data.Buffer->Version); uint32 counterDefaultValue = 0; const uint32 counterOffset = data.Buffer->GPU.ParticleCounterOffset; - const bool hasCB = _cbData.HasItems(); // Clear buffers if need to if (data.Buffer->GPU.PendingClear) @@ -156,14 +162,17 @@ void GPUParticles::Execute(GPUContext* context, ParticleEmitter* emitter, Partic } } - // Skip if can - SceneRenderTask* viewTask = 
effect->GetRenderTask(); - const int32 threads = data.Buffer->GPU.ParticlesCountMax + data.GPU.SpawnCount; - if (data.GPU.DeltaTime <= 0.0f || threads == 0 || !_mainCS) - return; - // Clear destination buffer counter context->UpdateBuffer(data.Buffer->GPU.BufferSecondary, &counterDefaultValue, sizeof(counterDefaultValue), counterOffset); +} + +void GPUParticles::Sim(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data) +{ + PROFILE_CPU_ASSET(emitter); + const bool hasCB = _cbData.HasItems(); + const int32 threads = data.Buffer->GPU.ParticlesCountMax + data.GPU.SpawnCount; + const uint32 counterOffset = data.Buffer->GPU.ParticleCounterOffset; + SceneRenderTask* viewTask = effect->GetRenderTask(); // Setup parameters MaterialParameter::BindMeta bindMeta; @@ -265,6 +274,11 @@ void GPUParticles::Execute(GPUContext* context, ParticleEmitter* emitter, Partic // Invoke Compute shader const int32 threadGroupSize = 1024; context->Dispatch(_mainCS, Math::Min(Math::DivideAndRoundUp(threads, threadGroupSize), GPU_MAX_CS_DISPATCH_THREAD_GROUPS), 1, 1); +} + +void GPUParticles::PostSim(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data) +{ + const uint32 counterOffset = data.Buffer->GPU.ParticleCounterOffset; // Copy custom data for (int32 i = 0; i < CustomDataSize; i += 4) diff --git a/Source/Engine/Particles/Graph/GPU/GPUParticles.h b/Source/Engine/Particles/Graph/GPU/GPUParticles.h index fe7f26233..8d6008f53 100644 --- a/Source/Engine/Particles/Graph/GPU/GPUParticles.h +++ b/Source/Engine/Particles/Graph/GPU/GPUParticles.h @@ -79,15 +79,10 @@ public: /// The destination buffer offset from start (in bytes) to copy the counter (uint32). 
void CopyParticlesCount(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, ParticleEmitterInstance& data, GPUBuffer* dstBuffer, uint32 dstOffset); - /// - /// Performs the GPU particles simulation update using the graphics device. - /// - /// The GPU context that supports Compute. - /// The owning emitter. - /// The instance effect. - /// The index of the emitter in the particle system. - /// The instance data. - void Execute(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data); + bool CanSim(const ParticleEmitter* emitter, const ParticleEmitterInstance& data) const; + void PreSim(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data); + void Sim(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data); + void PostSim(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data); }; #endif diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index 5ea195a00..9236efd9d 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -1279,6 +1279,16 @@ void UpdateGPU(RenderTask* task, GPUContext* context) PROFILE_MEM(Particles); ConcurrentSystemLocker::ReadScope systemScope(Particles::SystemLocker); + // Collect valid emitter tracks to update + struct GPUSim + { + ParticleEffect* Effect; + ParticleEmitter* Emitter; + int32 EmitterIndex; + ParticleEmitterInstance& Data; + }; + Array sims; + sims.EnsureCapacity(Math::AlignUp(GpuUpdateList.Count(), 64)); // Preallocate with some slack for (ParticleEffect* effect : GpuUpdateList) { auto& instance = effect->Instance; @@ -1286,7 +1296,6 @@ void UpdateGPU(RenderTask* task, GPUContext* context) if (!particleSystem || !particleSystem->IsLoaded()) continue; - // Update all emitter 
tracks for (int32 j = 0; j < particleSystem->Tracks.Count(); j++) { const auto& track = particleSystem->Tracks[j]; @@ -1297,16 +1306,41 @@ void UpdateGPU(RenderTask* task, GPUContext* context) if (!emitter || !emitter->IsLoaded() || emitter->SimulationMode != ParticlesSimulationMode::GPU || instance.Emitters.Count() <= emitterIndex) continue; ParticleEmitterInstance& data = instance.Emitters[emitterIndex]; - if (!data.Buffer) + if (!data.Buffer || !emitter->GPU.CanSim(emitter, data)) continue; ASSERT(emitter->Capacity != 0 && emitter->Graph.Layout.Size != 0); - - // TODO: use async context for particles to update them on compute during GBuffer rendering - emitter->GPU.Execute(context, emitter, effect, emitterIndex, data); + sims.Add({ effect, emitter, emitterIndex, data }); } } GpuUpdateList.Clear(); + // Pre-pass with buffers setup + { + PROFILE_CPU_NAMED("PreSim"); + for (GPUSim& sim : sims) + { + sim.Emitter->GPU.PreSim(context, sim.Emitter, sim.Effect, sim.EmitterIndex, sim.Data); + } + } + + // Simulation pass with compute shader dispatches + { + PROFILE_GPU_CPU("Sim"); + for (GPUSim& sim : sims) + { + sim.Emitter->GPU.Sim(context, sim.Emitter, sim.Effect, sim.EmitterIndex, sim.Data); + } + } + + // Post-pass with buffers setup + { + PROFILE_CPU_NAMED("PostSim"); + for (GPUSim& sim : sims) + { + sim.Emitter->GPU.PostSim(context, sim.Emitter, sim.Effect, sim.EmitterIndex, sim.Data); + } + } + context->ResetSR(); context->ResetUA(); context->FlushState();