Optimize GPU Particles simulation to perform memory buffer copies/updates before dispatch

This commit is contained in:
Wojtek Figat
2025-08-08 16:25:09 +02:00
parent 90d1e63b58
commit 0ea555b041
3 changed files with 68 additions and 25 deletions

View File

@@ -130,14 +130,20 @@ void GPUParticles::CopyParticlesCount(GPUContext* context, ParticleEmitter* emit
}
}
void GPUParticles::Execute(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data)
bool GPUParticles::CanSim(const ParticleEmitter* emitter, const ParticleEmitterInstance& data) const
{
const int32 threads = data.Buffer->GPU.ParticlesCountMax + data.GPU.SpawnCount;
return data.GPU.DeltaTime > 0.0f &&
emitter->Graph.Version == data.Version &&
emitter->Graph.Version == data.Buffer->Version &&
threads != 0 &&
_mainCS;
}
void GPUParticles::PreSim(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data)
{
PROFILE_CPU_ASSET(emitter);
ASSERT(emitter->Graph.Version == data.Version);
ASSERT(emitter->Graph.Version == data.Buffer->Version);
uint32 counterDefaultValue = 0;
const uint32 counterOffset = data.Buffer->GPU.ParticleCounterOffset;
const bool hasCB = _cbData.HasItems();
// Clear buffers if need to
if (data.Buffer->GPU.PendingClear)
@@ -156,14 +162,17 @@ void GPUParticles::Execute(GPUContext* context, ParticleEmitter* emitter, Partic
}
}
// Skip if can
SceneRenderTask* viewTask = effect->GetRenderTask();
const int32 threads = data.Buffer->GPU.ParticlesCountMax + data.GPU.SpawnCount;
if (data.GPU.DeltaTime <= 0.0f || threads == 0 || !_mainCS)
return;
// Clear destination buffer counter
context->UpdateBuffer(data.Buffer->GPU.BufferSecondary, &counterDefaultValue, sizeof(counterDefaultValue), counterOffset);
}
void GPUParticles::Sim(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data)
{
PROFILE_CPU_ASSET(emitter);
const bool hasCB = _cbData.HasItems();
const int32 threads = data.Buffer->GPU.ParticlesCountMax + data.GPU.SpawnCount;
const uint32 counterOffset = data.Buffer->GPU.ParticleCounterOffset;
SceneRenderTask* viewTask = effect->GetRenderTask();
// Setup parameters
MaterialParameter::BindMeta bindMeta;
@@ -265,6 +274,11 @@ void GPUParticles::Execute(GPUContext* context, ParticleEmitter* emitter, Partic
// Invoke Compute shader
const int32 threadGroupSize = 1024;
context->Dispatch(_mainCS, Math::Min(Math::DivideAndRoundUp(threads, threadGroupSize), GPU_MAX_CS_DISPATCH_THREAD_GROUPS), 1, 1);
}
void GPUParticles::PostSim(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data)
{
const uint32 counterOffset = data.Buffer->GPU.ParticleCounterOffset;
// Copy custom data
for (int32 i = 0; i < CustomDataSize; i += 4)

View File

@@ -79,15 +79,10 @@ public:
/// <param name="dstOffset">The destination buffer offset from start (in bytes) to copy the counter (uint32).</param>
void CopyParticlesCount(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, ParticleEmitterInstance& data, GPUBuffer* dstBuffer, uint32 dstOffset);
/// <summary>
/// Performs the GPU particles simulation update using the graphics device.
/// </summary>
/// <param name="context">The GPU context that supports Compute.</param>
/// <param name="emitter">The owning emitter.</param>
/// <param name="effect">The instance effect.</param>
/// <param name="emitterIndex">The index of the emitter in the particle system.</param>
/// <param name="data">The instance data.</param>
void Execute(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data);
/// <summary>
/// Checks if the GPU particles simulation can be performed for the given emitter instance.
/// </summary>
/// <param name="emitter">The owning emitter.</param>
/// <param name="data">The instance data.</param>
/// <returns>True if the simulation delta time is positive, the emitter graph version matches both the instance data and its buffer, there is at least one thread to run and the main compute shader is set, otherwise false.</returns>
bool CanSim(const ParticleEmitter* emitter, const ParticleEmitterInstance& data) const;
/// <summary>
/// Prepares the particle buffers before the simulation dispatch (eg. clears buffers with a pending clear and resets the particles counter in the secondary buffer).
/// </summary>
/// <param name="context">The GPU context that supports Compute.</param>
/// <param name="emitter">The owning emitter.</param>
/// <param name="effect">The instance effect.</param>
/// <param name="emitterIndex">The index of the emitter in the particle system.</param>
/// <param name="data">The instance data.</param>
void PreSim(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data);
/// <summary>
/// Performs the GPU particles simulation update: sets up the parameters and dispatches the main compute shader.
/// </summary>
/// <param name="context">The GPU context that supports Compute.</param>
/// <param name="emitter">The owning emitter.</param>
/// <param name="effect">The instance effect.</param>
/// <param name="emitterIndex">The index of the emitter in the particle system.</param>
/// <param name="data">The instance data.</param>
void Sim(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data);
/// <summary>
/// Performs the buffer updates that follow the simulation dispatch (eg. copies the custom data).
/// </summary>
/// <param name="context">The GPU context that supports Compute.</param>
/// <param name="emitter">The owning emitter.</param>
/// <param name="effect">The instance effect.</param>
/// <param name="emitterIndex">The index of the emitter in the particle system.</param>
/// <param name="data">The instance data.</param>
void PostSim(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data);
};
#endif

View File

@@ -1279,6 +1279,16 @@ void UpdateGPU(RenderTask* task, GPUContext* context)
PROFILE_MEM(Particles);
ConcurrentSystemLocker::ReadScope systemScope(Particles::SystemLocker);
// Collect valid emitter tracks to update
// Single GPU emitter simulation entry collected for the batched update passes.
// Filled via aggregate initialization, so the order of the fields matters.
struct GPUSim
{
// The particle effect that owns the simulated emitter instance
ParticleEffect* Effect;
// The emitter asset used to run the GPU simulation
ParticleEmitter* Emitter;
// The index of the emitter (used to access the effect instance emitters data)
int32 EmitterIndex;
// The emitter instance data (held by reference, not copied - has to stay valid as long as this entry is in use)
ParticleEmitterInstance& Data;
};
Array<GPUSim, RendererAllocation> sims;
sims.EnsureCapacity(Math::AlignUp(GpuUpdateList.Count(), 64)); // Preallocate with some slack
for (ParticleEffect* effect : GpuUpdateList)
{
auto& instance = effect->Instance;
@@ -1286,7 +1296,6 @@ void UpdateGPU(RenderTask* task, GPUContext* context)
if (!particleSystem || !particleSystem->IsLoaded())
continue;
// Update all emitter tracks
for (int32 j = 0; j < particleSystem->Tracks.Count(); j++)
{
const auto& track = particleSystem->Tracks[j];
@@ -1297,16 +1306,41 @@ void UpdateGPU(RenderTask* task, GPUContext* context)
if (!emitter || !emitter->IsLoaded() || emitter->SimulationMode != ParticlesSimulationMode::GPU || instance.Emitters.Count() <= emitterIndex)
continue;
ParticleEmitterInstance& data = instance.Emitters[emitterIndex];
if (!data.Buffer)
if (!data.Buffer || !emitter->GPU.CanSim(emitter, data))
continue;
ASSERT(emitter->Capacity != 0 && emitter->Graph.Layout.Size != 0);
// TODO: use async context for particles to update them on compute during GBuffer rendering
emitter->GPU.Execute(context, emitter, effect, emitterIndex, data);
sims.Add({ effect, emitter, emitterIndex, data });
}
}
GpuUpdateList.Clear();
// Pre-pass with buffers setup
{
PROFILE_CPU_NAMED("PreSim");
for (GPUSim& sim : sims)
{
sim.Emitter->GPU.PreSim(context, sim.Emitter, sim.Effect, sim.EmitterIndex, sim.Data);
}
}
// Simulation pass with compute shader dispatches
{
PROFILE_GPU_CPU("Sim");
for (GPUSim& sim : sims)
{
sim.Emitter->GPU.Sim(context, sim.Emitter, sim.Effect, sim.EmitterIndex, sim.Data);
}
}
// Post-pass with buffers setup
{
PROFILE_CPU_NAMED("PostSim");
for (GPUSim& sim : sims)
{
sim.Emitter->GPU.PostSim(context, sim.Emitter, sim.Effect, sim.EmitterIndex, sim.Data);
}
}
context->ResetSR();
context->ResetUA();
context->FlushState();