Optimize GPU Particles simulation to perform memory buffer copies/updates before dispatch
This commit is contained in:
@@ -130,14 +130,20 @@ void GPUParticles::CopyParticlesCount(GPUContext* context, ParticleEmitter* emit
|
||||
}
|
||||
}
|
||||
|
||||
void GPUParticles::Execute(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data)
|
||||
bool GPUParticles::CanSim(const ParticleEmitter* emitter, const ParticleEmitterInstance& data) const
|
||||
{
|
||||
const int32 threads = data.Buffer->GPU.ParticlesCountMax + data.GPU.SpawnCount;
|
||||
return data.GPU.DeltaTime > 0.0f &&
|
||||
emitter->Graph.Version == data.Version &&
|
||||
emitter->Graph.Version == data.Buffer->Version &&
|
||||
threads != 0 &&
|
||||
_mainCS;
|
||||
}
|
||||
|
||||
void GPUParticles::PreSim(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data)
|
||||
{
|
||||
PROFILE_CPU_ASSET(emitter);
|
||||
ASSERT(emitter->Graph.Version == data.Version);
|
||||
ASSERT(emitter->Graph.Version == data.Buffer->Version);
|
||||
uint32 counterDefaultValue = 0;
|
||||
const uint32 counterOffset = data.Buffer->GPU.ParticleCounterOffset;
|
||||
const bool hasCB = _cbData.HasItems();
|
||||
|
||||
// Clear buffers if needed
|
||||
if (data.Buffer->GPU.PendingClear)
|
||||
@@ -156,14 +162,17 @@ void GPUParticles::Execute(GPUContext* context, ParticleEmitter* emitter, Partic
|
||||
}
|
||||
}
|
||||
|
||||
// Skip if can
|
||||
SceneRenderTask* viewTask = effect->GetRenderTask();
|
||||
const int32 threads = data.Buffer->GPU.ParticlesCountMax + data.GPU.SpawnCount;
|
||||
if (data.GPU.DeltaTime <= 0.0f || threads == 0 || !_mainCS)
|
||||
return;
|
||||
|
||||
// Clear destination buffer counter
|
||||
context->UpdateBuffer(data.Buffer->GPU.BufferSecondary, &counterDefaultValue, sizeof(counterDefaultValue), counterOffset);
|
||||
}
|
||||
|
||||
void GPUParticles::Sim(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data)
|
||||
{
|
||||
PROFILE_CPU_ASSET(emitter);
|
||||
const bool hasCB = _cbData.HasItems();
|
||||
const int32 threads = data.Buffer->GPU.ParticlesCountMax + data.GPU.SpawnCount;
|
||||
const uint32 counterOffset = data.Buffer->GPU.ParticleCounterOffset;
|
||||
SceneRenderTask* viewTask = effect->GetRenderTask();
|
||||
|
||||
// Setup parameters
|
||||
MaterialParameter::BindMeta bindMeta;
|
||||
@@ -265,6 +274,11 @@ void GPUParticles::Execute(GPUContext* context, ParticleEmitter* emitter, Partic
|
||||
// Invoke Compute shader
|
||||
const int32 threadGroupSize = 1024;
|
||||
context->Dispatch(_mainCS, Math::Min(Math::DivideAndRoundUp(threads, threadGroupSize), GPU_MAX_CS_DISPATCH_THREAD_GROUPS), 1, 1);
|
||||
}
|
||||
|
||||
void GPUParticles::PostSim(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data)
|
||||
{
|
||||
const uint32 counterOffset = data.Buffer->GPU.ParticleCounterOffset;
|
||||
|
||||
// Copy custom data
|
||||
for (int32 i = 0; i < CustomDataSize; i += 4)
|
||||
|
||||
@@ -79,15 +79,10 @@ public:
|
||||
/// <param name="dstOffset">The destination buffer offset from start (in bytes) to copy the counter (uint32).</param>
|
||||
void CopyParticlesCount(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, ParticleEmitterInstance& data, GPUBuffer* dstBuffer, uint32 dstOffset);
|
||||
|
||||
/// <summary>
|
||||
/// Performs the GPU particles simulation update using the graphics device.
|
||||
/// </summary>
|
||||
/// <param name="context">The GPU context that supports Compute.</param>
|
||||
/// <param name="emitter">The owning emitter.</param>
|
||||
/// <param name="effect">The instance effect.</param>
|
||||
/// <param name="emitterIndex">The index of the emitter in the particle system.</param>
|
||||
/// <param name="data">The instance data.</param>
|
||||
void Execute(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data);
|
||||
/// <summary>
/// Checks if the GPU particles simulation can be performed for the given emitter instance (positive delta time, emitter graph version matching the instance data and its buffer, non-zero simulation threads count, and the main compute shader available).
/// </summary>
/// <param name="emitter">The owning emitter.</param>
/// <param name="data">The instance data.</param>
/// <returns>True if the simulation can be performed, otherwise false.</returns>
bool CanSim(const ParticleEmitter* emitter, const ParticleEmitterInstance& data) const;
|
||||
/// <summary>
/// Prepares the emitter buffers before the simulation dispatch: clears the buffers if a clear is pending and resets the particles counter in the secondary (destination) buffer. Asserts that the emitter graph version matches the instance data and its buffer.
/// </summary>
/// <param name="context">The GPU context that supports Compute.</param>
/// <param name="emitter">The owning emitter.</param>
/// <param name="effect">The instance effect.</param>
/// <param name="emitterIndex">The index of the emitter in the particle system.</param>
/// <param name="data">The instance data.</param>
void PreSim(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data);
|
||||
/// <summary>
/// Performs the GPU particles simulation update: sets up the simulation parameters and dispatches the main compute shader.
/// </summary>
/// <param name="context">The GPU context that supports Compute.</param>
/// <param name="emitter">The owning emitter.</param>
/// <param name="effect">The instance effect.</param>
/// <param name="emitterIndex">The index of the emitter in the particle system.</param>
/// <param name="data">The instance data.</param>
void Sim(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data);
|
||||
/// <summary>
/// Performs the post-simulation work for the emitter instance, which includes copying the custom data.
/// NOTE(review): only the beginning of the implementation was checked when writing this summary - confirm the full list of operations.
/// </summary>
/// <param name="context">The GPU context that supports Compute.</param>
/// <param name="emitter">The owning emitter.</param>
/// <param name="effect">The instance effect.</param>
/// <param name="emitterIndex">The index of the emitter in the particle system.</param>
/// <param name="data">The instance data.</param>
void PostSim(GPUContext* context, ParticleEmitter* emitter, ParticleEffect* effect, int32 emitterIndex, ParticleEmitterInstance& data);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1279,6 +1279,16 @@ void UpdateGPU(RenderTask* task, GPUContext* context)
|
||||
PROFILE_MEM(Particles);
|
||||
ConcurrentSystemLocker::ReadScope systemScope(Particles::SystemLocker);
|
||||
|
||||
// Collect valid emitter tracks to update
|
||||
// Single GPU emitter simulation entry collected for the PreSim/Sim/PostSim passes.
// Field order matters: entries are created via aggregate initialization { effect, emitter, emitterIndex, data }.
struct GPUSim
|
||||
{
|
||||
// The particle effect instance that owns the emitter data
ParticleEffect* Effect;
|
||||
// The emitter asset whose GPU simulation is invoked
ParticleEmitter* Emitter;
|
||||
// The index of the emitter within the effect instance emitters list
int32 EmitterIndex;
|
||||
// Reference to the emitter instance data stored in the effect instance
// NOTE(review): held by reference - assumes the effect's emitters array is not resized while the collected list is in use; confirm
ParticleEmitterInstance& Data;
|
||||
};
|
||||
Array<GPUSim, RendererAllocation> sims;
|
||||
sims.EnsureCapacity(Math::AlignUp(GpuUpdateList.Count(), 64)); // Preallocate with some slack
|
||||
for (ParticleEffect* effect : GpuUpdateList)
|
||||
{
|
||||
auto& instance = effect->Instance;
|
||||
@@ -1286,7 +1296,6 @@ void UpdateGPU(RenderTask* task, GPUContext* context)
|
||||
if (!particleSystem || !particleSystem->IsLoaded())
|
||||
continue;
|
||||
|
||||
// Update all emitter tracks
|
||||
for (int32 j = 0; j < particleSystem->Tracks.Count(); j++)
|
||||
{
|
||||
const auto& track = particleSystem->Tracks[j];
|
||||
@@ -1297,16 +1306,41 @@ void UpdateGPU(RenderTask* task, GPUContext* context)
|
||||
if (!emitter || !emitter->IsLoaded() || emitter->SimulationMode != ParticlesSimulationMode::GPU || instance.Emitters.Count() <= emitterIndex)
|
||||
continue;
|
||||
ParticleEmitterInstance& data = instance.Emitters[emitterIndex];
|
||||
if (!data.Buffer)
|
||||
if (!data.Buffer || !emitter->GPU.CanSim(emitter, data))
|
||||
continue;
|
||||
ASSERT(emitter->Capacity != 0 && emitter->Graph.Layout.Size != 0);
|
||||
|
||||
// TODO: use async context for particles to update them on compute during GBuffer rendering
|
||||
emitter->GPU.Execute(context, emitter, effect, emitterIndex, data);
|
||||
sims.Add({ effect, emitter, emitterIndex, data });
|
||||
}
|
||||
}
|
||||
GpuUpdateList.Clear();
|
||||
|
||||
// Pre-pass with buffers setup
|
||||
{
|
||||
PROFILE_CPU_NAMED("PreSim");
|
||||
for (GPUSim& sim : sims)
|
||||
{
|
||||
sim.Emitter->GPU.PreSim(context, sim.Emitter, sim.Effect, sim.EmitterIndex, sim.Data);
|
||||
}
|
||||
}
|
||||
|
||||
// Main pass with compute shader dispatches
|
||||
{
|
||||
PROFILE_GPU_CPU("Sim");
|
||||
for (GPUSim& sim : sims)
|
||||
{
|
||||
sim.Emitter->GPU.Sim(context, sim.Emitter, sim.Effect, sim.EmitterIndex, sim.Data);
|
||||
}
|
||||
}
|
||||
|
||||
// Post-pass with buffers setup
|
||||
{
|
||||
PROFILE_CPU_NAMED("PostSim");
|
||||
for (GPUSim& sim : sims)
|
||||
{
|
||||
sim.Emitter->GPU.PostSim(context, sim.Emitter, sim.Effect, sim.EmitterIndex, sim.Data);
|
||||
}
|
||||
}
|
||||
|
||||
context->ResetSR();
|
||||
context->ResetUA();
|
||||
context->FlushState();
|
||||
|
||||
Reference in New Issue
Block a user