From b1710c4d018d89cc8b310fa92234c69d8e74dcc9 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 5 Aug 2025 22:53:09 +0200 Subject: [PATCH] Add async particles drawing (GPU emitters are sync) --- Source/Engine/Particles/ParticleEffect.cpp | 1 + Source/Engine/Particles/Particles.cpp | 18 ++++++++++++++++-- Source/Engine/Renderer/RenderList.cpp | 16 ++++++++++++++++ Source/Engine/Renderer/RenderList.h | 16 ++++++++++++++++ Source/Engine/Renderer/Renderer.cpp | 4 ++++ 5 files changed, 53 insertions(+), 2 deletions(-) diff --git a/Source/Engine/Particles/ParticleEffect.cpp b/Source/Engine/Particles/ParticleEffect.cpp index 3f1ac7055..4df492fe2 100644 --- a/Source/Engine/Particles/ParticleEffect.cpp +++ b/Source/Engine/Particles/ParticleEffect.cpp @@ -20,6 +20,7 @@ ParticleEffect::ParticleEffect(const SpawnParams& params) { _box = BoundingBox(_transform.Translation); BoundingSphere::FromBox(_box, _sphere); + _drawCategory = SceneRendering::SceneDrawAsync; } void ParticleEffectParameter::Init(ParticleEffect* effect, int32 emitterIndex, int32 paramIndex) diff --git a/Source/Engine/Particles/Particles.cpp b/Source/Engine/Particles/Particles.cpp index f98be5ec8..7378b9f7b 100644 --- a/Source/Engine/Particles/Particles.cpp +++ b/Source/Engine/Particles/Particles.cpp @@ -649,8 +649,22 @@ void CleanupGPUParticlesSorting() GPUParticlesSorting = nullptr; } -void DrawEmitterGPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCall& drawCall, DrawPass drawModes, StaticFlags staticFlags, ParticleEmitterInstance& emitterData, const RenderModulesIndices& renderModulesIndices, int8 sortOrder) +void DrawEmitterGPU(RenderContext& renderContext, ParticleBuffer* buffer, DrawCall& drawCall, DrawPass drawModes, StaticFlags staticFlags, const RenderModulesIndices& renderModulesIndices, int8 sortOrder) { + if (!IsInMainThread()) + { + // Clone draw call data the hard way (raw byte copy that the lambda below captures by value) + byte drawCallCopy[sizeof(DrawCall)]; + Platform::MemoryCopy(&drawCallCopy, &drawCall, sizeof(DrawCall)); + 
// When rendering in async (not on the main thread), GPU particles drawing is delayed so it stays in sync with the main GPUContext usage + // Move drawing into delayed callback post scene drawing to use GPUContext safely + renderContext.List->AddDelayedDraw([buffer, drawCallCopy, drawModes, staticFlags, renderModulesIndices, sortOrder](RenderContext& renderContext) + { + DrawEmitterGPU(renderContext, buffer, *(DrawCall*)drawCallCopy, drawModes, staticFlags, renderModulesIndices, sortOrder); + }); + return; + } const auto context = GPUDevice::Instance->GetMainContext(); auto emitter = buffer->Emitter; @@ -1092,7 +1106,7 @@ void Particles::DrawParticles(RenderContext& renderContext, ParticleEffect* effe break; #if COMPILE_WITH_GPU_PARTICLES case ParticlesSimulationMode::GPU: - DrawEmitterGPU(renderContext, buffer, drawCall, drawModes, staticFlags, emitterData, renderModulesIndices, sortOrder); + DrawEmitterGPU(renderContext, buffer, drawCall, drawModes, staticFlags, renderModulesIndices, sortOrder); break; #endif } diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index dbb91680a..05f72f83f 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -255,6 +255,20 @@ void RenderList::AddSettingsBlend(IPostFxSettingsProvider* provider, float weigh Blendable.Add(blend); } +void RenderList::AddDelayedDraw(DelayedDraw&& func) +{ + MemPoolLocker.Lock(); // TODO: convert _delayedDraws into RenderListBuffer with usage of arena Memory for fast alloc + _delayedDraws.Add(MoveTemp(func)); + MemPoolLocker.Unlock(); +} + +void RenderList::DrainDelayedDraws(RenderContext& renderContext) +{ + for (DelayedDraw& e : _delayedDraws) + e(renderContext); + _delayedDraws.SetCapacity(0); +} + void RenderList::BlendSettings() { PROFILE_CPU(); @@ -459,6 +473,7 @@ RenderList::RenderList(const SpawnParams& params) , ObjectBuffer(0, PixelFormat::R32G32B32A32_Float, false, TEXT("Object Buffer")) , 
TempObjectBuffer(0, PixelFormat::R32G32B32A32_Float, false, TEXT("Object Buffer")) , _instanceBuffer(0, sizeof(ShaderObjectDrawInstanceData), TEXT("Instance Buffer"), GPUVertexLayout::Get({ { VertexElement::Types::Attribute0, 3, 0, 1, PixelFormat::R32_UInt } })) + , _delayedDraws(&Memory) { } @@ -490,6 +505,7 @@ void RenderList::Clear() PostFx.Clear(); Settings = PostProcessSettings(); Blendable.Clear(); + _delayedDraws.Clear(); _instanceBuffer.Clear(); ObjectBuffer.Clear(); TempObjectBuffer.Clear(); diff --git a/Source/Engine/Renderer/RenderList.h b/Source/Engine/Renderer/RenderList.h index 98f980e98..202afadc9 100644 --- a/Source/Engine/Renderer/RenderList.h +++ b/Source/Engine/Renderer/RenderList.h @@ -435,8 +435,24 @@ public: /// DynamicTypedBuffer TempObjectBuffer; + typedef Function DelayedDraw; + void AddDelayedDraw(DelayedDraw&& func); + void DrainDelayedDraws(RenderContext& renderContext); + + /// <summary> + /// Adds custom callback (eg. lambda) to invoke after scene draw calls are collected on a main thread (some async draw tasks might be active). Allows for safe usage of GPUContext for draw preparations or to perform GPU-driven drawing. + /// </summary> + template + FORCE_INLINE void AddDelayedDraw(const T& lambda) + { + DelayedDraw func; + func.Bind(&Memory, lambda); + AddDelayedDraw(MoveTemp(func)); + } + private: DynamicVertexBuffer _instanceBuffer; + Array _delayedDraws; public: /// diff --git a/Source/Engine/Renderer/Renderer.cpp b/Source/Engine/Renderer/Renderer.cpp index 7c7a53a09..77bfa5305 100644 --- a/Source/Engine/Renderer/Renderer.cpp +++ b/Source/Engine/Renderer/Renderer.cpp @@ -458,6 +458,10 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont JobSystem::Wait(label); renderContextBatch.WaitLabels.Clear(); + // Perform custom post-scene drawing (eg. 
GPU dispatches used by VFX) + for (RenderContext& e : renderContextBatch.Contexts) + e.List->DrainDelayedDraws(e); + #if USE_EDITOR GBufferPass::Instance()->OverrideDrawCalls(renderContext); #endif