From 8a98f466c51e10b1568908adfedc0ebf397c0fe1 Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Tue, 1 Nov 2022 12:50:17 +0100 Subject: [PATCH] Use named events for GPU passes for CPU profiling usability --- .../GI/DynamicDiffuseGlobalIllumination.cpp | 16 ++++---- .../Renderer/GI/GlobalSurfaceAtlasPass.cpp | 38 ++++++++++--------- .../Renderer/GlobalSignDistanceFieldPass.cpp | 10 ++--- Source/Engine/Renderer/VolumetricFogPass.cpp | 4 +- 4 files changed, 36 insertions(+), 32 deletions(-) diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp index e1949a7d8..979e33a1c 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp @@ -498,7 +498,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont // Update probes { - PROFILE_GPU_CPU("Probes Update"); + PROFILE_GPU_CPU_NAMED("Probes Update"); bool anyDirty = false; uint32 threadGroupsX, threadGroupsY; for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++) @@ -509,7 +509,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont // Classify probes (activation/deactivation and relocation) { - PROFILE_GPU_CPU("Classify Probes"); + PROFILE_GPU_CPU_NAMED("Classify Probes"); uint32 activeProbesCount = 0; context->UpdateBuffer(ddgiData.ActiveProbes, &activeProbesCount, sizeof(uint32), 0); threadGroupsX = Math::DivideAndRoundUp(probesCountCascade, DDGI_PROBE_CLASSIFY_GROUP_SIZE); @@ -528,7 +528,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont // Build indirect args for probes updating (loop over active-only probes) { - PROFILE_GPU_CPU("Init Args"); + PROFILE_GPU_CPU_NAMED("Init Args"); context->BindSR(0, ddgiData.ActiveProbes->View()); context->BindUA(0, ddgiData.UpdateProbesInitArgs->View()); context->Dispatch(_csUpdateProbesInitArgs, 1, 1, 1); @@ -547,7 +547,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont // Trace rays from probes { - PROFILE_GPU_CPU("Trace Rays"); + PROFILE_GPU_CPU_NAMED("Trace Rays"); // Global SDF with Global Surface Atlas software raytracing (thread X - per probe ray, thread Y - per probe) context->BindSR(0, bindingDataSDF.Texture ? bindingDataSDF.Texture->ViewVolume() : nullptr); @@ -568,7 +568,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont // Update probes irradiance and distance textures (one thread-group per probe) { - PROFILE_GPU_CPU("Update Probes"); + PROFILE_GPU_CPU_NAMED("Update Probes"); context->BindSR(0, ddgiData.Result.ProbesState); context->BindSR(1, ddgiData.ProbesTrace->View()); context->BindSR(2, ddgiData.ActiveProbes->View()); @@ -587,7 +587,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont // Update probes border pixels if (anyDirty) { - PROFILE_GPU_CPU("Update Borders"); + PROFILE_GPU_CPU_NAMED("Update Borders"); // Irradiance context->BindUA(0, ddgiData.Result.ProbesIrradiance); @@ -663,7 +663,7 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext, // Render indirect lighting if (lightBuffer) { - PROFILE_GPU_CPU("Indirect Lighting"); + PROFILE_GPU_CPU_NAMED("Indirect Lighting"); #if 0 // DDGI indirect lighting debug preview context->Clear(lightBuffer, Color::Transparent); @@ -694,7 +694,7 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext, // Probes debug drawing if (renderContext.View.Mode == ViewMode::GlobalIllumination && lightBuffer) { - PROFILE_GPU_CPU("Debug Probes"); + PROFILE_GPU_CPU_NAMED("Debug Probes"); if (!_debugModel) _debugModel = Content::LoadAsyncInternal(TEXT("Editor/Primitives/Sphere")); if (!_debugMaterial) diff --git a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp index 8fb0c891c..ebc6a3624 100644 --- a/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp +++ b/Source/Engine/Renderer/GI/GlobalSurfaceAtlasPass.cpp @@ -378,7 +378,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co return false; } surfaceAtlasData.LastFrameUsed = currentFrame; - PROFILE_GPU_CPU("Global Surface Atlas"); + PROFILE_GPU_CPU_NAMED("Global Surface Atlas"); // Setup options auto* graphicsSettings = GraphicsSettings::Get(); @@ -488,6 +488,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co { if (viewMask & e.LayerMask && e.Bounds.Radius >= minObjectRadius && CollisionsHelper::DistanceSpherePoint(e.Bounds, viewPosition) < distance) { + //PROFILE_CPU_ACTOR(e.Actor); e.Actor->Draw(renderContext); } } @@ -495,23 +496,26 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co } // Remove unused objects - for (auto it = surfaceAtlasData.Objects.Begin(); it.IsNotEnd(); ++it) { - if (it->Value.LastFrameUsed != currentFrame) + PROFILE_GPU_CPU_NAMED("Compact Objects"); + for (auto it = surfaceAtlasData.Objects.Begin(); it.IsNotEnd(); ++it) { - for (auto& tile : it->Value.Tiles) + if (it->Value.LastFrameUsed != currentFrame) { - if (tile) - tile->Free(); + for (auto& tile : it->Value.Tiles) + { + if (tile) + tile->Free(); + } + surfaceAtlasData.Objects.Remove(it); } - surfaceAtlasData.Objects.Remove(it); } } // Rasterize world geometry material properties into Global Surface Atlas if (_dirtyObjectsBuffer.Count() != 0) { - PROFILE_GPU_CPU("Rasterize Tiles"); + PROFILE_GPU_CPU_NAMED("Rasterize Tiles"); RenderContext renderContextTiles = renderContext; renderContextTiles.List = RenderList::GetFromPool(); @@ -533,7 +537,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co }; context->SetRenderTarget(depthBuffer, ToSpan(targetBuffers, ARRAY_COUNT(targetBuffers))); { - PROFILE_GPU_CPU("Clear"); + PROFILE_GPU_CPU_NAMED("Clear"); if (noCache || GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES) { // Full-atlas hardware clear @@ -625,7 +629,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co // Send objects data to the GPU { - PROFILE_GPU_CPU("Update Objects"); + PROFILE_GPU_CPU_NAMED("Update Objects"); surfaceAtlasData.ObjectsBuffer.Flush(context); } @@ -646,7 +650,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co // Chunk [0,0,0] is unused and it's address=0 is used for atomic counter for writing into CulledObjectsBuffer. // Each chunk data contains objects count + all objects addresses. // This allows to quickly convert world-space position into chunk, then read chunk data start and loop over culled objects. - PROFILE_GPU_CPU("Cull Objects"); + PROFILE_GPU_CPU_NAMED("Cull Objects"); uint32 objectsBufferCapacity = (uint32)((float)surfaceAtlasData.Objects.Count() * 1.3f); // Copy counter from ChunksBuffer into staging buffer to access current chunks memory usage to adapt dynamically to the scene complexity @@ -776,7 +780,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co // Render direct lighting into atlas if (surfaceAtlasData.Objects.Count() != 0) { - PROFILE_GPU_CPU("Direct Lighting"); + PROFILE_GPU_CPU_NAMED("Direct Lighting"); context->SetViewportAndScissors(Viewport(0, 0, (float)resolution, (float)resolution)); context->SetRenderTarget(surfaceAtlasData.AtlasLighting->View()); context->BindSR(0, surfaceAtlasData.AtlasGBuffer0->View()); @@ -876,7 +880,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co // Copy emissive light into the final direct lighting atlas { - PROFILE_GPU_CPU("Copy Emissive"); + PROFILE_GPU_CPU_NAMED("Copy Emissive"); _vertexBuffer->Clear(); for (const auto& e : surfaceAtlasData.Objects) { @@ -921,7 +925,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co continue; // Draw draw light - PROFILE_GPU_CPU("Directional Light"); + PROFILE_GPU_CPU_NAMED("Directional Light"); const bool useShadow = CanRenderShadow(renderContext.View, light); // TODO: test perf/quality when using Shadow Map for directional light (ShadowsPass::Instance()->LastDirLightShadowMap) instead of Global SDF trace light.SetupLightData(&data.Light, useShadow); @@ -955,7 +959,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co continue; // Draw draw light - PROFILE_GPU_CPU("Point Light"); + PROFILE_GPU_CPU_NAMED("Point Light"); const bool useShadow = CanRenderShadow(renderContext.View, light); light.SetupLightData(&data.Light, useShadow); data.Light.Color *= light.IndirectLightingIntensity; @@ -988,7 +992,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co continue; // Draw draw light - PROFILE_GPU_CPU("Spot Light"); + PROFILE_GPU_CPU_NAMED("Spot Light"); const bool useShadow = CanRenderShadow(renderContext.View, light); light.SetupLightData(&data.Light, useShadow); data.Light.Color *= light.IndirectLightingIntensity; @@ -1031,7 +1035,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co } if (_vertexBuffer->Data.Count() == 0) break; - PROFILE_GPU_CPU("DDGI"); + PROFILE_GPU_CPU_NAMED("DDGI"); data.DDGI = bindingDataDDGI.Constants; data.Light.Radius = giSettings.BounceIntensity / bindingDataDDGI.Constants.IndirectLightingIntensity; // Reuse for smaller CB context->BindSR(5, bindingDataDDGI.ProbesState); diff --git a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp index e751c8f13..7952ee7d6 100644 --- a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp +++ b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp @@ -472,7 +472,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex GPUTexture* tmpMip = nullptr; if (updated) { - PROFILE_GPU_CPU("Init"); + PROFILE_GPU_CPU_NAMED("Init"); for (auto& cascade : sdfData.Cascades) { cascade.NonEmptyChunks.Clear(); @@ -589,7 +589,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex const int32 chunkDispatchGroups = GLOBAL_SDF_RASTERIZE_CHUNK_SIZE / GLOBAL_SDF_RASTERIZE_GROUP_SIZE; bool anyChunkDispatch = false; { - PROFILE_GPU_CPU("Clear Chunks"); + PROFILE_GPU_CPU_NAMED("Clear Chunks"); for (auto it = cascade.NonEmptyChunks.Begin(); it.IsNotEnd(); ++it) { auto& key = it->Item; @@ -606,7 +606,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex } } { - PROFILE_GPU_CPU("Rasterize Chunks"); + PROFILE_GPU_CPU_NAMED("Rasterize Chunks"); // Update static chunks for (auto it = chunks.Begin(); it.IsNotEnd(); ++it) @@ -637,7 +637,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex const auto& objectIndexToDataIndex = ObjectIndexToDataIndexCache; if (chunks.Count() != 0) { - PROFILE_GPU_CPU("Update Objects"); + PROFILE_GPU_CPU_NAMED("Update Objects"); auto& objectIndexToDataIndexCache = ObjectIndexToDataIndexCache; objectIndexToDataIndexCache.Clear(); @@ -827,7 +827,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex // Generate mip out of cascade (empty chunks have distance value 1 which is incorrect so mip will be used as a fallback - lower res) if (updated || anyChunkDispatch) { - PROFILE_GPU_CPU("Generate Mip"); + PROFILE_GPU_CPU_NAMED("Generate Mip"); context->ResetUA(); const int32 mipDispatchGroups = Math::DivideAndRoundUp(resolutionMip, GLOBAL_SDF_MIP_GROUP_SIZE); static_assert((GLOBAL_SDF_MIP_FLOODS % 2) == 1, "Invalid Global SDF mip flood iterations count."); diff --git a/Source/Engine/Renderer/VolumetricFogPass.cpp b/Source/Engine/Renderer/VolumetricFogPass.cpp index 6a25ebf5a..3185fad6f 100644 --- a/Source/Engine/Renderer/VolumetricFogPass.cpp +++ b/Source/Engine/Renderer/VolumetricFogPass.cpp @@ -518,7 +518,7 @@ void VolumetricFogPass::Render(RenderContext& renderContext) // Render local fog particles if (renderContext.List->VolumetricFogParticles.HasItems()) { - PROFILE_GPU_CPU("Local Fog"); + PROFILE_GPU_CPU_NAMED("Local Fog"); // Bind the output GPUTextureView* rt[] = { vBufferA->ViewVolume(), vBufferB->ViewVolume() }; @@ -616,7 +616,7 @@ void VolumetricFogPass::Render(RenderContext& renderContext) // Skip if no lights to render if (pointLights.Count() + spotLights.Count()) { - PROFILE_GPU_CPU("Lights Injection"); + PROFILE_GPU_CPU_NAMED("Lights Injection"); // Allocate temporary buffer for light scattering injection localShadowedLightScattering = GetLocalShadowedLightScattering(renderContext, context, options);