Use named events for GPU passes for CPU profiling usability

This commit is contained in:
Wojtek Figat
2022-11-01 12:50:17 +01:00
parent a945e2c2a5
commit 8a98f466c5
4 changed files with 36 additions and 32 deletions

View File

@@ -498,7 +498,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont
// Update probes
{
PROFILE_GPU_CPU("Probes Update");
PROFILE_GPU_CPU_NAMED("Probes Update");
bool anyDirty = false;
uint32 threadGroupsX, threadGroupsY;
for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++)
@@ -509,7 +509,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont
// Classify probes (activation/deactivation and relocation)
{
PROFILE_GPU_CPU("Classify Probes");
PROFILE_GPU_CPU_NAMED("Classify Probes");
uint32 activeProbesCount = 0;
context->UpdateBuffer(ddgiData.ActiveProbes, &activeProbesCount, sizeof(uint32), 0);
threadGroupsX = Math::DivideAndRoundUp(probesCountCascade, DDGI_PROBE_CLASSIFY_GROUP_SIZE);
@@ -528,7 +528,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont
// Build indirect args for probes updating (loop over active-only probes)
{
PROFILE_GPU_CPU("Init Args");
PROFILE_GPU_CPU_NAMED("Init Args");
context->BindSR(0, ddgiData.ActiveProbes->View());
context->BindUA(0, ddgiData.UpdateProbesInitArgs->View());
context->Dispatch(_csUpdateProbesInitArgs, 1, 1, 1);
@@ -547,7 +547,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont
// Trace rays from probes
{
PROFILE_GPU_CPU("Trace Rays");
PROFILE_GPU_CPU_NAMED("Trace Rays");
// Global SDF with Global Surface Atlas software raytracing (thread X - per probe ray, thread Y - per probe)
context->BindSR(0, bindingDataSDF.Texture ? bindingDataSDF.Texture->ViewVolume() : nullptr);
@@ -568,7 +568,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont
// Update probes irradiance and distance textures (one thread-group per probe)
{
PROFILE_GPU_CPU("Update Probes");
PROFILE_GPU_CPU_NAMED("Update Probes");
context->BindSR(0, ddgiData.Result.ProbesState);
context->BindSR(1, ddgiData.ProbesTrace->View());
context->BindSR(2, ddgiData.ActiveProbes->View());
@@ -587,7 +587,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont
// Update probes border pixels
if (anyDirty)
{
PROFILE_GPU_CPU("Update Borders");
PROFILE_GPU_CPU_NAMED("Update Borders");
// Irradiance
context->BindUA(0, ddgiData.Result.ProbesIrradiance);
@@ -663,7 +663,7 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext,
// Render indirect lighting
if (lightBuffer)
{
PROFILE_GPU_CPU("Indirect Lighting");
PROFILE_GPU_CPU_NAMED("Indirect Lighting");
#if 0
// DDGI indirect lighting debug preview
context->Clear(lightBuffer, Color::Transparent);
@@ -694,7 +694,7 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext,
// Probes debug drawing
if (renderContext.View.Mode == ViewMode::GlobalIllumination && lightBuffer)
{
PROFILE_GPU_CPU("Debug Probes");
PROFILE_GPU_CPU_NAMED("Debug Probes");
if (!_debugModel)
_debugModel = Content::LoadAsyncInternal<Model>(TEXT("Editor/Primitives/Sphere"));
if (!_debugMaterial)

View File

@@ -378,7 +378,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
return false;
}
surfaceAtlasData.LastFrameUsed = currentFrame;
PROFILE_GPU_CPU("Global Surface Atlas");
PROFILE_GPU_CPU_NAMED("Global Surface Atlas");
// Setup options
auto* graphicsSettings = GraphicsSettings::Get();
@@ -488,6 +488,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
{
if (viewMask & e.LayerMask && e.Bounds.Radius >= minObjectRadius && CollisionsHelper::DistanceSpherePoint(e.Bounds, viewPosition) < distance)
{
//PROFILE_CPU_ACTOR(e.Actor);
e.Actor->Draw(renderContext);
}
}
@@ -495,23 +496,26 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
}
// Remove unused objects
for (auto it = surfaceAtlasData.Objects.Begin(); it.IsNotEnd(); ++it)
{
if (it->Value.LastFrameUsed != currentFrame)
PROFILE_GPU_CPU_NAMED("Compact Objects");
for (auto it = surfaceAtlasData.Objects.Begin(); it.IsNotEnd(); ++it)
{
for (auto& tile : it->Value.Tiles)
if (it->Value.LastFrameUsed != currentFrame)
{
if (tile)
tile->Free();
for (auto& tile : it->Value.Tiles)
{
if (tile)
tile->Free();
}
surfaceAtlasData.Objects.Remove(it);
}
surfaceAtlasData.Objects.Remove(it);
}
}
// Rasterize world geometry material properties into Global Surface Atlas
if (_dirtyObjectsBuffer.Count() != 0)
{
PROFILE_GPU_CPU("Rasterize Tiles");
PROFILE_GPU_CPU_NAMED("Rasterize Tiles");
RenderContext renderContextTiles = renderContext;
renderContextTiles.List = RenderList::GetFromPool();
@@ -533,7 +537,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
};
context->SetRenderTarget(depthBuffer, ToSpan(targetBuffers, ARRAY_COUNT(targetBuffers)));
{
PROFILE_GPU_CPU("Clear");
PROFILE_GPU_CPU_NAMED("Clear");
if (noCache || GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES)
{
// Full-atlas hardware clear
@@ -625,7 +629,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
// Send objects data to the GPU
{
PROFILE_GPU_CPU("Update Objects");
PROFILE_GPU_CPU_NAMED("Update Objects");
surfaceAtlasData.ObjectsBuffer.Flush(context);
}
@@ -646,7 +650,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
// Chunk [0,0,0] is unused and its address=0 is used as an atomic counter for writing into CulledObjectsBuffer.
// Each chunk's data contains the objects count followed by all object addresses.
// This allows quickly converting a world-space position into a chunk, then reading the chunk data start and looping over the culled objects.
PROFILE_GPU_CPU("Cull Objects");
PROFILE_GPU_CPU_NAMED("Cull Objects");
uint32 objectsBufferCapacity = (uint32)((float)surfaceAtlasData.Objects.Count() * 1.3f);
// Copy counter from ChunksBuffer into staging buffer to access current chunks memory usage to adapt dynamically to the scene complexity
@@ -776,7 +780,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
// Render direct lighting into atlas
if (surfaceAtlasData.Objects.Count() != 0)
{
PROFILE_GPU_CPU("Direct Lighting");
PROFILE_GPU_CPU_NAMED("Direct Lighting");
context->SetViewportAndScissors(Viewport(0, 0, (float)resolution, (float)resolution));
context->SetRenderTarget(surfaceAtlasData.AtlasLighting->View());
context->BindSR(0, surfaceAtlasData.AtlasGBuffer0->View());
@@ -876,7 +880,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
// Copy emissive light into the final direct lighting atlas
{
PROFILE_GPU_CPU("Copy Emissive");
PROFILE_GPU_CPU_NAMED("Copy Emissive");
_vertexBuffer->Clear();
for (const auto& e : surfaceAtlasData.Objects)
{
@@ -921,7 +925,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
continue;
// Draw light
PROFILE_GPU_CPU("Directional Light");
PROFILE_GPU_CPU_NAMED("Directional Light");
const bool useShadow = CanRenderShadow(renderContext.View, light);
// TODO: test perf/quality when using Shadow Map for directional light (ShadowsPass::Instance()->LastDirLightShadowMap) instead of Global SDF trace
light.SetupLightData(&data.Light, useShadow);
@@ -955,7 +959,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
continue;
// Draw light
PROFILE_GPU_CPU("Point Light");
PROFILE_GPU_CPU_NAMED("Point Light");
const bool useShadow = CanRenderShadow(renderContext.View, light);
light.SetupLightData(&data.Light, useShadow);
data.Light.Color *= light.IndirectLightingIntensity;
@@ -988,7 +992,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
continue;
// Draw light
PROFILE_GPU_CPU("Spot Light");
PROFILE_GPU_CPU_NAMED("Spot Light");
const bool useShadow = CanRenderShadow(renderContext.View, light);
light.SetupLightData(&data.Light, useShadow);
data.Light.Color *= light.IndirectLightingIntensity;
@@ -1031,7 +1035,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
}
if (_vertexBuffer->Data.Count() == 0)
break;
PROFILE_GPU_CPU("DDGI");
PROFILE_GPU_CPU_NAMED("DDGI");
data.DDGI = bindingDataDDGI.Constants;
data.Light.Radius = giSettings.BounceIntensity / bindingDataDDGI.Constants.IndirectLightingIntensity; // Reuse for smaller CB
context->BindSR(5, bindingDataDDGI.ProbesState);

View File

@@ -472,7 +472,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
GPUTexture* tmpMip = nullptr;
if (updated)
{
PROFILE_GPU_CPU("Init");
PROFILE_GPU_CPU_NAMED("Init");
for (auto& cascade : sdfData.Cascades)
{
cascade.NonEmptyChunks.Clear();
@@ -589,7 +589,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
const int32 chunkDispatchGroups = GLOBAL_SDF_RASTERIZE_CHUNK_SIZE / GLOBAL_SDF_RASTERIZE_GROUP_SIZE;
bool anyChunkDispatch = false;
{
PROFILE_GPU_CPU("Clear Chunks");
PROFILE_GPU_CPU_NAMED("Clear Chunks");
for (auto it = cascade.NonEmptyChunks.Begin(); it.IsNotEnd(); ++it)
{
auto& key = it->Item;
@@ -606,7 +606,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
}
}
{
PROFILE_GPU_CPU("Rasterize Chunks");
PROFILE_GPU_CPU_NAMED("Rasterize Chunks");
// Update static chunks
for (auto it = chunks.Begin(); it.IsNotEnd(); ++it)
@@ -637,7 +637,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
const auto& objectIndexToDataIndex = ObjectIndexToDataIndexCache;
if (chunks.Count() != 0)
{
PROFILE_GPU_CPU("Update Objects");
PROFILE_GPU_CPU_NAMED("Update Objects");
auto& objectIndexToDataIndexCache = ObjectIndexToDataIndexCache;
objectIndexToDataIndexCache.Clear();
@@ -827,7 +827,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
// Generate mip out of cascade (empty chunks have distance value 1 which is incorrect so mip will be used as a fallback - lower res)
if (updated || anyChunkDispatch)
{
PROFILE_GPU_CPU("Generate Mip");
PROFILE_GPU_CPU_NAMED("Generate Mip");
context->ResetUA();
const int32 mipDispatchGroups = Math::DivideAndRoundUp(resolutionMip, GLOBAL_SDF_MIP_GROUP_SIZE);
static_assert((GLOBAL_SDF_MIP_FLOODS % 2) == 1, "Invalid Global SDF mip flood iterations count.");

View File

@@ -518,7 +518,7 @@ void VolumetricFogPass::Render(RenderContext& renderContext)
// Render local fog particles
if (renderContext.List->VolumetricFogParticles.HasItems())
{
PROFILE_GPU_CPU("Local Fog");
PROFILE_GPU_CPU_NAMED("Local Fog");
// Bind the output
GPUTextureView* rt[] = { vBufferA->ViewVolume(), vBufferB->ViewVolume() };
@@ -616,7 +616,7 @@ void VolumetricFogPass::Render(RenderContext& renderContext)
// Skip if no lights to render
if (pointLights.Count() + spotLights.Count())
{
PROFILE_GPU_CPU("Lights Injection");
PROFILE_GPU_CPU_NAMED("Lights Injection");
// Allocate temporary buffer for light scattering injection
localShadowedLightScattering = GetLocalShadowedLightScattering(renderContext, context, options);