Use named events for GPU passes for CPU profiling usability
This commit is contained in:
@@ -498,7 +498,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont
|
||||
|
||||
// Update probes
|
||||
{
|
||||
PROFILE_GPU_CPU("Probes Update");
|
||||
PROFILE_GPU_CPU_NAMED("Probes Update");
|
||||
bool anyDirty = false;
|
||||
uint32 threadGroupsX, threadGroupsY;
|
||||
for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++)
|
||||
@@ -509,7 +509,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont
|
||||
|
||||
// Classify probes (activation/deactivation and relocation)
|
||||
{
|
||||
PROFILE_GPU_CPU("Classify Probes");
|
||||
PROFILE_GPU_CPU_NAMED("Classify Probes");
|
||||
uint32 activeProbesCount = 0;
|
||||
context->UpdateBuffer(ddgiData.ActiveProbes, &activeProbesCount, sizeof(uint32), 0);
|
||||
threadGroupsX = Math::DivideAndRoundUp(probesCountCascade, DDGI_PROBE_CLASSIFY_GROUP_SIZE);
|
||||
@@ -528,7 +528,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont
|
||||
|
||||
// Build indirect args for probes updating (loop over active-only probes)
|
||||
{
|
||||
PROFILE_GPU_CPU("Init Args");
|
||||
PROFILE_GPU_CPU_NAMED("Init Args");
|
||||
context->BindSR(0, ddgiData.ActiveProbes->View());
|
||||
context->BindUA(0, ddgiData.UpdateProbesInitArgs->View());
|
||||
context->Dispatch(_csUpdateProbesInitArgs, 1, 1, 1);
|
||||
@@ -547,7 +547,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont
|
||||
|
||||
// Trace rays from probes
|
||||
{
|
||||
PROFILE_GPU_CPU("Trace Rays");
|
||||
PROFILE_GPU_CPU_NAMED("Trace Rays");
|
||||
|
||||
// Global SDF with Global Surface Atlas software raytracing (thread X - per probe ray, thread Y - per probe)
|
||||
context->BindSR(0, bindingDataSDF.Texture ? bindingDataSDF.Texture->ViewVolume() : nullptr);
|
||||
@@ -568,7 +568,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont
|
||||
|
||||
// Update probes irradiance and distance textures (one thread-group per probe)
|
||||
{
|
||||
PROFILE_GPU_CPU("Update Probes");
|
||||
PROFILE_GPU_CPU_NAMED("Update Probes");
|
||||
context->BindSR(0, ddgiData.Result.ProbesState);
|
||||
context->BindSR(1, ddgiData.ProbesTrace->View());
|
||||
context->BindSR(2, ddgiData.ActiveProbes->View());
|
||||
@@ -587,7 +587,7 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont
|
||||
// Update probes border pixels
|
||||
if (anyDirty)
|
||||
{
|
||||
PROFILE_GPU_CPU("Update Borders");
|
||||
PROFILE_GPU_CPU_NAMED("Update Borders");
|
||||
|
||||
// Irradiance
|
||||
context->BindUA(0, ddgiData.Result.ProbesIrradiance);
|
||||
@@ -663,7 +663,7 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext,
|
||||
// Render indirect lighting
|
||||
if (lightBuffer)
|
||||
{
|
||||
PROFILE_GPU_CPU("Indirect Lighting");
|
||||
PROFILE_GPU_CPU_NAMED("Indirect Lighting");
|
||||
#if 0
|
||||
// DDGI indirect lighting debug preview
|
||||
context->Clear(lightBuffer, Color::Transparent);
|
||||
@@ -694,7 +694,7 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext,
|
||||
// Probes debug drawing
|
||||
if (renderContext.View.Mode == ViewMode::GlobalIllumination && lightBuffer)
|
||||
{
|
||||
PROFILE_GPU_CPU("Debug Probes");
|
||||
PROFILE_GPU_CPU_NAMED("Debug Probes");
|
||||
if (!_debugModel)
|
||||
_debugModel = Content::LoadAsyncInternal<Model>(TEXT("Editor/Primitives/Sphere"));
|
||||
if (!_debugMaterial)
|
||||
|
||||
@@ -378,7 +378,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
return false;
|
||||
}
|
||||
surfaceAtlasData.LastFrameUsed = currentFrame;
|
||||
PROFILE_GPU_CPU("Global Surface Atlas");
|
||||
PROFILE_GPU_CPU_NAMED("Global Surface Atlas");
|
||||
|
||||
// Setup options
|
||||
auto* graphicsSettings = GraphicsSettings::Get();
|
||||
@@ -488,6 +488,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
{
|
||||
if (viewMask & e.LayerMask && e.Bounds.Radius >= minObjectRadius && CollisionsHelper::DistanceSpherePoint(e.Bounds, viewPosition) < distance)
|
||||
{
|
||||
//PROFILE_CPU_ACTOR(e.Actor);
|
||||
e.Actor->Draw(renderContext);
|
||||
}
|
||||
}
|
||||
@@ -495,23 +496,26 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
}
|
||||
|
||||
// Remove unused objects
|
||||
for (auto it = surfaceAtlasData.Objects.Begin(); it.IsNotEnd(); ++it)
|
||||
{
|
||||
if (it->Value.LastFrameUsed != currentFrame)
|
||||
PROFILE_GPU_CPU_NAMED("Compact Objects");
|
||||
for (auto it = surfaceAtlasData.Objects.Begin(); it.IsNotEnd(); ++it)
|
||||
{
|
||||
for (auto& tile : it->Value.Tiles)
|
||||
if (it->Value.LastFrameUsed != currentFrame)
|
||||
{
|
||||
if (tile)
|
||||
tile->Free();
|
||||
for (auto& tile : it->Value.Tiles)
|
||||
{
|
||||
if (tile)
|
||||
tile->Free();
|
||||
}
|
||||
surfaceAtlasData.Objects.Remove(it);
|
||||
}
|
||||
surfaceAtlasData.Objects.Remove(it);
|
||||
}
|
||||
}
|
||||
|
||||
// Rasterize world geometry material properties into Global Surface Atlas
|
||||
if (_dirtyObjectsBuffer.Count() != 0)
|
||||
{
|
||||
PROFILE_GPU_CPU("Rasterize Tiles");
|
||||
PROFILE_GPU_CPU_NAMED("Rasterize Tiles");
|
||||
|
||||
RenderContext renderContextTiles = renderContext;
|
||||
renderContextTiles.List = RenderList::GetFromPool();
|
||||
@@ -533,7 +537,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
};
|
||||
context->SetRenderTarget(depthBuffer, ToSpan(targetBuffers, ARRAY_COUNT(targetBuffers)));
|
||||
{
|
||||
PROFILE_GPU_CPU("Clear");
|
||||
PROFILE_GPU_CPU_NAMED("Clear");
|
||||
if (noCache || GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES)
|
||||
{
|
||||
// Full-atlas hardware clear
|
||||
@@ -625,7 +629,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
|
||||
// Send objects data to the GPU
|
||||
{
|
||||
PROFILE_GPU_CPU("Update Objects");
|
||||
PROFILE_GPU_CPU_NAMED("Update Objects");
|
||||
surfaceAtlasData.ObjectsBuffer.Flush(context);
|
||||
}
|
||||
|
||||
@@ -646,7 +650,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
// Chunk [0,0,0] is unused and its address=0 is used as an atomic counter for writing into CulledObjectsBuffer.
|
||||
// Each chunk data contains objects count + all objects addresses.
|
||||
// This allows quickly converting a world-space position into a chunk, then reading the chunk data start and looping over culled objects.
|
||||
PROFILE_GPU_CPU("Cull Objects");
|
||||
PROFILE_GPU_CPU_NAMED("Cull Objects");
|
||||
uint32 objectsBufferCapacity = (uint32)((float)surfaceAtlasData.Objects.Count() * 1.3f);
|
||||
|
||||
// Copy counter from ChunksBuffer into staging buffer to access current chunks memory usage to adapt dynamically to the scene complexity
|
||||
@@ -776,7 +780,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
// Render direct lighting into atlas
|
||||
if (surfaceAtlasData.Objects.Count() != 0)
|
||||
{
|
||||
PROFILE_GPU_CPU("Direct Lighting");
|
||||
PROFILE_GPU_CPU_NAMED("Direct Lighting");
|
||||
context->SetViewportAndScissors(Viewport(0, 0, (float)resolution, (float)resolution));
|
||||
context->SetRenderTarget(surfaceAtlasData.AtlasLighting->View());
|
||||
context->BindSR(0, surfaceAtlasData.AtlasGBuffer0->View());
|
||||
@@ -876,7 +880,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
|
||||
// Copy emissive light into the final direct lighting atlas
|
||||
{
|
||||
PROFILE_GPU_CPU("Copy Emissive");
|
||||
PROFILE_GPU_CPU_NAMED("Copy Emissive");
|
||||
_vertexBuffer->Clear();
|
||||
for (const auto& e : surfaceAtlasData.Objects)
|
||||
{
|
||||
@@ -921,7 +925,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
continue;
|
||||
|
||||
// Draw light
|
||||
PROFILE_GPU_CPU("Directional Light");
|
||||
PROFILE_GPU_CPU_NAMED("Directional Light");
|
||||
const bool useShadow = CanRenderShadow(renderContext.View, light);
|
||||
// TODO: test perf/quality when using Shadow Map for directional light (ShadowsPass::Instance()->LastDirLightShadowMap) instead of Global SDF trace
|
||||
light.SetupLightData(&data.Light, useShadow);
|
||||
@@ -955,7 +959,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
continue;
|
||||
|
||||
// Draw light
|
||||
PROFILE_GPU_CPU("Point Light");
|
||||
PROFILE_GPU_CPU_NAMED("Point Light");
|
||||
const bool useShadow = CanRenderShadow(renderContext.View, light);
|
||||
light.SetupLightData(&data.Light, useShadow);
|
||||
data.Light.Color *= light.IndirectLightingIntensity;
|
||||
@@ -988,7 +992,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
continue;
|
||||
|
||||
// Draw light
|
||||
PROFILE_GPU_CPU("Spot Light");
|
||||
PROFILE_GPU_CPU_NAMED("Spot Light");
|
||||
const bool useShadow = CanRenderShadow(renderContext.View, light);
|
||||
light.SetupLightData(&data.Light, useShadow);
|
||||
data.Light.Color *= light.IndirectLightingIntensity;
|
||||
@@ -1031,7 +1035,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
}
|
||||
if (_vertexBuffer->Data.Count() == 0)
|
||||
break;
|
||||
PROFILE_GPU_CPU("DDGI");
|
||||
PROFILE_GPU_CPU_NAMED("DDGI");
|
||||
data.DDGI = bindingDataDDGI.Constants;
|
||||
data.Light.Radius = giSettings.BounceIntensity / bindingDataDDGI.Constants.IndirectLightingIntensity; // Reuse for smaller CB
|
||||
context->BindSR(5, bindingDataDDGI.ProbesState);
|
||||
|
||||
@@ -472,7 +472,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
|
||||
GPUTexture* tmpMip = nullptr;
|
||||
if (updated)
|
||||
{
|
||||
PROFILE_GPU_CPU("Init");
|
||||
PROFILE_GPU_CPU_NAMED("Init");
|
||||
for (auto& cascade : sdfData.Cascades)
|
||||
{
|
||||
cascade.NonEmptyChunks.Clear();
|
||||
@@ -589,7 +589,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
|
||||
const int32 chunkDispatchGroups = GLOBAL_SDF_RASTERIZE_CHUNK_SIZE / GLOBAL_SDF_RASTERIZE_GROUP_SIZE;
|
||||
bool anyChunkDispatch = false;
|
||||
{
|
||||
PROFILE_GPU_CPU("Clear Chunks");
|
||||
PROFILE_GPU_CPU_NAMED("Clear Chunks");
|
||||
for (auto it = cascade.NonEmptyChunks.Begin(); it.IsNotEnd(); ++it)
|
||||
{
|
||||
auto& key = it->Item;
|
||||
@@ -606,7 +606,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
|
||||
}
|
||||
}
|
||||
{
|
||||
PROFILE_GPU_CPU("Rasterize Chunks");
|
||||
PROFILE_GPU_CPU_NAMED("Rasterize Chunks");
|
||||
|
||||
// Update static chunks
|
||||
for (auto it = chunks.Begin(); it.IsNotEnd(); ++it)
|
||||
@@ -637,7 +637,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
|
||||
const auto& objectIndexToDataIndex = ObjectIndexToDataIndexCache;
|
||||
if (chunks.Count() != 0)
|
||||
{
|
||||
PROFILE_GPU_CPU("Update Objects");
|
||||
PROFILE_GPU_CPU_NAMED("Update Objects");
|
||||
auto& objectIndexToDataIndexCache = ObjectIndexToDataIndexCache;
|
||||
objectIndexToDataIndexCache.Clear();
|
||||
|
||||
@@ -827,7 +827,7 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
|
||||
// Generate mip out of cascade (empty chunks have distance value 1 which is incorrect so mip will be used as a fallback - lower res)
|
||||
if (updated || anyChunkDispatch)
|
||||
{
|
||||
PROFILE_GPU_CPU("Generate Mip");
|
||||
PROFILE_GPU_CPU_NAMED("Generate Mip");
|
||||
context->ResetUA();
|
||||
const int32 mipDispatchGroups = Math::DivideAndRoundUp(resolutionMip, GLOBAL_SDF_MIP_GROUP_SIZE);
|
||||
static_assert((GLOBAL_SDF_MIP_FLOODS % 2) == 1, "Invalid Global SDF mip flood iterations count.");
|
||||
|
||||
@@ -518,7 +518,7 @@ void VolumetricFogPass::Render(RenderContext& renderContext)
|
||||
// Render local fog particles
|
||||
if (renderContext.List->VolumetricFogParticles.HasItems())
|
||||
{
|
||||
PROFILE_GPU_CPU("Local Fog");
|
||||
PROFILE_GPU_CPU_NAMED("Local Fog");
|
||||
|
||||
// Bind the output
|
||||
GPUTextureView* rt[] = { vBufferA->ViewVolume(), vBufferB->ViewVolume() };
|
||||
@@ -616,7 +616,7 @@ void VolumetricFogPass::Render(RenderContext& renderContext)
|
||||
// Skip if no lights to render
|
||||
if (pointLights.Count() + spotLights.Count())
|
||||
{
|
||||
PROFILE_GPU_CPU("Lights Injection");
|
||||
PROFILE_GPU_CPU_NAMED("Lights Injection");
|
||||
|
||||
// Allocate temporary buffer for light scattering injection
|
||||
localShadowedLightScattering = GetLocalShadowedLightScattering(renderContext, context, options);
|
||||
|
||||
Reference in New Issue
Block a user