diff --git a/Source/Engine/Profiler/ProfilerGPU.cpp b/Source/Engine/Profiler/ProfilerGPU.cpp index 5df47680c..615dd69ec 100644 --- a/Source/Engine/Profiler/ProfilerGPU.cpp +++ b/Source/Engine/Profiler/ProfilerGPU.cpp @@ -5,6 +5,9 @@ #include "ProfilerGPU.h" #include "ProfilerMemory.h" #include "Engine/Core/Log.h" +#include "Engine/Core/Utilities.h" +#include "Engine/Core/Types/StringBuilder.h" +#include "Engine/Core/Memory/ArenaAllocation.h" #include "Engine/Engine/Engine.h" #include "Engine/Graphics/GPUDevice.h" #include "Engine/Graphics/GPUContext.h" @@ -199,8 +202,266 @@ bool ProfilerGPU::GetLastFrameData(float& drawTimeMs, float& presentTimeMs, Rend return false; } +struct GraphicsDumping +{ + struct Item + { + StringView Name; + StringView FullName; + uint16 Count; + uint16 Depth; + float Time; + RenderStatsData Stats; + }; + + int32 FramesLeft; + bool WasProfilerGPUEnabled; + uint64 NextFrame; + ArenaAllocator Allocator; + Array FrameData; + Array Items; + Char* NameBuffers[2]; + constexpr static int32 BufferSize = 500; + + GraphicsDumping(int32 frames); + ~GraphicsDumping(); + void OnDraw(); + void Add(Array& frame); + void Print(); + + static void AppendName(Char*& name, Char*& other, StringView text) + { + int32 nameLen = StringUtils::Length(name); + ASSERT_LOW_LAYER(nameLen + text.Length() < BufferSize); + Platform::MemoryCopy(other, name, nameLen * sizeof(Char)); + Platform::MemoryCopy(other + nameLen, text.Get(), text.Length() * sizeof(Char)); + other[nameLen + text.Length()] = 0; + Swap(name, other); + } + + static const Char* FormatValue(Char* buffer, int64 value) + { + // Format value with thousands separators (fmt has disabled FMT_USE_LOCALE_GROUPING) + fmt_flax::allocator allocator; + fmt_flax::memory_buffer fmtBuffer(allocator); + fmt_flax::format(fmtBuffer, TEXT("{}"), value); + const StringView str(fmtBuffer.data(), (int32)fmtBuffer.size()); + int32 step = 0; + int32 size = str.Length() + (str.Length() - 1) / 3; + buffer[size--] = 0; + for (int32 i = str.Length() - 1; i >= 0; i--) + { + buffer[size--] = str[i]; + if (++step == 3 && i != 0) + { + step = 0; + buffer[size--] = ','; + } + } + return buffer; + } +}; + +GraphicsDumping* Dumping = nullptr; + +GraphicsDumping::GraphicsDumping(int32 frames) + : Allocator(16 * 1024) // 16kB + , Items(&Allocator) +{ + NameBuffers[0] = (Char*)Allocator.Allocate(BufferSize * sizeof(Char)); + NameBuffers[1] = (Char*)Allocator.Allocate(BufferSize * sizeof(Char)); + FramesLeft = frames; + NextFrame = Engine::FrameCount + 1; // Start from the next frame + WasProfilerGPUEnabled = ProfilerGPU::Enabled; + ProfilerGPU::Enabled = true; + Engine::Draw.Bind(this); +} + +GraphicsDumping::~GraphicsDumping() +{ + Engine::Draw.Unbind(this); + ProfilerGPU::Enabled = WasProfilerGPUEnabled; +} + +void GraphicsDumping::OnDraw() +{ + PROFILE_MEM(Profiler); + + // Process only frames in the profiling range that have data + for (auto& frame : ProfilerGPU::Buffers) + { + if (frame.FrameIndex == NextFrame && frame.HasData()) + { + // Extract events from the frame + FrameData.Clear(); + frame.Extract(FrameData); + + // Put events into the current items hierarchy + Add(FrameData); + + // Move to the next frame + NextFrame++; + FramesLeft--; + if (FramesLeft == 0) + { + // Done! + Print(); + Delete(Dumping); + Dumping = nullptr; + return; + } + } + } +} + +void GraphicsDumping::Add(Array& events) +{ + if (Items.IsEmpty()) + Items.EnsureCapacity(events.Count()); + for (int32 i = 0; i < events.Count(); i++) + { + auto& e = events[i]; + + // Build full name of the event (used to merge events from different frames) + auto nameBuffer = NameBuffers[0]; + auto otherBuffer = NameBuffers[1]; + nameBuffer[0] = otherBuffer[0] = 0; + AppendName(nameBuffer, otherBuffer, e.Name); + for (int32 depth = e.Depth; depth != 0; depth--) + { + // Find parent event + for (int32 j = i - 1; j >= 0; j--) + { + if (events[j].Depth == depth - 1) + { + auto& parent = events[j]; + AppendName(nameBuffer, otherBuffer, StringView(TEXT("/"), 1)); + AppendName(nameBuffer, otherBuffer, parent.Name); + break; + } + } + } + StringView name(nameBuffer); + + // Find that item in the list + int32 itemIndex = 0; + for (; itemIndex < Items.Count(); itemIndex++) + { + if (Items[itemIndex].FullName == name) + break; + } + if (itemIndex == Items.Count()) + { + // Add a new item + auto& newItem = Items.AddOne(); + newItem.Name = e.Name; + newItem.Count = 0; + newItem.Depth = (uint16)e.Depth; + newItem.Time = 0.0f; + newItem.Stats = RenderStatsData(); + auto nameLen = (name.Length() + 1) * sizeof(Char); + auto nameMem = (Char*)Allocator.Allocate(nameLen); + Platform::MemoryCopy(nameMem, name.Get(), nameLen); + newItem.FullName = StringView(nameMem, name.Length()); + } + + // Insert event data into the item + auto& item = Items[itemIndex]; + item.Count++; + item.Time += e.Time; + item.Stats += e.Stats; + } +} + +void GraphicsDumping::Print() +{ + if (Items.IsEmpty()) + { + LOG(Warning, "No drawing found"); + return; + } + + // Average results + for (auto& item : Items) + { + item.Time /= (float)item.Count; + item.Stats *= 1.0f / (float)item.Count; + } + + // Print profiling hierarchy + StringBuilder sb; + sb.AppendLine(TEXT("GPU profiler summary:")); + auto& draw = Items[0]; + { + // The root item is always the drawing by engine + if (draw.Count == 1) + sb.AppendFormat(TEXT(" Frame time: {} ms ({} FPS)"), Utilities::RoundTo2DecimalPlaces(draw.Time), (int32)(1000.0f / draw.Time)).AppendLine(); + else + sb.AppendFormat(TEXT(" Frame time: {} ms ({} FPS), avg of {} frames"), Utilities::RoundTo2DecimalPlaces(draw.Time), (int32)(1000.0f / draw.Time), draw.Count).AppendLine(); + sb.AppendFormat(TEXT(" Draws: {}, Dispatches: {}"), draw.Stats.DrawCalls, draw.Stats.DispatchCalls).AppendLine(); + sb.AppendFormat(TEXT(" Triangles: {}, Vertices: {}, PSO changes: {}"), FormatValue(NameBuffers[0], draw.Stats.Triangles), FormatValue(NameBuffers[1], draw.Stats.Vertices), draw.Stats.PipelineStateChanges).AppendLine(); + } + //sb.AppendLine(TEXT("Hierarchy:")); + for (int32 itemIndex = 1; itemIndex < Items.Count(); itemIndex++) + { + auto& item = Items[itemIndex]; + + // Timing and percentage of the frame + const float percentage = item.Time * 100.0f / draw.Time; + sb.AppendFormat(TEXT("{:>4}% {:>5}ms "), Utilities::RoundTo1DecimalPlace(percentage), Utilities::RoundTo2DecimalPlaces(item.Time)); + + // Indent based on depth + for (int32 depth = 1; depth < item.Depth; depth++) + sb.Append(TEXT(" ")); + + // Event name and stats + if (item.Stats.DrawCalls + item.Stats.DispatchCalls != 0) + { + sb.AppendFormat(TEXT("{}: "), item.Name); + if (item.Stats.DispatchCalls == 0) + if (item.Stats.DrawCalls == 1) + sb.Append(TEXT("1 draw")); + else + sb.AppendFormat(TEXT("{} draws"), item.Stats.DrawCalls); + else if (item.Stats.DrawCalls == 0) + if (item.Stats.DispatchCalls == 1) + sb.Append(TEXT("1 dispatch")); + else + sb.AppendFormat(TEXT("{} dispatches"), item.Stats.DispatchCalls); + else + sb.AppendFormat(TEXT("{} draws, {} dispatches"), item.Stats.DrawCalls, item.Stats.DispatchCalls); + if (item.Stats.Triangles == 1) + sb.AppendFormat(TEXT(", 1 tri, {} verts"), FormatValue(NameBuffers[0], item.Stats.Vertices)); + else if (item.Stats.Triangles != 0) + sb.AppendFormat(TEXT(", {} tris, {} verts"), FormatValue(NameBuffers[0], item.Stats.Triangles), FormatValue(NameBuffers[1], item.Stats.Vertices)); + } + else + { + sb.Append(item.Name); + } + sb.AppendLine(); + } + LOG_STR(Info, sb.ToStringView()); +} + +void ProfilerGPU::Dump(int32 frames) +{ + if (Dumping) + { + LOG(Warning, "Cannot start new profiling while one is active"); + return; + } + if (frames <= 0) + frames = 4; // Default frames count + frames = Math::Min(frames, 100); + PROFILE_MEM(Profiler); + + Dumping = New(frames); +} + void ProfilerGPU::Dispose() { + SAFE_DELETE(Dumping); } #endif diff --git a/Source/Engine/Profiler/ProfilerGPU.h b/Source/Engine/Profiler/ProfilerGPU.h index 811e58d2d..066e93859 100644 --- a/Source/Engine/Profiler/ProfilerGPU.h +++ b/Source/Engine/Profiler/ProfilerGPU.h @@ -170,6 +170,12 @@ public: /// True if got the data, otherwise false. API_FUNCTION() static bool GetLastFrameData(float& drawTimeMs, float& presentTimeMs, RenderStatsData& statsData); + /// + /// Profiles next frame(s) rendering performance and dumps the results to the log (as a hierarchy structure). When using more than 1 frame, the results are averaged for more accurate profiling (especially for A/B testing). + /// + /// Amount of frames to profile for more stable results (event durations are averaged). Value 0 uses default of 4 frames. + API_FUNCTION(Attributes = "DebugCommand") static void Dump(int32 frames = 4); + private: static void BeginFrame(); static void OnPresent(); diff --git a/Source/Engine/Profiler/RenderStats.h b/Source/Engine/Profiler/RenderStats.h index f73eb6400..7afbeb1d4 100644 --- a/Source/Engine/Profiler/RenderStats.h +++ b/Source/Engine/Profiler/RenderStats.h @@ -69,6 +69,30 @@ API_STRUCT() struct RenderStatsData MIX(PipelineStateChanges); #undef MIX } + + RenderStatsData& operator+=(const RenderStatsData& other) + { +#define MIX(name) name += other.name + MIX(DrawCalls); + MIX(DispatchCalls); + MIX(Vertices); + MIX(Triangles); + MIX(PipelineStateChanges); +#undef MIX + return *this; + } + + RenderStatsData& operator*=(float scale) + { +#define MIX(name) name = (int64)(name * scale) + MIX(DrawCalls); + MIX(DispatchCalls); + MIX(Vertices); + MIX(Triangles); + MIX(PipelineStateChanges); +#undef MIX + return *this; + } }; #define RENDER_STAT_DISPATCH_CALL() Platform::InterlockedIncrement(&RenderStatsData::Counter.DispatchCalls)