468 lines
13 KiB
C++
468 lines
13 KiB
C++
// Copyright (c) Wojciech Figat. All rights reserved.
|
|
|
|
#if COMPILE_WITH_PROFILER
|
|
|
|
#include "ProfilerGPU.h"
|
|
#include "ProfilerMemory.h"
|
|
#include "Engine/Core/Log.h"
|
|
#include "Engine/Core/Utilities.h"
|
|
#include "Engine/Core/Types/StringBuilder.h"
|
|
#include "Engine/Core/Memory/ArenaAllocation.h"
|
|
#include "Engine/Engine/Engine.h"
|
|
#include "Engine/Graphics/GPUDevice.h"
|
|
#include "Engine/Graphics/GPUContext.h"
|
|
|
|
RenderStatsData RenderStatsData::Counter;
|
|
|
|
int32 ProfilerGPU::_depth = 0;
|
|
bool ProfilerGPU::Enabled = false;
|
|
bool ProfilerGPU::EventsEnabled = false;
|
|
int32 ProfilerGPU::CurrentBuffer = 0;
|
|
ProfilerGPU::EventBuffer ProfilerGPU::Buffers[PROFILER_GPU_EVENTS_FRAMES];
|
|
|
|
bool ProfilerGPU::EventBuffer::HasData() const
|
|
{
|
|
return _isResolved && _data.HasItems();
|
|
}
|
|
|
|
void ProfilerGPU::EventBuffer::EndAllQueries()
|
|
{
|
|
auto context = GPUDevice::Instance->GetMainContext();
|
|
auto queries = _data.Get();
|
|
for (int32 i = 0; i < _data.Count(); i++)
|
|
{
|
|
auto& e = queries[i];
|
|
if (e.QueryActive)
|
|
{
|
|
e.QueryActive = false;
|
|
context->EndQuery(e.Query);
|
|
}
|
|
}
|
|
}
|
|
|
|
void ProfilerGPU::EventBuffer::TryResolve()
|
|
{
|
|
if (_isResolved || _data.IsEmpty())
|
|
return;
|
|
|
|
// Collect queries results
|
|
PROFILE_MEM(Profiler);
|
|
auto device = GPUDevice::Instance;
|
|
auto queries = _data.Get();
|
|
for (int32 i = 0; i < _data.Count(); i++)
|
|
{
|
|
auto& e = queries[i];
|
|
ASSERT_LOW_LAYER(!e.QueryActive);
|
|
uint64 time;
|
|
if (device->GetQueryResult(e.Query, time, false))
|
|
{
|
|
e.Time = (float)time * 0.001f; // Convert to milliseconds
|
|
}
|
|
else
|
|
return; // Skip if one of the queries is not yet ready (frame still in-flight)
|
|
}
|
|
|
|
_isResolved = true;
|
|
}
|
|
|
|
int32 ProfilerGPU::EventBuffer::Add(const Event& e)
|
|
{
|
|
PROFILE_MEM(Profiler);
|
|
const int32 index = _data.Count();
|
|
_data.Add(e);
|
|
return index;
|
|
}
|
|
|
|
void ProfilerGPU::EventBuffer::Extract(Array<Event>& data) const
|
|
{
|
|
// Don't use unresolved data
|
|
ASSERT(_isResolved);
|
|
data = _data;
|
|
}
|
|
|
|
void ProfilerGPU::EventBuffer::Clear()
|
|
{
|
|
_data.Clear();
|
|
_isResolved = false;
|
|
FrameIndex = 0;
|
|
PresentTime = 0.0f;
|
|
}
|
|
|
|
int32 ProfilerGPU::BeginEvent(const Char* name)
|
|
{
|
|
auto context = GPUDevice::Instance->GetMainContext();
|
|
#if GPU_ALLOW_PROFILE_EVENTS
|
|
if (EventsEnabled)
|
|
context->EventBegin(name);
|
|
#endif
|
|
if (!Enabled)
|
|
return -1;
|
|
|
|
Event e;
|
|
e.Name = name;
|
|
e.Stats = RenderStatsData::Counter;
|
|
e.Query = context->BeginQuery(GPUQueryType::Timer);
|
|
e.Depth = _depth++;
|
|
e.QueryActive = true;
|
|
|
|
auto& buffer = Buffers[CurrentBuffer];
|
|
const auto index = buffer.Add(e);
|
|
return index;
|
|
}
|
|
|
|
void ProfilerGPU::EndEvent(int32 index)
|
|
{
|
|
auto context = GPUDevice::Instance->GetMainContext();
|
|
#if GPU_ALLOW_PROFILE_EVENTS
|
|
if (EventsEnabled)
|
|
context->EventEnd();
|
|
#endif
|
|
if (index == -1)
|
|
return;
|
|
_depth--;
|
|
|
|
auto& buffer = Buffers[CurrentBuffer];
|
|
auto e = buffer.Get(index);
|
|
e->QueryActive = false;
|
|
e->Stats.Mix(RenderStatsData::Counter);
|
|
context->EndQuery(e->Query);
|
|
}
|
|
|
|
void ProfilerGPU::BeginFrame()
|
|
{
|
|
// Clear stats
|
|
RenderStatsData::Counter = RenderStatsData();
|
|
_depth = 0;
|
|
auto& buffer = Buffers[CurrentBuffer];
|
|
buffer.FrameIndex = Engine::FrameCount;
|
|
buffer.PresentTime = 0.0f;
|
|
|
|
// Try to resolve previous frames
|
|
for (int32 i = 0; i < PROFILER_GPU_EVENTS_FRAMES; i++)
|
|
{
|
|
Buffers[i].TryResolve();
|
|
}
|
|
}
|
|
|
|
void ProfilerGPU::OnPresent()
|
|
{
|
|
// End all current frame queries to prevent invalid event duration values
|
|
auto& buffer = Buffers[CurrentBuffer];
|
|
buffer.EndAllQueries();
|
|
}
|
|
|
|
void ProfilerGPU::OnPresentTime(float time)
|
|
{
|
|
auto& buffer = Buffers[CurrentBuffer];
|
|
buffer.PresentTime += time;
|
|
}
|
|
|
|
void ProfilerGPU::EndFrame()
|
|
{
|
|
if (_depth)
|
|
{
|
|
LOG(Warning, "GPU Profiler events start/end mismatch");
|
|
}
|
|
|
|
// Move frame
|
|
CurrentBuffer = (CurrentBuffer + 1) % PROFILER_GPU_EVENTS_FRAMES;
|
|
|
|
// Prepare current frame buffer
|
|
auto& buffer = Buffers[CurrentBuffer];
|
|
buffer.Clear();
|
|
}
|
|
|
|
bool ProfilerGPU::GetLastFrameData(float& drawTimeMs, float& presentTimeMs, RenderStatsData& statsData)
|
|
{
|
|
uint64 maxFrame = 0;
|
|
int32 maxFrameIndex = -1;
|
|
auto& frames = Buffers;
|
|
for (uint32 i = 0; i < ARRAY_COUNT(frames); i++)
|
|
{
|
|
if (frames[i].HasData() && frames[i].FrameIndex > maxFrame)
|
|
{
|
|
maxFrame = frames[i].FrameIndex;
|
|
maxFrameIndex = i;
|
|
}
|
|
}
|
|
if (maxFrameIndex != -1)
|
|
{
|
|
auto& frame = frames[maxFrameIndex];
|
|
const auto root = frame.Get(0);
|
|
drawTimeMs = root->Time;
|
|
presentTimeMs = frame.PresentTime;
|
|
statsData = root->Stats;
|
|
return true;
|
|
}
|
|
|
|
// No data
|
|
drawTimeMs = 0.0f;
|
|
presentTimeMs = 0.0f;
|
|
Platform::MemoryClear(&statsData, sizeof(statsData));
|
|
return false;
|
|
}
|
|
|
|
struct GraphicsDumping
|
|
{
|
|
struct Item
|
|
{
|
|
StringView Name;
|
|
StringView FullName;
|
|
uint16 Count;
|
|
uint16 Depth;
|
|
float Time;
|
|
RenderStatsData Stats;
|
|
};
|
|
|
|
int32 FramesLeft;
|
|
bool WasProfilerGPUEnabled;
|
|
uint64 NextFrame;
|
|
ArenaAllocator Allocator;
|
|
Array<ProfilerGPU::Event> FrameData;
|
|
Array<Item, ArenaAllocation> Items;
|
|
Char* NameBuffers[2];
|
|
constexpr static int32 BufferSize = 500;
|
|
|
|
GraphicsDumping(int32 frames);
|
|
~GraphicsDumping();
|
|
void OnDraw();
|
|
void Add(Array<ProfilerGPU::Event>& frame);
|
|
void Print();
|
|
|
|
static void AppendName(Char*& name, Char*& other, StringView text)
|
|
{
|
|
int32 nameLen = StringUtils::Length(name);
|
|
ASSERT_LOW_LAYER(nameLen + text.Length() < BufferSize);
|
|
Platform::MemoryCopy(other, name, nameLen * sizeof(Char));
|
|
Platform::MemoryCopy(other + nameLen, text.Get(), text.Length() * sizeof(Char));
|
|
other[nameLen + text.Length()] = 0;
|
|
Swap(name, other);
|
|
}
|
|
|
|
static const Char* FormatValue(Char* buffer, int64 value)
|
|
{
|
|
// Format value with thousands separators (fmt has disabled FMT_USE_LOCALE_GROUPING)
|
|
fmt_flax::allocator allocator;
|
|
fmt_flax::memory_buffer fmtBuffer(allocator);
|
|
fmt_flax::format(fmtBuffer, TEXT("{}"), value);
|
|
const StringView str(fmtBuffer.data(), (int32)fmtBuffer.size());
|
|
int32 step = 0;
|
|
int32 size = str.Length() + (str.Length() - 1) / 3;
|
|
buffer[size--] = 0;
|
|
for (int32 i = str.Length() - 1; i >= 0; i--)
|
|
{
|
|
buffer[size--] = str[i];
|
|
if (++step == 3 && i != 0)
|
|
{
|
|
step = 0;
|
|
buffer[size--] = ',';
|
|
}
|
|
}
|
|
return buffer;
|
|
}
|
|
};
|
|
|
|
GraphicsDumping* Dumping = nullptr;
|
|
|
|
GraphicsDumping::GraphicsDumping(int32 frames)
|
|
: Allocator(16 * 1024) // 16kB
|
|
, Items(&Allocator)
|
|
{
|
|
NameBuffers[0] = (Char*)Allocator.Allocate(BufferSize * sizeof(Char));
|
|
NameBuffers[1] = (Char*)Allocator.Allocate(BufferSize * sizeof(Char));
|
|
FramesLeft = frames;
|
|
NextFrame = Engine::FrameCount + 1; // Start from the next frame
|
|
WasProfilerGPUEnabled = ProfilerGPU::Enabled;
|
|
ProfilerGPU::Enabled = true;
|
|
Engine::Draw.Bind<GraphicsDumping, &GraphicsDumping::OnDraw>(this);
|
|
}
|
|
|
|
GraphicsDumping::~GraphicsDumping()
|
|
{
|
|
Engine::Draw.Unbind<GraphicsDumping, &GraphicsDumping::OnDraw>(this);
|
|
ProfilerGPU::Enabled = WasProfilerGPUEnabled;
|
|
}
|
|
|
|
void GraphicsDumping::OnDraw()
|
|
{
|
|
PROFILE_MEM(Profiler);
|
|
|
|
// Process only frames in the profiling range that have data
|
|
for (auto& frame : ProfilerGPU::Buffers)
|
|
{
|
|
if (frame.FrameIndex == NextFrame && frame.HasData())
|
|
{
|
|
// Extract events from the frame
|
|
FrameData.Clear();
|
|
frame.Extract(FrameData);
|
|
|
|
// Put events into the current items hierarchy
|
|
Add(FrameData);
|
|
|
|
// Move to the next frame
|
|
NextFrame++;
|
|
FramesLeft--;
|
|
if (FramesLeft == 0)
|
|
{
|
|
// Done!
|
|
Print();
|
|
Delete(Dumping);
|
|
Dumping = nullptr;
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void GraphicsDumping::Add(Array<ProfilerGPU::Event>& events)
|
|
{
|
|
if (Items.IsEmpty())
|
|
Items.EnsureCapacity(events.Count());
|
|
for (int32 i = 0; i < events.Count(); i++)
|
|
{
|
|
auto& e = events[i];
|
|
|
|
// Build full name of the event (used to merge events from different frames)
|
|
auto nameBuffer = NameBuffers[0];
|
|
auto otherBuffer = NameBuffers[1];
|
|
nameBuffer[0] = otherBuffer[0] = 0;
|
|
AppendName(nameBuffer, otherBuffer, e.Name);
|
|
for (int32 depth = e.Depth; depth != 0; depth--)
|
|
{
|
|
// Find parent event
|
|
for (int32 j = i - 1; j >= 0; j--)
|
|
{
|
|
if (events[j].Depth == depth - 1)
|
|
{
|
|
auto& parent = events[j];
|
|
AppendName(nameBuffer, otherBuffer, StringView(TEXT("/"), 1));
|
|
AppendName(nameBuffer, otherBuffer, parent.Name);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
StringView name(nameBuffer);
|
|
|
|
// Find that item in the list
|
|
int32 itemIndex = 0;
|
|
for (; itemIndex < Items.Count(); itemIndex++)
|
|
{
|
|
if (Items[itemIndex].FullName == name)
|
|
break;
|
|
}
|
|
if (itemIndex == Items.Count())
|
|
{
|
|
// Add a new item
|
|
auto& newItem = Items.AddOne();
|
|
newItem.Name = e.Name;
|
|
newItem.Count = 0;
|
|
newItem.Depth = (uint16)e.Depth;
|
|
newItem.Time = 0.0f;
|
|
newItem.Stats = RenderStatsData();
|
|
auto nameLen = (name.Length() + 1) * sizeof(Char);
|
|
auto nameMem = (Char*)Allocator.Allocate(nameLen);
|
|
Platform::MemoryCopy(nameMem, name.Get(), nameLen);
|
|
newItem.FullName = StringView(nameMem, name.Length());
|
|
}
|
|
|
|
// Insert event data into the item
|
|
auto& item = Items[itemIndex];
|
|
item.Count++;
|
|
item.Time += e.Time;
|
|
item.Stats += e.Stats;
|
|
}
|
|
}
|
|
|
|
void GraphicsDumping::Print()
|
|
{
|
|
if (Items.IsEmpty())
|
|
{
|
|
LOG(Warning, "No drawing found");
|
|
return;
|
|
}
|
|
|
|
// Average results
|
|
for (auto& item : Items)
|
|
{
|
|
item.Time /= (float)item.Count;
|
|
item.Stats *= 1.0f / (float)item.Count;
|
|
}
|
|
|
|
// Print profiling hierarchy
|
|
StringBuilder sb;
|
|
sb.AppendLine(TEXT("GPU profiler summary:"));
|
|
auto& draw = Items[0];
|
|
{
|
|
// The root item is always the drawing by engine
|
|
if (draw.Count == 1)
|
|
sb.AppendFormat(TEXT(" Frame time: {} ms ({} FPS)"), Utilities::RoundTo2DecimalPlaces(draw.Time), (int32)(1000.0f / draw.Time)).AppendLine();
|
|
else
|
|
sb.AppendFormat(TEXT(" Frame time: {} ms ({} FPS), avg of {} frames"), Utilities::RoundTo2DecimalPlaces(draw.Time), (int32)(1000.0f / draw.Time), draw.Count).AppendLine();
|
|
sb.AppendFormat(TEXT(" Draws: {}, Dispatches: {}"), draw.Stats.DrawCalls, draw.Stats.DispatchCalls).AppendLine();
|
|
sb.AppendFormat(TEXT(" Triangles: {}, Vertices: {}, PSO changes: {}"), FormatValue(NameBuffers[0], draw.Stats.Triangles), FormatValue(NameBuffers[1], draw.Stats.Vertices), draw.Stats.PipelineStateChanges).AppendLine();
|
|
}
|
|
//sb.AppendLine(TEXT("Hierarchy:"));
|
|
for (int32 itemIndex = 1; itemIndex < Items.Count(); itemIndex++)
|
|
{
|
|
auto& item = Items[itemIndex];
|
|
|
|
// Timing and percentage of the frame
|
|
const float percentage = item.Time * 100.0f / draw.Time;
|
|
sb.AppendFormat(TEXT("{:>5.2f}% {:>5.2f}ms "), Utilities::RoundTo1DecimalPlace(percentage), Utilities::RoundTo2DecimalPlaces(item.Time));
|
|
|
|
// Indent based on depth
|
|
for (int32 depth = 1; depth < item.Depth; depth++)
|
|
sb.Append(TEXT(" "));
|
|
|
|
// Event name and stats
|
|
if (item.Stats.DrawCalls + item.Stats.DispatchCalls != 0)
|
|
{
|
|
sb.AppendFormat(TEXT("{}: "), item.Name);
|
|
if (item.Stats.DispatchCalls == 0)
|
|
if (item.Stats.DrawCalls == 1)
|
|
sb.Append(TEXT("1 draw"));
|
|
else
|
|
sb.AppendFormat(TEXT("{} draws"), item.Stats.DrawCalls);
|
|
else if (item.Stats.DrawCalls == 0)
|
|
if (item.Stats.DispatchCalls == 1)
|
|
sb.Append(TEXT("1 dispatch"));
|
|
else
|
|
sb.AppendFormat(TEXT("{} dispatches"), item.Stats.DispatchCalls);
|
|
else
|
|
sb.AppendFormat(TEXT("{} draws, {} dispatches"), item.Stats.DrawCalls, item.Stats.DispatchCalls);
|
|
if (item.Stats.Triangles == 1)
|
|
sb.AppendFormat(TEXT(", 1 tri, {} verts"), FormatValue(NameBuffers[0], item.Stats.Vertices));
|
|
else if (item.Stats.Triangles != 0)
|
|
sb.AppendFormat(TEXT(", {} tris, {} verts"), FormatValue(NameBuffers[0], item.Stats.Triangles), FormatValue(NameBuffers[1], item.Stats.Vertices));
|
|
}
|
|
else
|
|
{
|
|
sb.Append(item.Name);
|
|
}
|
|
sb.AppendLine();
|
|
}
|
|
LOG_STR(Info, sb.ToStringView());
|
|
}
|
|
|
|
void ProfilerGPU::Dump(int32 frames)
|
|
{
|
|
if (Dumping)
|
|
{
|
|
LOG(Warning, "Cannot start new profiling while one is active");
|
|
return;
|
|
}
|
|
if (frames <= 0)
|
|
frames = 4; // Default frames count
|
|
frames = Math::Min(frames, 100);
|
|
PROFILE_MEM(Profiler);
|
|
|
|
Dumping = New<GraphicsDumping>(frames);
|
|
}
|
|
|
|
void ProfilerGPU::Dispose()
|
|
{
|
|
SAFE_DELETE(Dumping);
|
|
}
|
|
|
|
#endif
|