Optimize draw calls sorting and objects buffer building to be async

This commit is contained in:
Wojtek Figat
2024-07-02 16:07:09 +02:00
parent 567d5f70ab
commit 6fbf4a6aac
3 changed files with 76 additions and 36 deletions

View File

@@ -325,8 +325,8 @@ public:
Platform::Free(tmp);
}
template<typename T, typename AllocationType = HeapAllocation>
FORCE_INLINE static void MergeSort(Array<T, AllocationType>& data, Array<T, AllocationType>* tmp = nullptr)
template<typename T, typename AllocationType = HeapAllocation, typename TempAllocationType = HeapAllocation>
FORCE_INLINE static void MergeSort(Array<T, AllocationType>& data, Array<T, TempAllocationType>* tmp = nullptr)
{
if (tmp)
tmp->Resize(data.Count());

View File

@@ -26,12 +26,7 @@ static_assert(sizeof(ShaderObjectData) == sizeof(Float4) * ARRAY_COUNT(ShaderObj
namespace
{
// File-local state used by the RenderList functions in this file.
// Cached data for the draw calls sorting
// Key and index buffers; resized per draw list in SortDrawCalls and shrunk to zero in CleanupCache.
Array<uint64> SortingKeys[2];
Array<int32> SortingIndices;
// Scratch array passed to Sorting::MergeSort as the temporary buffer when sorting draw batches.
Array<DrawBatch> SortingBatches;
// RenderList instances waiting for reuse; filled by ReturnToPool and drained by GetFromPool.
Array<RenderList*> FreeRenderList;
// Pooled raw memory entries; First is the pointer released with Platform::Free in CleanupCache.
// NOTE(review): Second (uintptr) is presumably the allocation size - confirm against RendererAllocation.
Array<Pair<void*, uintptr>> MemPool;
// Lock held around accesses to FreeRenderList and MemPool.
CriticalSection MemPoolLocker;
}
@@ -199,12 +194,15 @@ void RendererAllocation::Free(void* ptr, uintptr size)
// Returns a render list for use by the caller: the most recently pooled instance when
// the pool has any, otherwise a newly created one.
RenderList* RenderList::GetFromPool()
{
    MemPoolLocker.Lock();
    if (!FreeRenderList.HasItems())
    {
        // Nothing to reuse - release the lock first so the allocation happens outside of it
        MemPoolLocker.Unlock();
        return New<RenderList>();
    }

    // Take the last pooled entry while still holding the lock
    RenderList* const reused = FreeRenderList.Last();
    FreeRenderList.RemoveLast();
    MemPoolLocker.Unlock();
    return reused;
}
@@ -213,10 +211,12 @@ void RenderList::ReturnToPool(RenderList* cache)
{
if (!cache)
return;
cache->Clear();
MemPoolLocker.Lock();
ASSERT(!FreeRenderList.Contains(cache));
FreeRenderList.Add(cache);
cache->Clear();
MemPoolLocker.Unlock();
}
void RenderList::CleanupCache()
@@ -224,13 +224,12 @@ void RenderList::CleanupCache()
// Don't call it during rendering (data may be already in use)
ASSERT(GPUDevice::Instance == nullptr || GPUDevice::Instance->CurrentTask == nullptr);
SortingKeys[0].Resize(0);
SortingKeys[1].Resize(0);
SortingIndices.Resize(0);
MemPoolLocker.Lock();
FreeRenderList.ClearDelete();
for (auto& e : MemPool)
Platform::Free(e.First);
MemPool.Clear();
MemPoolLocker.Unlock();
}
bool RenderList::BlendableSettings::operator<(const BlendableSettings& other) const
@@ -648,12 +647,12 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD
const int32 listSize = list.Indices.Count();
ZoneValue(listSize);
// Peek shared memory
#define PREPARE_CACHE(list) (list).Clear(); (list).Resize(listSize)
PREPARE_CACHE(SortingKeys[0]);
PREPARE_CACHE(SortingKeys[1]);
PREPARE_CACHE(SortingIndices);
#undef PREPARE_CACHE
// Use shared memory from renderer allocator
Array<uint64, RendererAllocation> SortingKeys[2];
Array<int32, RendererAllocation> SortingIndices;
SortingKeys[0].Resize(listSize);
SortingKeys[1].Resize(listSize);
SortingIndices.Resize(listSize);
uint64* sortedKeys = SortingKeys[0].Get();
// Setup sort keys
@@ -726,7 +725,8 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD
if (stable)
{
// Sort draw calls batches by depth
Sorting::MergeSort(list.Batches, &SortingBatches);
Array<DrawBatch, RendererAllocation> sortingBatches;
Sorting::MergeSort(list.Batches, &sortingBatches);
}
}

View File

@@ -425,29 +425,69 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont
#endif
}
// Sort draw calls
// Process draw calls (sorting, objects buffer building)
{
PROFILE_CPU_NAMED("Sort Draw Calls");
// TODO: run all of these functions in async via jobs
for (int32 i = 0; i < renderContextBatch.Contexts.Count(); i++)
renderContextBatch.Contexts[i].List->BuildObjectsBuffer();
renderContext.List->SortDrawCalls(renderContext, false, DrawCallsListType::GBuffer);
renderContext.List->SortDrawCalls(renderContext, false, DrawCallsListType::GBufferNoDecals);
renderContext.List->SortDrawCalls(renderContext, true, DrawCallsListType::Forward);
renderContext.List->SortDrawCalls(renderContext, false, DrawCallsListType::Distortion);
if (setup.UseMotionVectors)
renderContext.List->SortDrawCalls(renderContext, false, DrawCallsListType::MotionVectors);
for (int32 i = 1; i < renderContextBatch.Contexts.Count(); i++)
PROFILE_CPU_NAMED("Process Draw Calls");
// Utility that runs specific rendering routines as async jobs
struct DrawCallsProcessor
{
auto& shadowContext = renderContextBatch.Contexts.Get()[i];
shadowContext.List->SortDrawCalls(shadowContext, false, DrawCallsListType::Depth, DrawPass::Depth);
shadowContext.List->SortDrawCalls(shadowContext, false, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, DrawPass::Depth);
}
RenderContextBatch& RenderContextBatch;
Pair<DrawCallsListType, bool> MainContextSorting[5] =
{
// Draw List + Reverse Distance sorting
ToPair(DrawCallsListType::GBuffer, false),
ToPair(DrawCallsListType::GBufferNoDecals, false),
ToPair(DrawCallsListType::Forward, true),
ToPair(DrawCallsListType::Distortion, false),
ToPair(DrawCallsListType::MotionVectors, false),
};
// Job entry point: builds the objects buffer of the render list owned by the context at the given index.
void BuildObjectsBufferJob(int32 index)
{
    auto& jobContext = RenderContextBatch.Contexts[index];
    jobContext.List->BuildObjectsBuffer();
}
// Job entry point: sorts one group of draw calls selected by the job index.
// Indices below ARRAY_COUNT(MainContextSorting) pick an entry of that table and sort the
// matching draw list of the main context; the remaining indices address Contexts and sort
// their depth lists.
void SortDrawCallsJob(int32 index)
{
    RenderContext& mainContext = RenderContextBatch.GetMainContext();

    if (index >= ARRAY_COUNT(MainContextSorting))
    {
        // Depth sorting for the context addressed by the remaining part of the index
        // NOTE(review): the offset starts at 0, so Contexts[0] is processed here as well - confirm this is intended.
        auto& depthContext = RenderContextBatch.Contexts[index - ARRAY_COUNT(MainContextSorting)];
        depthContext.List->SortDrawCalls(depthContext, false, DrawCallsListType::Depth, DrawPass::Depth);
        depthContext.List->SortDrawCalls(depthContext, false, depthContext.List->ShadowDepthDrawCallsList, mainContext.List->DrawCalls, DrawPass::Depth);
        return;
    }

    // Main context sorting (First = draw list type, Second = reverse distance sorting)
    const auto entry = MainContextSorting[index];
    const bool isMotionVectors = entry.First == DrawCallsListType::MotionVectors;
    if (isMotionVectors && !mainContext.List->Setup.UseMotionVectors)
        return; // Motion vectors list is skipped when the setup does not use it
    mainContext.List->SortDrawCalls(mainContext, entry.Second, entry.First);
}
} processor = { renderContextBatch };
// Dispatch async jobs
Function<void(int32)> func;
func.Bind<DrawCallsProcessor, &DrawCallsProcessor::BuildObjectsBufferJob>(&processor);
const int64 buildObjectsBufferJob = JobSystem::Dispatch(func, renderContextBatch.Contexts.Count());
func.Bind<DrawCallsProcessor, &DrawCallsProcessor::SortDrawCallsJob>(&processor);
const int64 sortDrawCallsJob = JobSystem::Dispatch(func, ARRAY_COUNT(DrawCallsProcessor::MainContextSorting) + renderContextBatch.Contexts.Count());
// Upload objects buffers to the GPU
JobSystem::Wait(buildObjectsBufferJob);
{
PROFILE_CPU_NAMED("FlushObjectsBuffer");
for (int32 i = 0; i < renderContextBatch.Contexts.Count(); i++)
renderContextBatch.Contexts[i].List->ObjectBuffer.Flush(context);
for (auto& e : renderContextBatch.Contexts)
e.List->ObjectBuffer.Flush(context);
}
// Wait for async jobs to finish
// TODO: use per-pass wait labels (eg. don't wait for shadow pass draws sorting until ShadowPass needs it)
JobSystem::Wait(sortDrawCallsJob);
}
// Get the light accumulation buffer