Optimize draw calls sorting and objects buffer building to be async
This commit is contained in:
@@ -325,8 +325,8 @@ public:
|
||||
Platform::Free(tmp);
|
||||
}
|
||||
|
||||
template<typename T, typename AllocationType = HeapAllocation>
|
||||
FORCE_INLINE static void MergeSort(Array<T, AllocationType>& data, Array<T, AllocationType>* tmp = nullptr)
|
||||
template<typename T, typename AllocationType = HeapAllocation, typename TempAllocationType = HeapAllocation>
|
||||
FORCE_INLINE static void MergeSort(Array<T, AllocationType>& data, Array<T, TempAllocationType>* tmp = nullptr)
|
||||
{
|
||||
if (tmp)
|
||||
tmp->Resize(data.Count());
|
||||
|
||||
@@ -26,12 +26,7 @@ static_assert(sizeof(ShaderObjectData) == sizeof(Float4) * ARRAY_COUNT(ShaderObj
|
||||
|
||||
namespace
|
||||
{
|
||||
// Cached data for the draw calls sorting
|
||||
Array<uint64> SortingKeys[2];
|
||||
Array<int32> SortingIndices;
|
||||
Array<DrawBatch> SortingBatches;
|
||||
Array<RenderList*> FreeRenderList;
|
||||
|
||||
Array<Pair<void*, uintptr>> MemPool;
|
||||
CriticalSection MemPoolLocker;
|
||||
}
|
||||
@@ -199,12 +194,15 @@ void RendererAllocation::Free(void* ptr, uintptr size)
|
||||
|
||||
// Returns a render list taken from the pool of free lists, or a newly allocated one when the pool has no items.
RenderList* RenderList::GetFromPool()
{
    RenderList* reused = nullptr;

    // Access to FreeRenderList is wrapped in MemPoolLocker; keep the locked region limited to the pool lookup
    MemPoolLocker.Lock();
    const bool found = FreeRenderList.HasItems();
    if (found)
    {
        // Take the most recently returned list
        reused = FreeRenderList.Last();
        FreeRenderList.RemoveLast();
    }
    MemPoolLocker.Unlock();

    // The fallback allocation is performed after the lock has been released
    return found ? reused : New<RenderList>();
}
|
||||
@@ -213,10 +211,12 @@ void RenderList::ReturnToPool(RenderList* cache)
|
||||
{
|
||||
if (!cache)
|
||||
return;
|
||||
cache->Clear();
|
||||
|
||||
MemPoolLocker.Lock();
|
||||
ASSERT(!FreeRenderList.Contains(cache));
|
||||
FreeRenderList.Add(cache);
|
||||
cache->Clear();
|
||||
MemPoolLocker.Unlock();
|
||||
}
|
||||
|
||||
void RenderList::CleanupCache()
|
||||
@@ -224,13 +224,12 @@ void RenderList::CleanupCache()
|
||||
// Don't call it during rendering (data may be already in use)
|
||||
ASSERT(GPUDevice::Instance == nullptr || GPUDevice::Instance->CurrentTask == nullptr);
|
||||
|
||||
SortingKeys[0].Resize(0);
|
||||
SortingKeys[1].Resize(0);
|
||||
SortingIndices.Resize(0);
|
||||
MemPoolLocker.Lock();
|
||||
FreeRenderList.ClearDelete();
|
||||
for (auto& e : MemPool)
|
||||
Platform::Free(e.First);
|
||||
MemPool.Clear();
|
||||
MemPoolLocker.Unlock();
|
||||
}
|
||||
|
||||
bool RenderList::BlendableSettings::operator<(const BlendableSettings& other) const
|
||||
@@ -648,12 +647,12 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD
|
||||
const int32 listSize = list.Indices.Count();
|
||||
ZoneValue(listSize);
|
||||
|
||||
// Peek shared memory
|
||||
#define PREPARE_CACHE(list) (list).Clear(); (list).Resize(listSize)
|
||||
PREPARE_CACHE(SortingKeys[0]);
|
||||
PREPARE_CACHE(SortingKeys[1]);
|
||||
PREPARE_CACHE(SortingIndices);
|
||||
#undef PREPARE_CACHE
|
||||
// Use shared memory from renderer allocator
|
||||
Array<uint64, RendererAllocation> SortingKeys[2];
|
||||
Array<int32, RendererAllocation> SortingIndices;
|
||||
SortingKeys[0].Resize(listSize);
|
||||
SortingKeys[1].Resize(listSize);
|
||||
SortingIndices.Resize(listSize);
|
||||
uint64* sortedKeys = SortingKeys[0].Get();
|
||||
|
||||
// Setup sort keys
|
||||
@@ -726,7 +725,8 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD
|
||||
if (stable)
|
||||
{
|
||||
// Sort draw calls batches by depth
|
||||
Sorting::MergeSort(list.Batches, &SortingBatches);
|
||||
Array<DrawBatch, RendererAllocation> sortingBatches;
|
||||
Sorting::MergeSort(list.Batches, &sortingBatches);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -425,29 +425,69 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont
|
||||
#endif
|
||||
}
|
||||
|
||||
// Sort draw calls
|
||||
// Process draw calls (sorting, objects buffer building)
|
||||
{
|
||||
PROFILE_CPU_NAMED("Sort Draw Calls");
|
||||
// TODO: run all of these functions in async via jobs
|
||||
for (int32 i = 0; i < renderContextBatch.Contexts.Count(); i++)
|
||||
renderContextBatch.Contexts[i].List->BuildObjectsBuffer();
|
||||
renderContext.List->SortDrawCalls(renderContext, false, DrawCallsListType::GBuffer);
|
||||
renderContext.List->SortDrawCalls(renderContext, false, DrawCallsListType::GBufferNoDecals);
|
||||
renderContext.List->SortDrawCalls(renderContext, true, DrawCallsListType::Forward);
|
||||
renderContext.List->SortDrawCalls(renderContext, false, DrawCallsListType::Distortion);
|
||||
if (setup.UseMotionVectors)
|
||||
renderContext.List->SortDrawCalls(renderContext, false, DrawCallsListType::MotionVectors);
|
||||
for (int32 i = 1; i < renderContextBatch.Contexts.Count(); i++)
|
||||
PROFILE_CPU_NAMED("Process Draw Calls");
|
||||
|
||||
// Utility that handles async jobs for specific rendering routines
|
||||
struct DrawCallsProcessor
|
||||
{
|
||||
auto& shadowContext = renderContextBatch.Contexts.Get()[i];
|
||||
shadowContext.List->SortDrawCalls(shadowContext, false, DrawCallsListType::Depth, DrawPass::Depth);
|
||||
shadowContext.List->SortDrawCalls(shadowContext, false, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, DrawPass::Depth);
|
||||
}
|
||||
RenderContextBatch& RenderContextBatch;
|
||||
Pair<DrawCallsListType, bool> MainContextSorting[5] =
|
||||
{
|
||||
// Draw List + Reverse Distance sorting
|
||||
ToPair(DrawCallsListType::GBuffer, false),
|
||||
ToPair(DrawCallsListType::GBufferNoDecals, false),
|
||||
ToPair(DrawCallsListType::Forward, true),
|
||||
ToPair(DrawCallsListType::Distortion, false),
|
||||
ToPair(DrawCallsListType::MotionVectors, false),
|
||||
};
|
||||
|
||||
void BuildObjectsBufferJob(int32 index)
|
||||
{
|
||||
RenderContextBatch.Contexts[index].List->BuildObjectsBuffer();
|
||||
}
|
||||
|
||||
void SortDrawCallsJob(int32 index)
|
||||
{
|
||||
RenderContext& renderContext = RenderContextBatch.GetMainContext();
|
||||
if (index < ARRAY_COUNT(MainContextSorting))
|
||||
{
|
||||
// Main context sorting
|
||||
RenderSetup& setup = renderContext.List->Setup;
|
||||
auto sorting = MainContextSorting[index];
|
||||
if (sorting.First == DrawCallsListType::MotionVectors && !setup.UseMotionVectors)
|
||||
return;
|
||||
renderContext.List->SortDrawCalls(renderContext, sorting.Second, sorting.First);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Shadow context sorting
|
||||
auto& shadowContext = RenderContextBatch.Contexts[index - ARRAY_COUNT(MainContextSorting)];
|
||||
shadowContext.List->SortDrawCalls(shadowContext, false, DrawCallsListType::Depth, DrawPass::Depth);
|
||||
shadowContext.List->SortDrawCalls(shadowContext, false, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, DrawPass::Depth);
|
||||
}
|
||||
}
|
||||
} processor = { renderContextBatch };
|
||||
|
||||
// Dispatch async jobs
|
||||
Function<void(int32)> func;
|
||||
func.Bind<DrawCallsProcessor, &DrawCallsProcessor::BuildObjectsBufferJob>(&processor);
|
||||
const int64 buildObjectsBufferJob = JobSystem::Dispatch(func, renderContextBatch.Contexts.Count());
|
||||
func.Bind<DrawCallsProcessor, &DrawCallsProcessor::SortDrawCallsJob>(&processor);
|
||||
const int64 sortDrawCallsJob = JobSystem::Dispatch(func, ARRAY_COUNT(DrawCallsProcessor::MainContextSorting) + renderContextBatch.Contexts.Count());
|
||||
|
||||
// Upload objects buffers to the GPU
|
||||
JobSystem::Wait(buildObjectsBufferJob);
|
||||
{
|
||||
PROFILE_CPU_NAMED("FlushObjectsBuffer");
|
||||
for (int32 i = 0; i < renderContextBatch.Contexts.Count(); i++)
|
||||
renderContextBatch.Contexts[i].List->ObjectBuffer.Flush(context);
|
||||
for (auto& e : renderContextBatch.Contexts)
|
||||
e.List->ObjectBuffer.Flush(context);
|
||||
}
|
||||
|
||||
// Wait for async jobs to finish
|
||||
// TODO: use per-pass wait labels (eg. don't wait for shadow pass draws sorting until ShadowPass needs it)
|
||||
JobSystem::Wait(sortDrawCallsJob);
|
||||
}
|
||||
|
||||
// Get the light accumulation buffer
|
||||
|
||||
Reference in New Issue
Block a user