From 27dd1bda253cd8b849802ed44165ea3e3575b74d Mon Sep 17 00:00:00 2001 From: Wojtek Figat Date: Fri, 6 Feb 2026 21:57:16 +0100 Subject: [PATCH] Optimize `GPUVertexLayout::Get` to not use mutex on lookup read #3917 --- .../Graphics/Shaders/GPUVertexLayout.cpp | 33 ++++++++++--------- Source/Engine/Renderer/RenderList.cpp | 19 +++++++---- 2 files changed, 31 insertions(+), 21 deletions(-) diff --git a/Source/Engine/Graphics/Shaders/GPUVertexLayout.cpp b/Source/Engine/Graphics/Shaders/GPUVertexLayout.cpp index 05c6d605a..6c34b2008 100644 --- a/Source/Engine/Graphics/Shaders/GPUVertexLayout.cpp +++ b/Source/Engine/Graphics/Shaders/GPUVertexLayout.cpp @@ -44,23 +44,30 @@ namespace Dictionary LayoutCache; Dictionary VertexBufferCache; + // TODO: callers look up VertexBufferCache without CacheLocker while AddCache inserts into it under CacheLocker, which is not thread-safe (find a better way, e.g. two maps: a read-only one that is safe to read from any thread, and a mutex-guarded one holding the values added this frame) GPUVertexLayout* AddCache(const VertexBufferLayouts& key, int32 count) { - GPUVertexLayout::Elements elements; - bool anyValid = false; - for (int32 slot = 0; slot < count; slot++) + GPUVertexLayout* result; + CacheLocker.Lock(); + if (!VertexBufferCache.TryGet(key, result)) { - if (key.Layouts[slot]) + GPUVertexLayout::Elements elements; + bool anyValid = false; + for (int32 slot = 0; slot < count; slot++) { - anyValid = true; - int32 start = elements.Count(); - elements.Add(key.Layouts[slot]->GetElements()); - for (int32 j = start; j < elements.Count(); j++) - elements.Get()[j].Slot = (byte)slot; + if (key.Layouts[slot]) + { + anyValid = true; + int32 start = elements.Count(); + elements.Add(key.Layouts[slot]->GetElements()); + for (int32 j = start; j < elements.Count(); j++) + elements.Get()[j].Slot = (byte)slot; + } } + result = anyValid ? GPUVertexLayout::Get(elements, true) : nullptr; + VertexBufferCache.Add(key, result); } - GPUVertexLayout* result = anyValid ? 
GPUVertexLayout::Get(elements, true) : nullptr; - VertexBufferCache.Add(key, result); + CacheLocker.Unlock(); return result; } } @@ -185,11 +192,9 @@ GPUVertexLayout* GPUVertexLayout::Get(const Span& vertexBuffers) key.Layouts[i] = nullptr; // Lookup existing cache - CacheLocker.Lock(); GPUVertexLayout* result; if (!VertexBufferCache.TryGet(key, result)) result = AddCache(key, vertexBuffers.Length()); - CacheLocker.Unlock(); return result; } @@ -209,11 +214,9 @@ GPUVertexLayout* GPUVertexLayout::Get(const Span& layouts) key.Layouts[i] = nullptr; // Lookup existing cache - CacheLocker.Lock(); GPUVertexLayout* result; if (!VertexBufferCache.TryGet(key, result)) result = AddCache(key, layouts.Length()); - CacheLocker.Unlock(); return result; } diff --git a/Source/Engine/Renderer/RenderList.cpp b/Source/Engine/Renderer/RenderList.cpp index ba1f7a0f9..23fcd52dc 100644 --- a/Source/Engine/Renderer/RenderList.cpp +++ b/Source/Engine/Renderer/RenderList.cpp @@ -825,6 +825,13 @@ FORCE_INLINE bool DrawsEqual(const DrawCall* a, const DrawCall* b) Platform::MemoryCompare(a->Geometry.VertexBuffers, b->Geometry.VertexBuffers, sizeof(a->Geometry.VertexBuffers) + sizeof(a->Geometry.VertexBuffersOffsets)) == 0; } +FORCE_INLINE Span GetVB(GPUBuffer* const* ptr, int32 maxSize) // Returns the span of vertex buffers to bind with trailing null slots dropped (the first slot is kept even when it is null) +{ + while (maxSize > 1 && ptr[maxSize - 1] == nullptr) // Test the size first so a non-positive maxSize never reads ptr[-1] + maxSize--; + return ToSpan(ptr, maxSize); +} + void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsList& list, RenderList* drawCallsList, GPUTextureView* input) { if (list.IsEmpty()) @@ -953,7 +960,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL Platform::MemoryCopy(vb, activeDraw->Geometry.VertexBuffers, sizeof(DrawCall::Geometry.VertexBuffers)); Platform::MemoryCopy(vbOffsets, activeDraw->Geometry.VertexBuffersOffsets, sizeof(DrawCall::Geometry.VertexBuffersOffsets)); context->BindIB(activeDraw->Geometry.IndexBuffer); - context->BindVB(ToSpan(vb, ARRAY_COUNT(vb)), vbOffsets); + 
context->BindVB(GetVB(vb, ARRAY_COUNT(vb)), vbOffsets); context->DrawIndexedInstanced(activeDraw->Draw.IndicesCount, activeCount, instanceBufferOffset, 0, activeDraw->Draw.StartIndex); instanceBufferOffset += activeCount; @@ -970,7 +977,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL // Single-draw call batch context->BindIB(drawCall.Geometry.IndexBuffer); - context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets); + context->BindVB(GetVB(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets); if (drawCall.InstanceCount == 0) { context->DrawIndexedInstancedIndirect(drawCall.Draw.IndirectArgsBuffer, drawCall.Draw.IndirectArgsOffset); @@ -993,7 +1000,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL Platform::MemoryCopy(vb, drawCall.Geometry.VertexBuffers, sizeof(DrawCall::Geometry.VertexBuffers)); Platform::MemoryCopy(vbOffsets, drawCall.Geometry.VertexBuffersOffsets, sizeof(DrawCall::Geometry.VertexBuffersOffsets)); context->BindIB(drawCall.Geometry.IndexBuffer); - context->BindVB(ToSpan(vb, vbMax + 1), vbOffsets); + context->BindVB(GetVB(vb, vbMax + 1), vbOffsets); if (drawCall.InstanceCount == 0) { @@ -1023,7 +1030,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL const DrawCall& drawCall = drawCallsData[perDraw.DrawObjectIndex]; context->BindIB(drawCall.Geometry.IndexBuffer); - context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets); + context->BindVB(GetVB(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets); if (drawCall.InstanceCount == 0) { @@ -1044,7 +1051,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL bindParams.DrawCall->Material->Bind(bindParams); context->BindIB(drawCall.Geometry.IndexBuffer); - context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, vbMax), 
drawCall.Geometry.VertexBuffersOffsets); + context->BindVB(GetVB(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets); for (int32 j = 0; j < batch.Instances.Count(); j++) { @@ -1068,7 +1075,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL drawCall.Material->Bind(bindParams); context->BindIB(drawCall.Geometry.IndexBuffer); - context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets); + context->BindVB(GetVB(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets); if (drawCall.InstanceCount == 0) {