Optimize GPUVertexLayout::Get to not use mutex on lookup read

#3917
This commit is contained in:
Wojtek Figat
2026-02-06 21:57:16 +01:00
parent 4afd9fd8df
commit 27dd1bda25
2 changed files with 31 additions and 21 deletions

View File

@@ -44,23 +44,30 @@ namespace
Dictionary<uint32, GPUVertexLayout*> LayoutCache;
Dictionary<VertexBufferLayouts, GPUVertexLayout*> VertexBufferCache;
// TODO: it's not safe to read the map without a lock and then access it again under the lock (find a better way, e.g. using two maps: a first one that is read-only and thread-safe, and a second one, mutex-guarded, holding the new values added during this frame)
// Resolves the merged vertex layout for the given set of per-slot layouts and stores it in VertexBufferCache.
// For every slot in [0, count) that has a layout, the elements of that layout are appended to a combined list
// and stamped with the slot index; the combined list is then resolved via GPUVertexLayout::Get.
// When no slot has a layout, nullptr is cached and returned.
// NOTE(review): this listing is a diff rendering - removed and added lines of the function appear interleaved
// (the element-gathering loop and the result/caching statements each show up twice), so it is not compilable as shown.
GPUVertexLayout* AddCache(const VertexBufferLayouts& key, int32 count)
{
GPUVertexLayout::Elements elements;
bool anyValid = false;
for (int32 slot = 0; slot < count; slot++)
GPUVertexLayout* result;
// The cache check and the insertion below run between CacheLocker.Lock() and CacheLocker.Unlock()
CacheLocker.Lock();
if (!VertexBufferCache.TryGet(key, result))
{
if (key.Layouts[slot])
GPUVertexLayout::Elements elements;
bool anyValid = false;
for (int32 slot = 0; slot < count; slot++)
{
anyValid = true;
int32 start = elements.Count();
elements.Add(key.Layouts[slot]->GetElements());
for (int32 j = start; j < elements.Count(); j++)
elements.Get()[j].Slot = (byte)slot;
// Empty slots are skipped
if (key.Layouts[slot])
{
anyValid = true;
// Remember where this slot's elements begin so only the newly appended ones get their Slot overwritten
int32 start = elements.Count();
elements.Add(key.Layouts[slot]->GetElements());
for (int32 j = start; j < elements.Count(); j++)
elements.Get()[j].Slot = (byte)slot;
}
}
// nullptr is stored as well when no slot had a layout
result = anyValid ? GPUVertexLayout::Get(elements, true) : nullptr;
VertexBufferCache.Add(key, result);
}
GPUVertexLayout* result = anyValid ? GPUVertexLayout::Get(elements, true) : nullptr;
VertexBufferCache.Add(key, result);
CacheLocker.Unlock();
return result;
}
}
@@ -185,11 +192,9 @@ GPUVertexLayout* GPUVertexLayout::Get(const Span<GPUBuffer*>& vertexBuffers)
key.Layouts[i] = nullptr;
// Lookup existing cache
CacheLocker.Lock();
GPUVertexLayout* result;
if (!VertexBufferCache.TryGet(key, result))
result = AddCache(key, vertexBuffers.Length());
CacheLocker.Unlock();
return result;
}
@@ -209,11 +214,9 @@ GPUVertexLayout* GPUVertexLayout::Get(const Span<GPUVertexLayout*>& layouts)
key.Layouts[i] = nullptr;
// Lookup existing cache
CacheLocker.Lock();
GPUVertexLayout* result;
if (!VertexBufferCache.TryGet(key, result))
result = AddCache(key, layouts.Length());
CacheLocker.Unlock();
return result;
}

View File

@@ -825,6 +825,13 @@ FORCE_INLINE bool DrawsEqual(const DrawCall* a, const DrawCall* b)
Platform::MemoryCompare(a->Geometry.VertexBuffers, b->Geometry.VertexBuffers, sizeof(a->Geometry.VertexBuffers) + sizeof(a->Geometry.VertexBuffersOffsets)) == 0;
}
// Builds a span over the vertex buffers array with trailing null entries trimmed off.
// The first slot is never trimmed: for maxSize >= 1 the result always has at least one element, even if it is null.
// @param ptr Pointer to the first vertex buffer slot (must have at least maxSize entries).
// @param maxSize Upper bound on the number of slots to include in the span.
// @returns Span starting at ptr whose length is maxSize reduced by the number of trailing null slots (but not below 1 when maxSize >= 1).
FORCE_INLINE Span<GPUBuffer*> GetVB(GPUBuffer* const* ptr, int32 maxSize)
{
    // Test the bound first so the short-circuit prevents indexing when nothing can be trimmed
    // (avoids reading ptr[-1] if maxSize is 0)
    while (maxSize > 1 && ptr[maxSize - 1] == nullptr)
        maxSize--;
    return ToSpan<GPUBuffer*>(ptr, maxSize);
}
void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsList& list, RenderList* drawCallsList, GPUTextureView* input)
{
if (list.IsEmpty())
@@ -953,7 +960,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL
Platform::MemoryCopy(vb, activeDraw->Geometry.VertexBuffers, sizeof(DrawCall::Geometry.VertexBuffers));
Platform::MemoryCopy(vbOffsets, activeDraw->Geometry.VertexBuffersOffsets, sizeof(DrawCall::Geometry.VertexBuffersOffsets));
context->BindIB(activeDraw->Geometry.IndexBuffer);
context->BindVB(ToSpan(vb, ARRAY_COUNT(vb)), vbOffsets);
context->BindVB(GetVB(vb, ARRAY_COUNT(vb)), vbOffsets);
context->DrawIndexedInstanced(activeDraw->Draw.IndicesCount, activeCount, instanceBufferOffset, 0, activeDraw->Draw.StartIndex);
instanceBufferOffset += activeCount;
@@ -970,7 +977,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL
// Single-draw call batch
context->BindIB(drawCall.Geometry.IndexBuffer);
context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets);
context->BindVB(GetVB(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets);
if (drawCall.InstanceCount == 0)
{
context->DrawIndexedInstancedIndirect(drawCall.Draw.IndirectArgsBuffer, drawCall.Draw.IndirectArgsOffset);
@@ -993,7 +1000,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL
Platform::MemoryCopy(vb, drawCall.Geometry.VertexBuffers, sizeof(DrawCall::Geometry.VertexBuffers));
Platform::MemoryCopy(vbOffsets, drawCall.Geometry.VertexBuffersOffsets, sizeof(DrawCall::Geometry.VertexBuffersOffsets));
context->BindIB(drawCall.Geometry.IndexBuffer);
context->BindVB(ToSpan(vb, vbMax + 1), vbOffsets);
context->BindVB(GetVB(vb, vbMax + 1), vbOffsets);
if (drawCall.InstanceCount == 0)
{
@@ -1023,7 +1030,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL
const DrawCall& drawCall = drawCallsData[perDraw.DrawObjectIndex];
context->BindIB(drawCall.Geometry.IndexBuffer);
context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets);
context->BindVB(GetVB(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets);
if (drawCall.InstanceCount == 0)
{
@@ -1044,7 +1051,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL
bindParams.DrawCall->Material->Bind(bindParams);
context->BindIB(drawCall.Geometry.IndexBuffer);
context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets);
context->BindVB(GetVB(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets);
for (int32 j = 0; j < batch.Instances.Count(); j++)
{
@@ -1068,7 +1075,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL
drawCall.Material->Bind(bindParams);
context->BindIB(drawCall.Geometry.IndexBuffer);
context->BindVB(ToSpan(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets);
context->BindVB(GetVB(drawCall.Geometry.VertexBuffers, vbMax), drawCall.Geometry.VertexBuffersOffsets);
if (drawCall.InstanceCount == 0)
{