Optimize GPUVertexLayout caches with ConcurrentDictionary

This commit is contained in:
Wojtek Figat
2026-02-07 00:45:14 +01:00
parent a855b17cc0
commit ecddb8aae5

View File

@@ -8,6 +8,7 @@
#include "Engine/Graphics/GPUDevice.h"
#include "Engine/Graphics/GPUBuffer.h"
#include "Engine/Graphics/PixelFormatExtensions.h"
#include "Engine/Threading/ConcurrentDictionary.h"
#if GPU_ENABLE_RESOURCE_NAMING
#include "Engine/Scripting/Enums.h"
#endif
@@ -40,15 +41,12 @@ uint32 GetHash(const VertexBufferLayouts& key)
namespace
{
CriticalSection CacheLocker;
Dictionary<uint32, GPUVertexLayout*> LayoutCache;
Dictionary<VertexBufferLayouts, GPUVertexLayout*> VertexBufferCache;
ConcurrentDictionary<uint32, GPUVertexLayout*> LayoutCache;
ConcurrentDictionary<VertexBufferLayouts, GPUVertexLayout*> VertexBufferCache;
// TODO: it's not safe to use map and then use again with a lock (find a better way, eg. using two maps, one first read-only and thread safe, second with mutex-guarded new values from this frame)
GPUVertexLayout* AddCache(const VertexBufferLayouts& key, int32 count)
GPUVertexLayout* GetCache(const VertexBufferLayouts& key, int32 count)
{
GPUVertexLayout* result;
CacheLocker.Lock();
if (!VertexBufferCache.TryGet(key, result))
{
GPUVertexLayout::Elements elements;
@@ -65,9 +63,15 @@ namespace
}
}
result = anyValid ? GPUVertexLayout::Get(elements, true) : nullptr;
VertexBufferCache.Add(key, result);
if (!VertexBufferCache.Add(key, result))
{
// Other thread added the value
Delete(result);
bool found = VertexBufferCache.TryGet(key, result);
ASSERT(found);
}
}
CacheLocker.Unlock();
return result;
}
}
@@ -155,7 +159,6 @@ GPUVertexLayout* GPUVertexLayout::Get(const Elements& elements, bool explicitOff
}
// Lookup existing cache
CacheLocker.Lock();
GPUVertexLayout* result;
if (!LayoutCache.TryGet(hash, result))
{
@@ -167,12 +170,16 @@ GPUVertexLayout* GPUVertexLayout::Get(const Elements& elements, bool explicitOff
LOG(Error, " {}", e.ToString());
#endif
LOG(Error, "Failed to create vertex layout");
CacheLocker.Unlock();
return nullptr;
}
LayoutCache.Add(hash, result);
if (!LayoutCache.Add(hash, result))
{
// Other thread added the value
Delete(result);
bool found = LayoutCache.TryGet(hash, result);
ASSERT(found);
}
}
CacheLocker.Unlock();
return result;
}
@@ -192,11 +199,7 @@ GPUVertexLayout* GPUVertexLayout::Get(const Span<GPUBuffer*>& vertexBuffers)
key.Layouts[i] = nullptr;
// Lookup existing cache
GPUVertexLayout* result;
if (!VertexBufferCache.TryGet(key, result))
result = AddCache(key, vertexBuffers.Length());
return result;
return GetCache(key, vertexBuffers.Length());
}
GPUVertexLayout* GPUVertexLayout::Get(const Span<GPUVertexLayout*>& layouts)
@@ -214,11 +217,7 @@ GPUVertexLayout* GPUVertexLayout::Get(const Span<GPUVertexLayout*>& layouts)
key.Layouts[i] = nullptr;
// Lookup existing cache
GPUVertexLayout* result;
if (!VertexBufferCache.TryGet(key, result))
result = AddCache(key, layouts.Length());
return result;
return GetCache(key, layouts.Length());
}
GPUVertexLayout* GPUVertexLayout::Merge(GPUVertexLayout* base, GPUVertexLayout* reference, bool removeUnused, bool addMissing, int32 missingSlotOverride, bool referenceOrder)