diff --git a/Content/Editor/DebugMaterials/DDGIDebugProbes.flax b/Content/Editor/DebugMaterials/DDGIDebugProbes.flax
index 4289244c8..39677f815 100644
--- a/Content/Editor/DebugMaterials/DDGIDebugProbes.flax
+++ b/Content/Editor/DebugMaterials/DDGIDebugProbes.flax
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:740621fb235edae990ffa259a833b12001eb5027bc6036af0aa34ebca4bcec64
+oid sha256:d317dc7b2fc2700b28e4a2581c567b888ea1ebb62c5da84f826d9b29c510ff17
size 40805
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp
index 9b648bd51..2a50fbfd8 100644
--- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp
+++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.cpp
@@ -95,6 +95,7 @@ void GPUContextDX11::FrameBegin()
GPUContext::FrameBegin();
// Setup
+ _flushOnDispatch = false;
_omDirtyFlag = false;
_uaDirtyFlag = false;
_cbDirtyFlag = false;
@@ -497,50 +498,19 @@ void GPUContextDX11::UpdateCB(GPUConstantBuffer* cb, const void* data)
void GPUContextDX11::Dispatch(GPUShaderProgramCS* shader, uint32 threadGroupCountX, uint32 threadGroupCountY, uint32 threadGroupCountZ)
{
- CurrentCS = (GPUShaderProgramCSDX11*)shader;
-
- // Flush
- flushCBs();
- flushSRVs();
- flushUAVs();
- flushOM();
-
- // Dispatch
- auto compute = (ID3D11ComputeShader*)shader->GetBufferHandle();
- if (_currentCompute != compute)
- {
- _currentCompute = compute;
- _context->CSSetShader(compute, nullptr, 0);
- }
+ onDispatch(shader); // Shared pre-dispatch setup: sets CurrentCS, runs the flush helpers and binds the compute shader
_context->Dispatch(threadGroupCountX, threadGroupCountY, threadGroupCountZ);
RENDER_STAT_DISPATCH_CALL();
-
CurrentCS = nullptr;
}
void GPUContextDX11::DispatchIndirect(GPUShaderProgramCS* shader, GPUBuffer* bufferForArgs, uint32 offsetForArgs)
{
ASSERT(bufferForArgs && EnumHasAnyFlags(bufferForArgs->GetFlags(), GPUBufferFlags::Argument));
- CurrentCS = (GPUShaderProgramCSDX11*)shader;
-
auto bufferForArgsDX11 = (GPUBufferDX11*)bufferForArgs;
-
- // Flush
- flushCBs();
- flushSRVs();
- flushUAVs();
- flushOM();
-
- // Dispatch
- auto compute = (ID3D11ComputeShader*)shader->GetBufferHandle();
- if (_currentCompute != compute)
- {
- _currentCompute = compute;
- _context->CSSetShader(compute, nullptr, 0);
- }
+ onDispatch(shader); // Shared pre-dispatch setup: sets CurrentCS, runs the flush helpers and binds the compute shader
_context->DispatchIndirect(bufferForArgsDX11->GetBuffer(), offsetForArgs);
RENDER_STAT_DISPATCH_CALL();
-
CurrentCS = nullptr;
}
@@ -921,6 +891,7 @@ void GPUContextDX11::OverlapUA(bool end)
NvAPI_D3D11_EndUAVOverlap(_context);
else
NvAPI_D3D11_BeginUAVOverlap(_context);
+ _flushOnDispatch |= end;
return;
}
#endif
@@ -931,6 +902,7 @@ void GPUContextDX11::OverlapUA(bool end)
agsDriverExtensionsDX11_EndUAVOverlap(AgsContext, _context);
else
agsDriverExtensionsDX11_BeginUAVOverlap(AgsContext, _context);
+ _flushOnDispatch |= end;
return;
}
#endif
@@ -1046,6 +1018,7 @@ void GPUContextDX11::flushIA()
void GPUContextDX11::onDrawCall()
{
+ _flushOnDispatch = false;
flushCBs();
flushSRVs();
flushUAVs();
@@ -1053,4 +1026,27 @@ void GPUContextDX11::onDrawCall()
flushOM();
}
+void GPUContextDX11::onDispatch(GPUShaderProgramCS* shader) // Common pre-dispatch setup shared by Dispatch and DispatchIndirect
+{
+ CurrentCS = (GPUShaderProgramCSDX11*)shader; // Callers reset CurrentCS to nullptr once the dispatch has been issued
+ // Run the constant buffer / SRV / UAV / output-merger flush helpers before dispatching
+ flushCBs();
+ flushSRVs();
+ flushUAVs();
+ flushOM();
+ // _flushOnDispatch is raised by OverlapUA(end=true) and cleared by FrameBegin/onDrawCall; NOTE(review): presumably the context flush stands in for the UAV barrier D3D11 fails to insert after an overlap region ends - confirm
+ if (_flushOnDispatch)
+ {
+ _flushOnDispatch = false; // One-shot: only the first dispatch after the overlap end pays for the flush
+ _context->Flush();
+ }
+ // Bind the compute shader only when it differs from the cached _currentCompute
+ auto compute = (ID3D11ComputeShader*)shader->GetBufferHandle();
+ if (_currentCompute != compute)
+ {
+ _currentCompute = compute;
+ _context->CSSetShader(compute, nullptr, 0);
+ }
+}
+
#endif
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h
index ccdac0d70..7dc693019 100644
--- a/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h
+++ b/Source/Engine/GraphicsDevice/DirectX/DX11/GPUContextDX11.h
@@ -30,6 +30,7 @@ private:
byte _tracyZone[TracyD3D11ZoneSize];
#endif
int32 _maxUASlots;
+ bool _flushOnDispatch;
// Output Merger
bool _omDirtyFlag;
@@ -111,6 +112,7 @@ private:
void flushOM();
void flushIA();
void onDrawCall();
+ void onDispatch(GPUShaderProgramCS* shader);
public:
diff --git a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUBufferDX12.cpp b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUBufferDX12.cpp
index 673146148..bb1eaacdb 100644
--- a/Source/Engine/GraphicsDevice/DirectX/DX12/GPUBufferDX12.cpp
+++ b/Source/Engine/GraphicsDevice/DirectX/DX12/GPUBufferDX12.cpp
@@ -38,7 +38,7 @@ void* GPUBufferDX12::Map(GPUResourceMapMode mode)
{
D3D12_RANGE readRange;
D3D12_RANGE* readRangePtr;
- switch (mode)
+ switch (mode & GPUResourceMapMode::ReadWrite)
{
case GPUResourceMapMode::Read:
readRangePtr = nullptr;
diff --git a/Source/Engine/Platform/Win32/Win32ConditionVariable.h b/Source/Engine/Platform/Win32/Win32ConditionVariable.h
index fa9c04ce4..5b8c23c05 100644
--- a/Source/Engine/Platform/Win32/Win32ConditionVariable.h
+++ b/Source/Engine/Platform/Win32/Win32ConditionVariable.h
@@ -26,7 +26,7 @@ public:
///
/// Initializes a new instance of the class.
///
- Win32ConditionVariable()
+ __forceinline Win32ConditionVariable()
{
Windows::InitializeConditionVariable(&_cond);
}
@@ -44,7 +44,7 @@ public:
/// Blocks the current thread execution until the condition variable is woken up.
///
/// The critical section locked by the current thread.
- void Wait(const Win32CriticalSection& lock)
+ __forceinline void Wait(const Win32CriticalSection& lock)
{
Windows::SleepConditionVariableCS(&_cond, &lock._criticalSection, 0xFFFFFFFF);
}
@@ -55,7 +55,7 @@ public:
/// The critical section locked by the current thread.
/// The time-out interval, in milliseconds. If the time-out interval elapses, the function re-acquires the critical section and returns zero. If timeout is zero, the function tests the states of the specified objects and returns immediately. If timeout is INFINITE, the function's time-out interval never elapses.
/// If the function succeeds, the return value is true, otherwise, if the function fails or the time-out interval elapses, the return value is false.
- bool Wait(const Win32CriticalSection& lock, const int32 timeout)
+ __forceinline bool Wait(const Win32CriticalSection& lock, const int32 timeout)
{
return !!Windows::SleepConditionVariableCS(&_cond, &lock._criticalSection, timeout);
}
@@ -63,7 +63,7 @@ public:
///
/// Notifies one waiting thread.
///
- void NotifyOne()
+ __forceinline void NotifyOne()
{
Windows::WakeConditionVariable(&_cond);
}
@@ -71,7 +71,7 @@ public:
///
/// Notifies all waiting threads.
///
- void NotifyAll()
+ __forceinline void NotifyAll()
{
Windows::WakeAllConditionVariable(&_cond);
}
diff --git a/Source/Engine/Platform/Win32/Win32CriticalSection.h b/Source/Engine/Platform/Win32/Win32CriticalSection.h
index e6375c3fc..748840530 100644
--- a/Source/Engine/Platform/Win32/Win32CriticalSection.h
+++ b/Source/Engine/Platform/Win32/Win32CriticalSection.h
@@ -26,7 +26,7 @@ public:
///
/// Initializes a new instance of the class.
///
- Win32CriticalSection()
+ __forceinline Win32CriticalSection()
{
Windows::InitializeCriticalSectionEx(&_criticalSection, 4000, 0x01000000);
}
@@ -34,7 +34,7 @@ public:
///
/// Finalizes an instance of the class.
///
- ~Win32CriticalSection()
+ __forceinline ~Win32CriticalSection()
{
Windows::DeleteCriticalSection(&_criticalSection);
}
@@ -43,7 +43,7 @@ public:
///
/// Locks the critical section.
///
- void Lock() const
+ __forceinline void Lock() const
{
Windows::EnterCriticalSection(&_criticalSection);
}
@@ -52,7 +52,7 @@ public:
/// Attempts to enter a critical section without blocking. If the call is successful, the calling thread takes ownership of the critical section.
///
/// True if calling thread took ownership of the critical section.
- bool TryLock() const
+ __forceinline bool TryLock() const
{
return Windows::TryEnterCriticalSection(&_criticalSection) != 0;
}
@@ -60,7 +60,7 @@ public:
///
/// Releases the lock on the critical section.
///
- void Unlock() const
+ __forceinline void Unlock() const
{
Windows::LeaveCriticalSection(&_criticalSection);
}
diff --git a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp
index 1240f148c..e4ed7d74f 100644
--- a/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp
+++ b/Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp
@@ -924,12 +924,6 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
}
}
-#if PLATFORM_WINDOWS
- // Hack to fix D3D11 bug that doesn't insert UAV barrier after overlap region ends (between two GPUComputePass)
- if (context->GetDevice()->GetRendererType() == RendererType::DirectX11)
- context->Dispatch(_csRasterizeModel0, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups);
-#endif
-
// Rasterize non-empty chunks (additive layers so need combine with existing chunk data)
for (uint32 layer = 0; layer <= maxLayer; layer++)
{