Various small fixes and improvements

This commit is contained in:
Wojtek Figat
2025-09-04 15:56:33 +02:00
parent 3e363c8275
commit cd22cd059d
7 changed files with 43 additions and 51 deletions

View File

@@ -95,6 +95,7 @@ void GPUContextDX11::FrameBegin()
GPUContext::FrameBegin();
// Setup
_flushOnDispatch = false;
_omDirtyFlag = false;
_uaDirtyFlag = false;
_cbDirtyFlag = false;
@@ -497,50 +498,19 @@ void GPUContextDX11::UpdateCB(GPUConstantBuffer* cb, const void* data)
// Executes the given compute shader with the provided number of thread groups in each dimension.
void GPUContextDX11::Dispatch(GPUShaderProgramCS* shader, uint32 threadGroupCountX, uint32 threadGroupCountY, uint32 threadGroupCountZ)
{
    // Shared dispatch setup: onDispatch stores the shader in CurrentCS, flushes the CB/SRV/UAV/OM state,
    // performs the pending context flush (when _flushOnDispatch is set) and binds the compute shader.
    // Keeping this in a single place avoids binding the shader before the pending flush is handled.
    onDispatch(shader);

    // Dispatch
    _context->Dispatch(threadGroupCountX, threadGroupCountY, threadGroupCountZ);
    RENDER_STAT_DISPATCH_CALL();

    // The compute program is tracked only for the duration of the dispatch
    CurrentCS = nullptr;
}
// Executes the given compute shader using dispatch arguments read from a GPU buffer at the given byte offset.
// The buffer must have been created with the GPUBufferFlags::Argument flag.
void GPUContextDX11::DispatchIndirect(GPUShaderProgramCS* shader, GPUBuffer* bufferForArgs, uint32 offsetForArgs)
{
    ASSERT(bufferForArgs && EnumHasAnyFlags(bufferForArgs->GetFlags(), GPUBufferFlags::Argument));
    auto bufferForArgsDX11 = (GPUBufferDX11*)bufferForArgs;

    // Shared dispatch setup: onDispatch stores the shader in CurrentCS, flushes the CB/SRV/UAV/OM state,
    // performs the pending context flush (when _flushOnDispatch is set) and binds the compute shader.
    // Keeping this in a single place avoids binding the shader before the pending flush is handled.
    onDispatch(shader);

    // Dispatch
    _context->DispatchIndirect(bufferForArgsDX11->GetBuffer(), offsetForArgs);
    RENDER_STAT_DISPATCH_CALL();

    // The compute program is tracked only for the duration of the dispatch
    CurrentCS = nullptr;
}
@@ -921,6 +891,7 @@ void GPUContextDX11::OverlapUA(bool end)
NvAPI_D3D11_EndUAVOverlap(_context);
else
NvAPI_D3D11_BeginUAVOverlap(_context);
_flushOnDispatch |= end;
return;
}
#endif
@@ -931,6 +902,7 @@ void GPUContextDX11::OverlapUA(bool end)
agsDriverExtensionsDX11_EndUAVOverlap(AgsContext, _context);
else
agsDriverExtensionsDX11_BeginUAVOverlap(AgsContext, _context);
_flushOnDispatch |= end;
return;
}
#endif
@@ -1046,6 +1018,7 @@ void GPUContextDX11::flushIA()
void GPUContextDX11::onDrawCall()
{
_flushOnDispatch = false;
flushCBs();
flushSRVs();
flushUAVs();
@@ -1053,4 +1026,27 @@ void GPUContextDX11::onDrawCall()
flushOM();
}
// Common setup executed before every compute dispatch (direct and indirect).
void GPUContextDX11::onDispatch(GPUShaderProgramCS* shader)
{
    // Track the compute program that is about to run
    CurrentCS = (GPUShaderProgramCSDX11*)shader;

    // Push any pending pipeline state changes to the device context
    flushCBs();
    flushSRVs();
    flushUAVs();
    flushOM();

    // Perform the context flush requested earlier (one-shot request, reset before flushing)
    if (_flushOnDispatch)
    {
        _flushOnDispatch = false;
        _context->Flush();
    }

    // Bind the compute shader only when it differs from the cached one
    auto shaderHandle = (ID3D11ComputeShader*)shader->GetBufferHandle();
    if (_currentCompute == shaderHandle)
        return;
    _currentCompute = shaderHandle;
    _context->CSSetShader(shaderHandle, nullptr, 0);
}
#endif

View File

@@ -30,6 +30,7 @@ private:
byte _tracyZone[TracyD3D11ZoneSize];
#endif
int32 _maxUASlots;
// One-shot request to call _context->Flush() before the next compute dispatch. Raised by OverlapUA when a vendor UAV overlap region ends; cleared at frame begin, on draw calls and once consumed by onDispatch.
bool _flushOnDispatch;
// Output Merger
bool _omDirtyFlag;
@@ -111,6 +112,7 @@ private:
void flushOM();
void flushIA();
void onDrawCall();
// Prepares the context for a compute dispatch: stores the shader as CurrentCS, flushes the CB/SRV/UAV/OM state, performs the pending context flush (if requested) and binds the compute shader when it changed.
void onDispatch(GPUShaderProgramCS* shader);
public:

View File

@@ -38,7 +38,7 @@ void* GPUBufferDX12::Map(GPUResourceMapMode mode)
{
D3D12_RANGE readRange;
D3D12_RANGE* readRangePtr;
switch (mode)
switch (mode & GPUResourceMapMode::ReadWrite)
{
case GPUResourceMapMode::Read:
readRangePtr = nullptr;

View File

@@ -26,7 +26,7 @@ public:
/// <summary>
/// Initializes a new instance of the <see cref="Win32ConditionVariable"/> class.
/// </summary>
Win32ConditionVariable()
__forceinline Win32ConditionVariable()
{
Windows::InitializeConditionVariable(&_cond);
}
@@ -44,7 +44,7 @@ public:
/// Blocks the current thread execution until the condition variable is woken up.
/// </summary>
/// <param name="lock">The critical section locked by the current thread.</param>
void Wait(const Win32CriticalSection& lock)
__forceinline void Wait(const Win32CriticalSection& lock)
{
// 0xFFFFFFFF is the Win32 INFINITE timeout value, so this overload waits without a time-out; the result of the call is intentionally not returned.
Windows::SleepConditionVariableCS(&_cond, &lock._criticalSection, 0xFFFFFFFF);
}
@@ -55,7 +55,7 @@ public:
/// <param name="lock">The critical section locked by the current thread.</param>
/// <param name="timeout">The time-out interval, in milliseconds. If the time-out interval elapses, the function re-acquires the critical section and returns zero. If timeout is zero, the function tests the states of the specified objects and returns immediately. If timeout is INFINITE, the function's time-out interval never elapses.</param>
/// <returns>If the function succeeds, the return value is true, otherwise, if the function fails or the time-out interval elapses, the return value is false.</returns>
bool Wait(const Win32CriticalSection& lock, const int32 timeout)
__forceinline bool Wait(const Win32CriticalSection& lock, const int32 timeout)
{
return !!Windows::SleepConditionVariableCS(&_cond, &lock._criticalSection, timeout);
}
@@ -63,7 +63,7 @@ public:
/// <summary>
/// Notifies one waiting thread.
/// </summary>
void NotifyOne()
__forceinline void NotifyOne()
{
Windows::WakeConditionVariable(&_cond);
}
@@ -71,7 +71,7 @@ public:
/// <summary>
/// Notifies all waiting threads.
/// </summary>
void NotifyAll()
__forceinline void NotifyAll()
{
Windows::WakeAllConditionVariable(&_cond);
}

View File

@@ -26,7 +26,7 @@ public:
/// <summary>
/// Initializes a new instance of the <see cref="Win32CriticalSection"/> class.
/// </summary>
Win32CriticalSection()
__forceinline Win32CriticalSection()
{
// Arguments: critical section, spin count (4000), creation flags.
// NOTE(review): 0x01000000 appears to match CRITICAL_SECTION_NO_DEBUG_INFO from the Windows SDK - confirm against the headers.
Windows::InitializeCriticalSectionEx(&_criticalSection, 4000, 0x01000000);
}
@@ -34,7 +34,7 @@ public:
/// <summary>
/// Finalizes an instance of the <see cref="Win32CriticalSection"/> class.
/// </summary>
~Win32CriticalSection()
__forceinline ~Win32CriticalSection()
{
Windows::DeleteCriticalSection(&_criticalSection);
}
@@ -43,7 +43,7 @@ public:
/// <summary>
/// Locks the critical section.
/// </summary>
void Lock() const
__forceinline void Lock() const
{
Windows::EnterCriticalSection(&_criticalSection);
}
@@ -52,7 +52,7 @@ public:
/// Attempts to enter a critical section without blocking. If the call is successful, the calling thread takes ownership of the critical section.
/// </summary>
/// <returns>True if calling thread took ownership of the critical section.</returns>
bool TryLock() const
__forceinline bool TryLock() const
{
return Windows::TryEnterCriticalSection(&_criticalSection) != 0;
}
@@ -60,7 +60,7 @@ public:
/// <summary>
/// Releases the lock on the critical section.
/// </summary>
void Unlock() const
__forceinline void Unlock() const
{
Windows::LeaveCriticalSection(&_criticalSection);
}

View File

@@ -924,12 +924,6 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
}
}
#if PLATFORM_WINDOWS
// Hack to fix D3D11 bug that doesn't insert UAV barrier after overlap region ends (between two GPUComputePass)
if (context->GetDevice()->GetRendererType() == RendererType::DirectX11)
context->Dispatch(_csRasterizeModel0, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups);
#endif
// Rasterize non-empty chunks (additive layers so need combine with existing chunk data)
for (uint32 layer = 0; layer <= maxLayer; layer++)
{