Various small fixes and improvements
This commit is contained in:
@@ -95,6 +95,7 @@ void GPUContextDX11::FrameBegin()
|
||||
GPUContext::FrameBegin();
|
||||
|
||||
// Setup
|
||||
_flushOnDispatch = false;
|
||||
_omDirtyFlag = false;
|
||||
_uaDirtyFlag = false;
|
||||
_cbDirtyFlag = false;
|
||||
@@ -497,50 +498,19 @@ void GPUContextDX11::UpdateCB(GPUConstantBuffer* cb, const void* data)
|
||||
|
||||
// Issues a direct compute dispatch of `shader` on the D3D11 context using the given
// thread-group counts for X, Y and Z.
// NOTE(review): as listed, the body performs an inline flush + CSSetShader sequence and then
// also calls onDispatch(shader), which repeats the same flushes and shader binding. This looks
// like pre-change and post-change lines of a diff shown together - confirm which lines are current.
void GPUContextDX11::Dispatch(GPUShaderProgramCS* shader, uint32 threadGroupCountX, uint32 threadGroupCountY, uint32 threadGroupCountZ)
|
||||
{
|
||||
// Remember the compute shader for the duration of this call (cleared again before returning)
CurrentCS = (GPUShaderProgramCSDX11*)shader;
|
||||
|
||||
// Flush pending state before dispatching (presumably constant buffers, shader resource views, unordered access views and output merger, going by the helper names)
|
||||
flushCBs();
|
||||
flushSRVs();
|
||||
flushUAVs();
|
||||
flushOM();
|
||||
|
||||
// Bind the compute shader only when it differs from the one cached in _currentCompute
|
||||
auto compute = (ID3D11ComputeShader*)shader->GetBufferHandle();
|
||||
if (_currentCompute != compute)
|
||||
{
|
||||
_currentCompute = compute;
|
||||
_context->CSSetShader(compute, nullptr, 0);
|
||||
}
|
||||
// Shared pre-dispatch step (sets CurrentCS, flushes state, optionally flushes the context, binds the shader)
onDispatch(shader);
|
||||
_context->Dispatch(threadGroupCountX, threadGroupCountY, threadGroupCountZ);
|
||||
// Presumably records this dispatch in the render statistics - confirm against the macro definition
RENDER_STAT_DISPATCH_CALL();
|
||||
|
||||
// No compute shader is considered current once the dispatch has been issued
CurrentCS = nullptr;
|
||||
}
|
||||
|
||||
// Issues an indirect compute dispatch of `shader`; the dispatch arguments are read by the GPU
// from `bufferForArgs` starting at byte offset `offsetForArgs`.
// NOTE(review): as listed, the body performs an inline flush + CSSetShader sequence and then
// also calls onDispatch(shader), which repeats the same flushes and shader binding. This looks
// like pre-change and post-change lines of a diff shown together - confirm which lines are current.
void GPUContextDX11::DispatchIndirect(GPUShaderProgramCS* shader, GPUBuffer* bufferForArgs, uint32 offsetForArgs)
|
||||
{
|
||||
// The arguments buffer must be non-null and created with the Argument flag
ASSERT(bufferForArgs && EnumHasAnyFlags(bufferForArgs->GetFlags(), GPUBufferFlags::Argument));
|
||||
// Remember the compute shader for the duration of this call (cleared again before returning)
CurrentCS = (GPUShaderProgramCSDX11*)shader;
|
||||
|
||||
// Downcast to the D3D11 buffer type to reach the native buffer via GetBuffer()
auto bufferForArgsDX11 = (GPUBufferDX11*)bufferForArgs;
|
||||
|
||||
// Flush pending state before dispatching (presumably constant buffers, shader resource views, unordered access views and output merger, going by the helper names)
|
||||
flushCBs();
|
||||
flushSRVs();
|
||||
flushUAVs();
|
||||
flushOM();
|
||||
|
||||
// Bind the compute shader only when it differs from the one cached in _currentCompute
|
||||
auto compute = (ID3D11ComputeShader*)shader->GetBufferHandle();
|
||||
if (_currentCompute != compute)
|
||||
{
|
||||
_currentCompute = compute;
|
||||
_context->CSSetShader(compute, nullptr, 0);
|
||||
}
|
||||
// Shared pre-dispatch step (sets CurrentCS, flushes state, optionally flushes the context, binds the shader)
onDispatch(shader);
|
||||
_context->DispatchIndirect(bufferForArgsDX11->GetBuffer(), offsetForArgs);
|
||||
// Presumably records this dispatch in the render statistics - confirm against the macro definition
RENDER_STAT_DISPATCH_CALL();
|
||||
|
||||
// No compute shader is considered current once the dispatch has been issued
CurrentCS = nullptr;
|
||||
}
|
||||
|
||||
@@ -921,6 +891,7 @@ void GPUContextDX11::OverlapUA(bool end)
|
||||
NvAPI_D3D11_EndUAVOverlap(_context);
|
||||
else
|
||||
NvAPI_D3D11_BeginUAVOverlap(_context);
|
||||
_flushOnDispatch |= end;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
@@ -931,6 +902,7 @@ void GPUContextDX11::OverlapUA(bool end)
|
||||
agsDriverExtensionsDX11_EndUAVOverlap(AgsContext, _context);
|
||||
else
|
||||
agsDriverExtensionsDX11_BeginUAVOverlap(AgsContext, _context);
|
||||
_flushOnDispatch |= end;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
@@ -1046,6 +1018,7 @@ void GPUContextDX11::flushIA()
|
||||
|
||||
void GPUContextDX11::onDrawCall()
|
||||
{
|
||||
_flushOnDispatch = false;
|
||||
flushCBs();
|
||||
flushSRVs();
|
||||
flushUAVs();
|
||||
@@ -1053,4 +1026,27 @@ void GPUContextDX11::onDrawCall()
|
||||
flushOM();
|
||||
}
|
||||
|
||||
// Common preparation executed before a compute dispatch is issued: records the current compute
// shader, flushes pending pipeline state, optionally flushes the device context, and binds the
// compute shader if it is not the one already bound.
// CurrentCS is set here but not reset; the Dispatch/DispatchIndirect callers clear it afterwards.
void GPUContextDX11::onDispatch(GPUShaderProgramCS* shader)
|
||||
{
|
||||
CurrentCS = (GPUShaderProgramCSDX11*)shader;
|
||||
|
||||
// Flush pending state (presumably constant buffers, shader resource views, unordered access views and output merger, going by the helper names)
flushCBs();
|
||||
flushSRVs();
|
||||
flushUAVs();
|
||||
flushOM();
|
||||
|
||||
// One-shot context flush: _flushOnDispatch is raised by OverlapUA(end) and reset by
// FrameBegin/onDrawCall. Presumably this stands in for the UAV barrier that D3D11 does not
// insert after a UAV overlap region ends - confirm.
if (_flushOnDispatch)
|
||||
{
|
||||
_flushOnDispatch = false;
|
||||
_context->Flush();
|
||||
}
|
||||
|
||||
// Bind the compute shader only when it differs from the one cached in _currentCompute
auto compute = (ID3D11ComputeShader*)shader->GetBufferHandle();
|
||||
if (_currentCompute != compute)
|
||||
{
|
||||
_currentCompute = compute;
|
||||
_context->CSSetShader(compute, nullptr, 0);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -30,6 +30,7 @@ private:
|
||||
byte _tracyZone[TracyD3D11ZoneSize];
|
||||
#endif
|
||||
int32 _maxUASlots;
|
||||
bool _flushOnDispatch;
|
||||
|
||||
// Output Merger
|
||||
bool _omDirtyFlag;
|
||||
@@ -111,6 +112,7 @@ private:
|
||||
void flushOM();
|
||||
void flushIA();
|
||||
void onDrawCall();
|
||||
void onDispatch(GPUShaderProgramCS* shader);
|
||||
|
||||
public:
|
||||
|
||||
|
||||
@@ -38,7 +38,7 @@ void* GPUBufferDX12::Map(GPUResourceMapMode mode)
|
||||
{
|
||||
D3D12_RANGE readRange;
|
||||
D3D12_RANGE* readRangePtr;
|
||||
switch (mode)
|
||||
switch (mode & GPUResourceMapMode::ReadWrite)
|
||||
{
|
||||
case GPUResourceMapMode::Read:
|
||||
readRangePtr = nullptr;
|
||||
|
||||
@@ -26,7 +26,7 @@ public:
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="Win32ConditionVariable"/> class.
|
||||
/// </summary>
|
||||
Win32ConditionVariable()
|
||||
__forceinline Win32ConditionVariable()
|
||||
{
|
||||
// NOTE(review): the signature is listed twice (plain and __forceinline); looks like diff residue - confirm which is current.
// Puts the native condition variable stored in _cond into its initial state.
Windows::InitializeConditionVariable(&_cond);
|
||||
}
|
||||
@@ -44,7 +44,7 @@ public:
|
||||
/// Blocks the current thread execution until the condition variable is woken up.
|
||||
/// </summary>
|
||||
/// <param name="lock">The critical section locked by the current thread.</param>
|
||||
void Wait(const Win32CriticalSection& lock)
|
||||
__forceinline void Wait(const Win32CriticalSection& lock)
|
||||
{
|
||||
// NOTE(review): the signature is listed twice (plain and __forceinline); looks like diff residue - confirm which is current.
// 0xFFFFFFFF is the Win32 INFINITE timeout value, so this sleeps with no time limit.
// The result of the call is intentionally not returned by this overload.
Windows::SleepConditionVariableCS(&_cond, &lock._criticalSection, 0xFFFFFFFF);
|
||||
}
|
||||
@@ -55,7 +55,7 @@ public:
|
||||
/// <param name="lock">The critical section locked by the current thread.</param>
|
||||
/// <param name="timeout">The time-out interval, in milliseconds. If the time-out interval elapses, the function re-acquires the critical section and returns zero. If timeout is zero, the function tests the states of the specified objects and returns immediately. If timeout is INFINITE, the function's time-out interval never elapses.</param>
|
||||
/// <returns>If the function succeeds, the return value is true, otherwise, if the function fails or the time-out interval elapses, the return value is false.</returns>
|
||||
bool Wait(const Win32CriticalSection& lock, const int32 timeout)
|
||||
__forceinline bool Wait(const Win32CriticalSection& lock, const int32 timeout)
|
||||
{
|
||||
// NOTE(review): the signature is listed twice (plain and __forceinline); looks like diff residue - confirm which is current.
// `!!` collapses the native result to a bool: true when the wait succeeded, false on failure or time-out.
// NOTE(review): timeout is a signed int32 while the Win32 API takes an unsigned DWORD of milliseconds;
// a negative value would presumably turn into a very long wait - confirm against the Windows:: wrapper signature.
return !!Windows::SleepConditionVariableCS(&_cond, &lock._criticalSection, timeout);
|
||||
}
|
||||
@@ -63,7 +63,7 @@ public:
|
||||
/// <summary>
|
||||
/// Notifies one waiting thread.
|
||||
/// </summary>
|
||||
void NotifyOne()
|
||||
__forceinline void NotifyOne()
|
||||
{
|
||||
// NOTE(review): the signature is listed twice (plain and __forceinline); looks like diff residue - confirm which is current.
// Wakes a single thread waiting on _cond.
Windows::WakeConditionVariable(&_cond);
|
||||
}
|
||||
@@ -71,7 +71,7 @@ public:
|
||||
/// <summary>
|
||||
/// Notifies all waiting threads.
|
||||
/// </summary>
|
||||
void NotifyAll()
|
||||
__forceinline void NotifyAll()
|
||||
{
|
||||
// NOTE(review): the signature is listed twice (plain and __forceinline); looks like diff residue - confirm which is current.
// Wakes every thread currently waiting on _cond.
Windows::WakeAllConditionVariable(&_cond);
|
||||
}
|
||||
|
||||
@@ -26,7 +26,7 @@ public:
|
||||
/// <summary>
|
||||
/// Initializes a new instance of the <see cref="Win32CriticalSection"/> class.
|
||||
/// </summary>
|
||||
Win32CriticalSection()
|
||||
__forceinline Win32CriticalSection()
|
||||
{
|
||||
// NOTE(review): the signature is listed twice (plain and __forceinline); looks like diff residue - confirm which is current.
// Initializes _criticalSection with a spin count of 4000 and flags 0x01000000.
// NOTE(review): 0x01000000 matches CRITICAL_SECTION_NO_DEBUG_INFO in the Windows SDK headers - confirm.
// The return value of the call is not checked.
Windows::InitializeCriticalSectionEx(&_criticalSection, 4000, 0x01000000);
|
||||
}
|
||||
@@ -34,7 +34,7 @@ public:
|
||||
/// <summary>
|
||||
/// Finalizes an instance of the <see cref="Win32CriticalSection"/> class.
|
||||
/// </summary>
|
||||
~Win32CriticalSection()
|
||||
__forceinline ~Win32CriticalSection()
|
||||
{
|
||||
// NOTE(review): the signature is listed twice (plain and __forceinline); looks like diff residue - confirm which is current.
// Releases the native resources held by _criticalSection; it must not be used after this point.
Windows::DeleteCriticalSection(&_criticalSection);
|
||||
}
|
||||
@@ -43,7 +43,7 @@ public:
|
||||
/// <summary>
|
||||
/// Locks the critical section.
|
||||
/// </summary>
|
||||
void Lock() const
|
||||
__forceinline void Lock() const
|
||||
{
|
||||
// NOTE(review): the signature is listed twice (plain and __forceinline); looks like diff residue - confirm which is current.
// Blocks until the calling thread owns the critical section.
// The method is const yet hands &_criticalSection to a mutating API, so the member is presumably declared mutable - confirm.
Windows::EnterCriticalSection(&_criticalSection);
|
||||
}
|
||||
@@ -52,7 +52,7 @@ public:
|
||||
/// Attempts to enter a critical section without blocking. If the call is successful, the calling thread takes ownership of the critical section.
|
||||
/// </summary>
|
||||
/// <returns>True if calling thread took ownership of the critical section.</returns>
|
||||
bool TryLock() const
|
||||
__forceinline bool TryLock() const
|
||||
{
|
||||
// NOTE(review): the signature is listed twice (plain and __forceinline); looks like diff residue - confirm which is current.
// The native call returns nonzero when ownership was taken; compare against 0 to produce a bool without blocking.
return Windows::TryEnterCriticalSection(&_criticalSection) != 0;
|
||||
}
|
||||
@@ -60,7 +60,7 @@ public:
|
||||
/// <summary>
|
||||
/// Releases the lock on the critical section.
|
||||
/// </summary>
|
||||
void Unlock() const
|
||||
__forceinline void Unlock() const
|
||||
{
|
||||
// NOTE(review): the signature is listed twice (plain and __forceinline); looks like diff residue - confirm which is current.
// Gives up ownership of the critical section; expected to be called by the thread that currently holds it.
Windows::LeaveCriticalSection(&_criticalSection);
|
||||
}
|
||||
|
||||
@@ -924,12 +924,6 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
|
||||
}
|
||||
}
|
||||
|
||||
#if PLATFORM_WINDOWS
|
||||
// Hack to fix D3D11 bug that doesn't insert UAV barrier after overlap region ends (between two GPUComputePass)
|
||||
if (context->GetDevice()->GetRendererType() == RendererType::DirectX11)
|
||||
context->Dispatch(_csRasterizeModel0, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups);
|
||||
#endif
|
||||
|
||||
// Rasterize non-empty chunks (additive layers so need combine with existing chunk data)
|
||||
for (uint32 layer = 0; layer <= maxLayer; layer++)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user