diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp index 2844b6482..d74b03672 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.cpp @@ -219,10 +219,6 @@ bool DynamicDiffuseGlobalIlluminationPass::setupResources() _csTraceRays[3] = shader->GetCS("CS_TraceRays", 3); _csUpdateProbesIrradiance = shader->GetCS("CS_UpdateProbes", 0); _csUpdateProbesDistance = shader->GetCS("CS_UpdateProbes", 1); - _csUpdateBordersIrradianceRow = shader->GetCS("CS_UpdateBorders", 0); - _csUpdateBordersIrradianceCollumn = shader->GetCS("CS_UpdateBorders", 1); - _csUpdateBordersDistanceRow = shader->GetCS("CS_UpdateBorders", 2); - _csUpdateBordersDistanceCollumn = shader->GetCS("CS_UpdateBorders", 3); auto device = GPUDevice::Instance; auto psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle; if (!_psIndirectLighting) @@ -250,10 +246,6 @@ void DynamicDiffuseGlobalIlluminationPass::OnShaderReloading(Asset* obj) _csTraceRays[3] = nullptr; _csUpdateProbesIrradiance = nullptr; _csUpdateProbesDistance = nullptr; - _csUpdateBordersIrradianceRow = nullptr; - _csUpdateBordersIrradianceCollumn = nullptr; - _csUpdateBordersDistanceRow = nullptr; - _csUpdateBordersDistanceCollumn = nullptr; SAFE_DELETE_GPU_RESOURCE(_psIndirectLighting); invalidateResources(); } @@ -542,7 +534,6 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont // Update probes { PROFILE_GPU_CPU_NAMED("Probes Update"); - bool anyDirty = false; uint32 threadGroupsX, threadGroupsY; #if DDGI_DEBUG_STATS uint32 zero[4] = {}; @@ -552,7 +543,6 @@ bool DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont { if (cascadeSkipUpdate[cascadeIndex]) continue; - anyDirty = true; // Classify probes (activation/deactivation and relocation) { @@ -667,33 +657,6 @@ bool 
DynamicDiffuseGlobalIlluminationPass::RenderInner(RenderContext& renderCont } } #endif - - // Update probes border pixels - if (anyDirty) - { - PROFILE_GPU_CPU_NAMED("Update Borders"); - - // Irradiance - context->BindUA(0, ddgiData.Result.ProbesIrradiance); - threadGroupsX = Math::DivideAndRoundUp(probesCountTotalX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); - threadGroupsY = Math::DivideAndRoundUp(probesCountTotalY, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); - context->Dispatch(_csUpdateBordersIrradianceRow, threadGroupsX, threadGroupsY, 1); - threadGroupsX = Math::DivideAndRoundUp(probesCountTotalX, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); - threadGroupsY = Math::DivideAndRoundUp(probesCountTotalY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); - context->Dispatch(_csUpdateBordersIrradianceCollumn, threadGroupsX, threadGroupsY, 1); - - // Distance - context->BindUA(0, ddgiData.Result.ProbesDistance); - threadGroupsX = Math::DivideAndRoundUp(probesCountTotalX * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); - threadGroupsY = Math::DivideAndRoundUp(probesCountTotalY, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); - context->Dispatch(_csUpdateBordersDistanceRow, threadGroupsX, threadGroupsY, 1); - threadGroupsX = Math::DivideAndRoundUp(probesCountTotalX, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); - threadGroupsY = Math::DivideAndRoundUp(probesCountTotalY * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE); - context->Dispatch(_csUpdateBordersDistanceCollumn, threadGroupsX, threadGroupsY, 1); - - context->ResetUA(); - context->ResetSR(); - } } return false; diff --git a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h index 28b18f39c..c56604255 100644 --- a/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h +++ b/Source/Engine/Renderer/GI/DynamicDiffuseGlobalIllumination.h @@ 
-47,10 +47,6 @@ private: GPUShaderProgramCS* _csTraceRays[4]; GPUShaderProgramCS* _csUpdateProbesIrradiance; GPUShaderProgramCS* _csUpdateProbesDistance; - GPUShaderProgramCS* _csUpdateBordersIrradianceRow; - GPUShaderProgramCS* _csUpdateBordersIrradianceCollumn; - GPUShaderProgramCS* _csUpdateBordersDistanceRow; - GPUShaderProgramCS* _csUpdateBordersDistanceCollumn; GPUPipelineState* _psIndirectLighting; #if USE_EDITOR AssetReference _debugModel; diff --git a/Source/Shaders/GI/DDGI.shader b/Source/Shaders/GI/DDGI.shader index 59e74bbb9..73cac7c26 100644 --- a/Source/Shaders/GI/DDGI.shader +++ b/Source/Shaders/GI/DDGI.shader @@ -415,16 +415,22 @@ void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID) #endif -#if defined(_CS_UpdateProbes) || defined(_CS_UpdateBorders) +#if defined(_CS_UpdateProbes) #if DDGI_PROBE_UPDATE_MODE == 0 // Update irradiance #define DDGI_PROBE_RESOLUTION DDGI_PROBE_RESOLUTION_IRRADIANCE groupshared float4 CachedProbesTraceRadiance[DDGI_TRACE_RAYS_LIMIT]; groupshared float OutputInstability[DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION]; +#else +// Update distance +#define DDGI_PROBE_RESOLUTION DDGI_PROBE_RESOLUTION_DISTANCE +groupshared float CachedProbesTraceDistance[DDGI_TRACE_RAYS_LIMIT]; +#endif // Source: https://github.com/turanszkij/WickedEngine #define BorderOffsetsSize (4 * DDGI_PROBE_RESOLUTION + 4) +#if DDGI_PROBE_RESOLUTION == 6 static const uint4 BorderOffsets[BorderOffsetsSize] = { uint4(6, 1, 1, 0), uint4(5, 1, 2, 0), @@ -457,12 +463,77 @@ static const uint4 BorderOffsets[BorderOffsetsSize] = { uint4(1, 1, 7, 7), uint4(6, 1, 0, 7), uint4(1, 6, 7, 0), - uint4(6, 6, 0, 0), + uint4(6, 6, 0, 0) +}; +#elif DDGI_PROBE_RESOLUTION == 14 +static const uint4 BorderOffsets[BorderOffsetsSize] = { + uint4(14, 1, 1, 0), + uint4(13, 1, 2, 0), + uint4(12, 1, 3, 0), + uint4(11, 1, 4, 0), + uint4(10, 1, 5, 0), + uint4(9, 1, 6, 0), + uint4(8, 1, 7, 0), + uint4(7, 1, 8, 0), + uint4(6, 1, 9, 0), + uint4(5, 1, 10, 0), + uint4(4, 1, 11, 
0), + uint4(3, 1, 12, 0), + uint4(2, 1, 13, 0), + uint4(1, 1, 14, 0), + + uint4(14, 14, 1, 15), + uint4(13, 14, 2, 15), + uint4(12, 14, 3, 15), + uint4(11, 14, 4, 15), + uint4(10, 14, 5, 15), + uint4(9, 14, 6, 15), + uint4(8, 14, 7, 15), + uint4(7, 14, 8, 15), + uint4(6, 14, 9, 15), + uint4(5, 14, 10, 15), + uint4(4, 14, 11, 15), + uint4(3, 14, 12, 15), + uint4(2, 14, 13, 15), + uint4(1, 14, 14, 15), + + uint4(1, 14, 0, 1), + uint4(1, 13, 0, 2), + uint4(1, 12, 0, 3), + uint4(1, 11, 0, 4), + uint4(1, 10, 0, 5), + uint4(1, 9, 0, 6), + uint4(1, 8, 0, 7), + uint4(1, 7, 0, 8), + uint4(1, 6, 0, 9), + uint4(1, 5, 0, 10), + uint4(1, 4, 0, 11), + uint4(1, 3, 0, 12), + uint4(1, 2, 0, 13), + uint4(1, 1, 0, 14), + + uint4(14, 14, 15, 1), + uint4(14, 13, 15, 2), + uint4(14, 12, 15, 3), + uint4(14, 11, 15, 4), + uint4(14, 10, 15, 5), + uint4(14, 9, 15, 6), + uint4(14, 8, 15, 7), + uint4(14, 7, 15, 8), + uint4(14, 6, 15, 9), + uint4(14, 5, 15, 10), + uint4(14, 4, 15, 11), + uint4(14, 3, 15, 12), + uint4(14, 2, 15, 13), + uint4(14, 1, 15, 14), + + uint4(14, 14, 0, 0), + uint4(1, 14, 15, 0), + uint4(14, 1, 0, 15), + uint4(1, 1, 15, 15) }; #else -// Update distance -#define DDGI_PROBE_RESOLUTION DDGI_PROBE_RESOLUTION_DISTANCE -groupshared float CachedProbesTraceDistance[DDGI_TRACE_RAYS_LIMIT]; +#error "Unsupported probe size for border values copy." 
#endif groupshared float3 CachedProbesTraceDirection[DDGI_TRACE_RAYS_LIMIT]; @@ -635,9 +706,11 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_ RWOutput[outputCoords] = result; + AllMemoryBarrierWithGroupSync(); // group sync that also covers the RWOutput texel writes which the border copy below reads back from other threads (a group-memory barrier only orders groupshared accesses) + uint2 baseCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex) * (DDGI_PROBE_RESOLUTION + 2); + #if DDGI_PROBE_UPDATE_MODE == 0 // The first thread updates the probe attention based on the instability of all texels - GroupMemoryBarrierWithGroupSync(); BRANCH if (GroupIndex == 0 && probeState != DDGI_PROBE_STATE_INACTIVE) { @@ -665,7 +738,6 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_ #if DDGI_DEBUG_INSTABILITY // Copy border pixels - uint2 baseCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex) * (DDGI_PROBE_RESOLUTION + 2); for (uint borderIndex = GroupIndex; borderIndex < BorderOffsetsSize; borderIndex += DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION) { uint4 borderOffsets = BorderOffsets[borderIndex]; @@ -673,73 +745,13 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_ } #endif #endif -} -// Compute shader for updating probes irradiance or distance texture borders (fills gaps between probes to support bilinear filtering) -META_CS(true, FEATURE_LEVEL_SM5) -META_PERMUTATION_2(DDGI_PROBE_UPDATE_MODE=0, BORDER_ROW=1) -META_PERMUTATION_2(DDGI_PROBE_UPDATE_MODE=0, BORDER_ROW=0) -META_PERMUTATION_2(DDGI_PROBE_UPDATE_MODE=1, BORDER_ROW=1) -META_PERMUTATION_2(DDGI_PROBE_UPDATE_MODE=1, BORDER_ROW=0) -[numthreads(DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE, DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE, 1)] -void CS_UpdateBorders(uint3 DispatchThreadId : SV_DispatchThreadID) -{ -#define COPY_PIXEL RWOutput[threadCoordinates] = RWOutput[copyCoordinates] -#define COPY_PIXEL_DEBUG RWOutput[threadCoordinates] = float4(5, 0, 0, 1) - - uint probeSideLength = DDGI_PROBE_RESOLUTION + 2; - uint probeSideLengthMinusOne = probeSideLength - 1; - uint2 
copyCoordinates = uint2(0, 0); - uint2 threadCoordinates = DispatchThreadId.xy; -#if BORDER_ROW - threadCoordinates.y *= probeSideLength; - uint corner = DispatchThreadId.x % probeSideLength; -#else - threadCoordinates.x *= probeSideLength; - uint corner = threadCoordinates.y % probeSideLength; -#endif - if (corner == 0 || corner == probeSideLengthMinusOne) - { -#if !BORDER_ROW - // Left corner - copyCoordinates.x = threadCoordinates.x + DDGI_PROBE_RESOLUTION; - copyCoordinates.y = threadCoordinates.y - sign((int)corner - 1) * DDGI_PROBE_RESOLUTION; - COPY_PIXEL; - - // Right corner - threadCoordinates.x += probeSideLengthMinusOne; - copyCoordinates.x = threadCoordinates.x - DDGI_PROBE_RESOLUTION; - COPY_PIXEL; -#endif - return; - } - -#if BORDER_ROW - // Top row - uint probeStart = uint(threadCoordinates.x / probeSideLength) * probeSideLength; - uint offset = probeSideLengthMinusOne - (threadCoordinates.x % probeSideLength); - copyCoordinates = uint2(probeStart + offset, threadCoordinates.y + 1); -#else - // Left column - uint probeStart = uint(threadCoordinates.y / probeSideLength) * probeSideLength; - uint offset = probeSideLengthMinusOne - (threadCoordinates.y % probeSideLength); - copyCoordinates = uint2(threadCoordinates.x + 1, probeStart + offset); -#endif - COPY_PIXEL; - -#if BORDER_ROW - // Bottom row - threadCoordinates.y += probeSideLengthMinusOne; - copyCoordinates = uint2(probeStart + offset, threadCoordinates.y - 1); -#else - // Right column - threadCoordinates.x += probeSideLengthMinusOne; - copyCoordinates = uint2(threadCoordinates.x - 1, probeStart + offset); -#endif - COPY_PIXEL; - -#undef COPY_PIXEL -#undef COPY_PIXEL_DEBUG + // Copy border pixels + for (uint borderIndex = GroupIndex; borderIndex < BorderOffsetsSize; borderIndex += DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION) + { + uint4 borderOffsets = BorderOffsets[borderIndex]; + RWOutput[baseCoords + borderOffsets.zw] = RWOutput[baseCoords + borderOffsets.xy]; + } } #endif