Merge remote-tracking branch 'origin/gi'

# Conflicts:
#	Source/Engine/Renderer/GlobalSignDistanceFieldPass.cpp
This commit is contained in:
Wojtek Figat
2022-06-28 18:16:28 +02:00
95 changed files with 2768 additions and 2416 deletions

BIN
Content/Editor/Camera/M_Camera.flax (Stored with Git LFS)

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
Content/Editor/DefaultFontMaterial.flax (Stored with Git LFS)

Binary file not shown.

Binary file not shown.

BIN
Content/Editor/Gizmo/Material.flax (Stored with Git LFS)

Binary file not shown.

BIN
Content/Editor/Gizmo/MaterialWire.flax (Stored with Git LFS)

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
Content/Editor/Highlight Material.flax (Stored with Git LFS)

Binary file not shown.

BIN
Content/Editor/Icons/IconsMaterial.flax (Stored with Git LFS)

Binary file not shown.

Binary file not shown.

View File

@@ -617,14 +617,14 @@ void PS_Depth(PixelInput input)
#if _PS_QuadOverdraw
#include "./Flax/Editor/QuadOverdraw.hlsl"
//#include "./Flax/Editor/QuadOverdraw.hlsl"
// Pixel Shader function for Quad Overdraw Pass (editor-only)
[earlydepthstencil]
META_PS(USE_EDITOR, FEATURE_LEVEL_SM5)
void PS_QuadOverdraw(float4 svPos : SV_Position, uint primId : SV_PrimitiveID)
{
DoQuadOverdraw(svPos, primId);
//DoQuadOverdraw(svPos, primId);
}
#endif

Binary file not shown.

Binary file not shown.

BIN
Content/Editor/Particles/Smoke.flax (Stored with Git LFS)

Binary file not shown.

BIN
Content/Editor/Particles/Sparks.flax (Stored with Git LFS)

Binary file not shown.

BIN
Content/Editor/SpriteMaterial.flax (Stored with Git LFS)

Binary file not shown.

BIN
Content/Editor/TexturePreviewMaterial.flax (Stored with Git LFS)

Binary file not shown.

BIN
Content/Editor/Wires Debug Material.flax (Stored with Git LFS)

Binary file not shown.

Binary file not shown.

BIN
Content/Engine/DefaultMaterial.flax (Stored with Git LFS)

Binary file not shown.

BIN
Content/Engine/DefaultTerrainMaterial.flax (Stored with Git LFS)

Binary file not shown.

BIN
Content/Engine/SingleColorMaterial.flax (Stored with Git LFS)

Binary file not shown.

BIN
Content/Engine/SkyboxMaterial.flax (Stored with Git LFS)

Binary file not shown.

BIN
Content/Shaders/GI/DDGI.flax (Stored with Git LFS)

Binary file not shown.

BIN
Content/Shaders/GI/GlobalSurfaceAtlas.flax (Stored with Git LFS)

Binary file not shown.

Binary file not shown.

BIN
Content/Shaders/VolumetricFog.flax (Stored with Git LFS)

Binary file not shown.

View File

@@ -231,7 +231,7 @@ namespace FlaxEditor.Modules
if (_isPlayModeRequested)
{
// Check if editor has been compiled and scripting reloaded (there is no pending reload action)
if ((ScriptsBuilder.IsReady || !Editor.Options.Options.General.AutoReloadScriptsOnMainWindowFocus) && !Level.IsAnyActionPending)
if ((ScriptsBuilder.IsReady || !Editor.Options.Options.General.AutoReloadScriptsOnMainWindowFocus) && !Level.IsAnyActionPending && Level.IsAnySceneLoaded)
{
// Clear flag
_isPlayModeRequested = false;

View File

@@ -1409,6 +1409,7 @@ namespace FlaxEditor.Viewport
new ViewModeOptions(ViewMode.QuadOverdraw, "Quad Overdraw"),
new ViewModeOptions(ViewMode.GlobalSDF, "Global SDF"),
new ViewModeOptions(ViewMode.GlobalSurfaceAtlas, "Global Surface Atlas"),
new ViewModeOptions(ViewMode.GlobalIllumination, "Global Illumination"),
};
private void WidgetCamSpeedShowHide(Control cm)

View File

@@ -481,7 +481,7 @@ void Material::InitCompilationOptions(ShaderCompilationOptions& options)
BytesContainer Material::LoadSurface(bool createDefaultIfMissing)
{
BytesContainer result;
if (WaitForLoaded())
if (WaitForLoaded() && !LastLoadFailed())
return result;
ScopeLock lock(Locker);

View File

@@ -15,6 +15,5 @@ void GraphicsSettings::Apply()
Graphics::AllowCSMBlending = AllowCSMBlending;
Graphics::GlobalSDFQuality = GlobalSDFQuality;
Graphics::GIQuality = GIQuality;
Graphics::GlobalSurfaceAtlasResolution = GlobalSurfaceAtlasResolution;
Graphics::PostProcessSettings = PostProcessSettings;
}

View File

@@ -89,6 +89,12 @@ public:
API_FIELD(Attributes="EditorOrder(2100), DefaultValue(Quality.High), EditorDisplay(\"Global Illumination\")")
Quality GIQuality = Quality::High;
/// <summary>
/// The Global Illumination probes spacing distance (in world units). Defines the quality of the GI resolution. Adjust to 200-500 to improve performance and lower frequency GI data.
/// </summary>
API_FIELD(Attributes="EditorOrder(2120), Limit(50, 1000), EditorDisplay(\"Global Illumination\")")
float GIProbesSpacing = 100;
/// <summary>
/// The Global Surface Atlas resolution. Adjust it if atlas `flickers` due to overflow (eg. to 4096).
/// </summary>

View File

@@ -435,6 +435,14 @@ namespace Math
return amount <= 0 ? 0 : amount >= 1 ? 1 : amount * amount * amount * (amount * (amount * 6 - 15) + 10);
}
// Determines whether the specified integer value is exactly zero (no tolerance is applied, unlike the floating-point overloads)
// @param a The integer value
// @returns True if the specified value equals zero, otherwise false
inline bool IsZero(int32 a)
{
    // Predicate result is a boolean; returning bool keeps it consistent with IsOne(int32) and the floating-point IsZero overloads
    return a == 0;
}
// Determines whether the specified value is close to zero (0.0f)
// @param a The floating value
// @returns True if the specified value is close to zero (0.0f). otherwise false
@@ -443,6 +451,14 @@ namespace Math
return Abs(a) < ZeroTolerance;
}
// Checks whether the specified integer value is equal to one (exact comparison)
// @param a The integer value
// @returns True if the specified value is one, otherwise false
inline bool IsOne(int32 a)
{
    const bool equalsOne = (1 == a);
    return equalsOne;
}
// Determines whether the specified value is close to one (1.0f)
// @param a The floating value
// @returns True if the specified value is close to one (1.0f). otherwise false

View File

@@ -864,6 +864,11 @@ API_ENUM() enum class ViewMode
/// Draw Global Surface Atlas preview.
/// </summary>
GlobalSurfaceAtlas = 25,
/// <summary>
/// Draw Global Illumination debug preview (eg. irradiance probes).
/// </summary>
GlobalIllumination = 26,
};
/// <summary>

View File

@@ -16,7 +16,6 @@ Quality Graphics::ShadowMapsQuality = Quality::Medium;
bool Graphics::AllowCSMBlending = false;
Quality Graphics::GlobalSDFQuality = Quality::High;
Quality Graphics::GIQuality = Quality::High;
int32 Graphics::GlobalSurfaceAtlasResolution = 2048;
PostProcessSettings Graphics::PostProcessSettings;
#if GRAPHICS_API_NULL

View File

@@ -63,11 +63,6 @@ public:
/// </summary>
API_FIELD() static Quality GIQuality;
/// <summary>
/// The Global Surface Atlas resolution. Adjust it if atlas `flickers` due to overflow.
/// </summary>
API_FIELD() static int32 GlobalSurfaceAtlasResolution;
/// <summary>
/// The default Post Process settings. Can be overridden by PostFxVolume on a level locally, per camera or for a whole map.
/// </summary>

View File

@@ -502,7 +502,7 @@ void MaterialParameter::Bind(BindMeta& meta) const
GlobalSignDistanceFieldPass::BindingData bindingData;
if (GlobalSignDistanceFieldPass::Instance()->Get(meta.Buffers, bindingData))
Platform::MemoryClear(&bindingData, sizeof(bindingData));
bindingData.BindCascades(meta.Context, _registerIndex);
meta.Context->BindSR(_registerIndex, bindingData.Texture ? bindingData.Texture->ViewVolume() : nullptr);
*((GlobalSignDistanceFieldPass::ConstantsData*)(meta.Constants.Get() + _offset)) = bindingData.Constants;
break;
}

View File

@@ -333,7 +333,7 @@ API_STRUCT() struct FLAXENGINE_API GlobalIlluminationSettings : ISerializable
/// Defines how quickly GI blends between the current frame and the history buffer. Lower values update GI faster, but with more jittering and noise. If the camera in your game doesn't move much, we recommend values closer to 1.
/// </summary>
API_FIELD(Attributes="EditorOrder(20), Limit(0, 1), PostProcessSetting((int)GlobalIlluminationSettingsOverride.TemporalResponse)")
float TemporalResponse = 0.8f;
float TemporalResponse = 0.9f;
/// <summary>
/// Draw distance of the Global Illumination effect. Scene outside the range will use fallback irradiance.

View File

@@ -74,7 +74,8 @@ GPUContextDX12::GPUContextDX12(GPUDeviceDX12* device, D3D12_COMMAND_LIST_TYPE ty
, _isCompute(0)
, _rtDirtyFlag(0)
, _psDirtyFlag(0)
, _cbDirtyFlag(0)
, _cbGraphicsDirtyFlag(0)
, _cbComputeDirtyFlag(0)
, _samplersDirtyFlag(0)
, _rtDepth(nullptr)
, _ibHandle(nullptr)
@@ -214,7 +215,8 @@ void GPUContextDX12::Reset()
// Setup initial state
_currentState = nullptr;
_rtDirtyFlag = false;
_cbDirtyFlag = false;
_cbGraphicsDirtyFlag = false;
_cbComputeDirtyFlag = false;
_samplersDirtyFlag = false;
_rtCount = 0;
_rtDepth = nullptr;
@@ -453,19 +455,30 @@ void GPUContextDX12::flushUAVs()
void GPUContextDX12::flushCBs()
{
if (!_cbDirtyFlag)
return;
_cbDirtyFlag = false;
for (uint32 slotIndex = 0; slotIndex < ARRAY_COUNT(_cbHandles); slotIndex++)
if (_cbGraphicsDirtyFlag && !_isCompute)
{
auto cb = _cbHandles[slotIndex];
_cbGraphicsDirtyFlag = false;
for (uint32 i = 0; i < ARRAY_COUNT(_cbHandles); i++)
{
const auto cb = _cbHandles[i];
if (cb)
{
ASSERT(cb->GPUAddress != 0);
if (_isCompute)
_commandList->SetComputeRootConstantBufferView(slotIndex, cb->GPUAddress);
else
_commandList->SetGraphicsRootConstantBufferView(slotIndex, cb->GPUAddress);
_commandList->SetGraphicsRootConstantBufferView(i, cb->GPUAddress);
}
}
}
else if (_cbComputeDirtyFlag && _isCompute)
{
_cbComputeDirtyFlag = false;
for (uint32 i = 0; i < ARRAY_COUNT(_cbHandles); i++)
{
const auto cb = _cbHandles[i];
if (cb)
{
ASSERT(cb->GPUAddress != 0);
_commandList->SetComputeRootConstantBufferView(i, cb->GPUAddress);
}
}
}
}
@@ -867,7 +880,8 @@ void GPUContextDX12::ResetUA()
void GPUContextDX12::ResetCB()
{
_cbDirtyFlag = false;
_cbGraphicsDirtyFlag = false;
_cbComputeDirtyFlag = false;
Platform::MemoryClear(_cbHandles, sizeof(_cbHandles));
}
@@ -877,7 +891,8 @@ void GPUContextDX12::BindCB(int32 slot, GPUConstantBuffer* cb)
auto cbDX12 = static_cast<GPUConstantBufferDX12*>(cb);
if (_cbHandles[slot] != cbDX12)
{
_cbDirtyFlag = true;
_cbGraphicsDirtyFlag = true;
_cbComputeDirtyFlag = true;
_cbHandles[slot] = cbDX12;
}
}
@@ -988,7 +1003,8 @@ void GPUContextDX12::UpdateCB(GPUConstantBuffer* cb, const void* data)
{
if (_cbHandles[i] == cbDX12)
{
_cbDirtyFlag = true;
_cbGraphicsDirtyFlag = true;
_cbComputeDirtyFlag = true;
break;
}
}

View File

@@ -51,7 +51,8 @@ private:
int32 _isCompute : 1;
int32 _rtDirtyFlag : 1;
int32 _psDirtyFlag : 1;
int32 _cbDirtyFlag : 1;
int32 _cbGraphicsDirtyFlag : 1;
int32 _cbComputeDirtyFlag : 1;
int32 _samplersDirtyFlag : 1;
GPUTextureViewDX12* _rtDepth;

View File

@@ -389,8 +389,9 @@ inline void SetDebugObjectName(T* resource, const Char* data, UINT size)
if (data && size > 0)
resource->SetName(data);
#else
char* ansi = (char*)Allocator::Allocate(size);
char* ansi = (char*)Allocator::Allocate(size + 1);
StringUtils::ConvertUTF162ANSI(data, ansi, size);
ansi[size] ='\0';
SetDebugObjectName(resource, ansi, size);
Allocator::Free(ansi);
#endif

View File

@@ -452,6 +452,7 @@ void GPUContextVulkan::UpdateDescriptorSets(const SpirvShaderDescriptorInfo& des
case VK_DESCRIPTOR_TYPE_SAMPLER:
{
// Sampler
ASSERT_LOW_LAYER(slot < GPU_MAX_SAMPLER_BINDED);
const VkSampler sampler = _samplerHandles[slot];
ASSERT(sampler);
needsWrite |= dsWriter.WriteSampler(descriptorIndex, sampler, index);
@@ -460,6 +461,7 @@ void GPUContextVulkan::UpdateDescriptorSets(const SpirvShaderDescriptorInfo& des
case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
{
// Shader Resource (Texture)
ASSERT_LOW_LAYER(slot < GPU_MAX_SR_BINDED);
auto handle = (GPUTextureViewVulkan*)handles[slot];
if (!handle)
{
@@ -490,6 +492,7 @@ void GPUContextVulkan::UpdateDescriptorSets(const SpirvShaderDescriptorInfo& des
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
{
// Shader Resource (Buffer)
ASSERT_LOW_LAYER(slot < GPU_MAX_SR_BINDED);
auto sr = handles[slot];
if (!sr)
{
@@ -505,6 +508,7 @@ void GPUContextVulkan::UpdateDescriptorSets(const SpirvShaderDescriptorInfo& des
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
{
// Unordered Access (Texture)
ASSERT_LOW_LAYER(slot < GPU_MAX_UA_BINDED);
auto ua = handles[slot];
ASSERT(ua);
VkImageView imageView;
@@ -516,6 +520,7 @@ void GPUContextVulkan::UpdateDescriptorSets(const SpirvShaderDescriptorInfo& des
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
{
// Unordered Access (Buffer)
ASSERT_LOW_LAYER(slot < GPU_MAX_UA_BINDED);
auto ua = handles[slot];
if (!ua)
{
@@ -531,6 +536,7 @@ void GPUContextVulkan::UpdateDescriptorSets(const SpirvShaderDescriptorInfo& des
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
{
// Unordered Access (Buffer)
ASSERT_LOW_LAYER(slot < GPU_MAX_UA_BINDED);
auto ua = handles[slot];
if (!ua)
{
@@ -546,6 +552,7 @@ void GPUContextVulkan::UpdateDescriptorSets(const SpirvShaderDescriptorInfo& des
case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
{
// Constant Buffer
ASSERT_LOW_LAYER(slot < GPU_MAX_CB_BINDED);
auto cb = handles[slot];
ASSERT(cb);
VkBuffer buffer;

View File

@@ -42,6 +42,8 @@ void DirectionalLight::Draw(RenderContext& renderContext)
data.ShadowsMode = ShadowsMode;
data.CascadeCount = CascadeCount;
data.ContactShadowsLength = ContactShadowsLength;
data.StaticFlags = GetStaticFlags();
data.ID = GetID();
renderContext.List->DirectionalLights.Add(data);
}
}

View File

@@ -134,6 +134,8 @@ void PointLight::Draw(RenderContext& renderContext)
data.ContactShadowsLength = ContactShadowsLength;
data.IndirectLightingIntensity = IndirectLightingIntensity;
data.IESTexture = IESTexture ? IESTexture->GetTexture() : nullptr;
data.StaticFlags = GetStaticFlags();
data.ID = GetID();
renderContext.List->PointLights.Add(data);
}
}

View File

@@ -116,6 +116,8 @@ void SkyLight::Draw(RenderContext& renderContext)
data.IndirectLightingIntensity = IndirectLightingIntensity;
data.Radius = GetScaledRadius();
data.Image = GetSource();
data.StaticFlags = GetStaticFlags();
data.ID = GetID();
renderContext.List->SkyLights.Add(data);
}
}

View File

@@ -187,6 +187,8 @@ void SpotLight::Draw(RenderContext& renderContext)
data.IESTexture = IESTexture ? IESTexture->GetTexture() : nullptr;
Float3::Transform(Float3::Up, GetOrientation(), data.UpVector);
data.OuterConeAngle = outerConeAngle;
data.StaticFlags = GetStaticFlags();
data.ID = GetID();
renderContext.List->SpotLights.Add(data);
}
}

View File

@@ -335,7 +335,7 @@ void ParticleEmitter::InitCompilationOptions(ShaderCompilationOptions& options)
BytesContainer ParticleEmitter::LoadSurface(bool createDefaultIfMissing)
{
BytesContainer result;
if (WaitForLoaded())
if (WaitForLoaded() && !LastLoadFailed())
return result;
ScopeLock lock(Locker);

View File

@@ -9,6 +9,7 @@
#include "Engine/Core/Math/Int3.h"
#include "Engine/Core/Math/Matrix3x3.h"
#include "Engine/Core/Math/Quaternion.h"
#include "Engine/Core/Config/GraphicsSettings.h"
#include "Engine/Engine/Engine.h"
#include "Engine/Content/Content.h"
#include "Engine/Debug/DebugDraw.h"
@@ -33,8 +34,7 @@
// This must match HLSL
#define DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT 4096 // Maximum amount of probes to update at once during rays tracing and blending
#define DDGI_TRACE_RAYS_GROUP_SIZE_X 32
#define DDGI_TRACE_RAYS_LIMIT 512 // Limit of rays per-probe (runtime value can be smaller)
#define DDGI_TRACE_RAYS_LIMIT 256 // Limit of rays per-probe (runtime value can be smaller)
#define DDGI_PROBE_RESOLUTION_IRRADIANCE 6 // Resolution (in texels) for probe irradiance data (excluding 1px padding on each side)
#define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side)
#define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8
@@ -49,6 +49,7 @@ PACK_STRUCT(struct Data0
Float2 Padding0;
float ResetBlend;
float TemporalTime;
Int4 ProbeScrollClears[4];
});
PACK_STRUCT(struct Data1
@@ -68,16 +69,14 @@ public:
float ProbesSpacing = 0.0f;
Int3 ProbeScrollOffsets;
Int3 ProbeScrollDirections;
bool ProbeScrollClear[3];
Int3 ProbeScrollClears;
void Clear()
{
ProbesOrigin = Float3::Zero;
ProbeScrollOffsets = Int3::Zero;
ProbeScrollDirections = Int3::Zero;
ProbeScrollClear[0] = false;
ProbeScrollClear[1] = false;
ProbeScrollClear[2] = false;
ProbeScrollClears = Int3::Zero;
}
} Cascades[4];
@@ -88,6 +87,8 @@ public:
GPUTexture* ProbesState = nullptr; // Probes state: (RGB: world-space offset, A: state)
GPUTexture* ProbesIrradiance = nullptr; // Probes irradiance (RGB: sRGB color)
GPUTexture* ProbesDistance = nullptr; // Probes distance (R: mean distance, G: mean distance^2)
GPUBuffer* ActiveProbes = nullptr; // List with indices of the active probes (built during probes classification to use indirect dispatches for probes updating), counter at 0
GPUBuffer* UpdateProbesInitArgs = nullptr; // Indirect dispatch buffer for active-only probes updating (trace+blend)
DynamicDiffuseGlobalIlluminationPass::BindingData Result;
FORCE_INLINE void Release()
@@ -96,6 +97,8 @@ public:
RenderTargetPool::Release(ProbesState);
RenderTargetPool::Release(ProbesIrradiance);
RenderTargetPool::Release(ProbesDistance);
SAFE_DELETE_GPU_RESOURCE(ActiveProbes);
SAFE_DELETE_GPU_RESOURCE(UpdateProbesInitArgs);
}
~DDGICustomBuffer()
@@ -174,7 +177,11 @@ bool DynamicDiffuseGlobalIlluminationPass::setupResources()
if (!_cb0 || !_cb1)
return true;
_csClassify = shader->GetCS("CS_Classify");
_csTraceRays = shader->GetCS("CS_TraceRays");
_csUpdateProbesInitArgs = shader->GetCS("CS_UpdateProbesInitArgs");
_csTraceRays[0] = shader->GetCS("CS_TraceRays", 0);
_csTraceRays[1] = shader->GetCS("CS_TraceRays", 1);
_csTraceRays[2] = shader->GetCS("CS_TraceRays", 2);
_csTraceRays[3] = shader->GetCS("CS_TraceRays", 3);
_csUpdateProbesIrradiance = shader->GetCS("CS_UpdateProbes", 0);
_csUpdateProbesDistance = shader->GetCS("CS_UpdateProbes", 1);
_csUpdateBordersIrradianceRow = shader->GetCS("CS_UpdateBorders", 0);
@@ -201,7 +208,11 @@ void DynamicDiffuseGlobalIlluminationPass::OnShaderReloading(Asset* obj)
{
LastFrameShaderReload = Engine::FrameCount;
_csClassify = nullptr;
_csTraceRays = nullptr;
_csUpdateProbesInitArgs = nullptr;
_csTraceRays[0] = nullptr;
_csTraceRays[1] = nullptr;
_csTraceRays[2] = nullptr;
_csTraceRays[3] = nullptr;
_csUpdateProbesIrradiance = nullptr;
_csUpdateProbesDistance = nullptr;
_csUpdateBordersIrradianceRow = nullptr;
@@ -221,7 +232,6 @@ void DynamicDiffuseGlobalIlluminationPass::Dispose()
// Cleanup
_cb0 = nullptr;
_cb1 = nullptr;
_csTraceRays = nullptr;
_shader = nullptr;
SAFE_DELETE_GPU_RESOURCE(_psIndirectLighting);
#if USE_EDITOR
@@ -268,26 +278,32 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext,
// Setup options
auto& settings = renderContext.List->Settings.GlobalIllumination;
// TODO: implement GI Quality to affect cascades update rate, probes spacing and rays count per probe
const float probesSpacing = 100.0f; // GI probes placement spacing nearby camera (for closest cascade; gets automatically reduced for further cascades)
switch (Graphics::GIQuality)
auto* graphicsSettings = GraphicsSettings::Get();
const float probesSpacing = Math::Clamp(graphicsSettings->GIProbesSpacing, 10.0f, 1000.0f); // GI probes placement spacing nearby camera (for closest cascade; gets automatically reduced for further cascades)
int32 probeRaysCount; // Amount of rays to trace randomly around each probe
switch (Graphics::GIQuality) // Ensure to match CS_TraceRays permutations
{
case Quality::Low:
probeRaysCount = 96;
break;
case Quality::Medium:
probeRaysCount = 128;
break;
case Quality::High:
probeRaysCount = 192;
break;
case Quality::Ultra:
default:
probeRaysCount = 256;
break;
default:
return true;
}
bool debugProbes = false; // TODO: add debug option to draw probes locations -> in Graphics window - Editor-only
ASSERT_LOW_LAYER(probeRaysCount <= DDGI_TRACE_RAYS_LIMIT);
bool debugProbes = renderContext.View.Mode == ViewMode::GlobalIllumination;
const float indirectLightingIntensity = settings.Intensity;
const float probeHistoryWeight = Math::Clamp(settings.TemporalResponse, 0.0f, 0.98f);
const float distance = settings.Distance;
const Color fallbackIrradiance = settings.FallbackIrradiance;
const int32 probeRaysCount = Math::Min(Math::AlignUp(256, DDGI_TRACE_RAYS_GROUP_SIZE_X), DDGI_TRACE_RAYS_LIMIT); // TODO: make it based on the GI Quality
// Automatically calculate amount of cascades to cover the GI distance at the current probes spacing
const int32 idealProbesCount = 20; // Ideal amount of probes per-cascade to try to fit in order to cover whole distance
@@ -365,14 +381,19 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext,
// Allocate probes textures
uint64 memUsage = 0;
auto desc = GPUTextureDescription::New2D(probesCountTotalX, probesCountTotalY, PixelFormat::Unknown);
// TODO rethink probes data placement in memory -> what if we get [50x50x30] resolution? That's 75000 probes! Use sparse storage with active-only probes
#define INIT_TEXTURE(texture, format, width, height) desc.Format = format; desc.Width = width; desc.Height = height; ddgiData.texture = RenderTargetPool::Get(desc); if (!ddgiData.texture) return true; memUsage += ddgiData.texture->GetMemoryUsage()
desc.Flags = GPUTextureFlags::ShaderResource | GPUTextureFlags::UnorderedAccess;
INIT_TEXTURE(ProbesTrace, PixelFormat::R16G16B16A16_Float, probeRaysCount, Math::Min(probesCountCascade, DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT));
INIT_TEXTURE(ProbesState, PixelFormat::R16G16B16A16_Float, probesCountTotalX, probesCountTotalY); // TODO: optimize to a RGBA32 (pos offset can be normalized to [0-0.5] range of ProbesSpacing and packed with state flag)
INIT_TEXTURE(ProbesState, PixelFormat::R8G8B8A8_SNorm, probesCountTotalX, probesCountTotalY);
INIT_TEXTURE(ProbesIrradiance, PixelFormat::R11G11B10_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2), probesCountTotalY * (DDGI_PROBE_RESOLUTION_IRRADIANCE + 2));
INIT_TEXTURE(ProbesDistance, PixelFormat::R16G16_Float, probesCountTotalX * (DDGI_PROBE_RESOLUTION_DISTANCE + 2), probesCountTotalY * (DDGI_PROBE_RESOLUTION_DISTANCE + 2));
#undef INIT_TEXTURE
#define INIT_BUFFER(buffer, name) ddgiData.buffer = GPUDevice::Instance->CreateBuffer(TEXT(name)); if (!ddgiData.buffer || ddgiData.buffer->Init(desc2)) return true; memUsage += ddgiData.buffer->GetMemoryUsage();
GPUBufferDescription desc2 = GPUBufferDescription::Raw((probesCountCascade + 1) * sizeof(uint32), GPUBufferFlags::ShaderResource | GPUBufferFlags::UnorderedAccess);
INIT_BUFFER(ActiveProbes, "DDGI.ActiveProbes");
desc2 = GPUBufferDescription::Buffer(sizeof(GPUDispatchIndirectArgs) * Math::DivideAndRoundUp(probesCountCascade, DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT), GPUBufferFlags::Argument | GPUBufferFlags::UnorderedAccess, PixelFormat::R32_UInt, nullptr, sizeof(uint32));
INIT_BUFFER(UpdateProbesInitArgs, "DDGI.UpdateProbesInitArgs");
#undef INIT_BUFFER
LOG(Info, "Dynamic Diffuse Global Illumination memory usage: {0} MB, probes: {1}", memUsage / 1024 / 1024, probesCountTotal);
clear = true;
}
@@ -389,8 +410,8 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext,
}
// Calculate which cascades should be updated this frame
const uint64 cascadeFrequencies[] = { 1, 2, 3, 5 };
// TODO: prevent updating 2 cascades at once on Low quality
const uint64 cascadeFrequencies[] = { 2, 3, 5, 7 };
//const uint64 cascadeFrequencies[] = { 1, 2, 3, 5 };
//const uint64 cascadeFrequencies[] = { 1, 1, 1, 1 };
bool cascadeSkipUpdate[4];
for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++)
@@ -405,7 +426,7 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext,
continue;
auto& cascade = ddgiData.Cascades[cascadeIndex];
// Reset the volume origin and scroll offsets for each axis
// Reset the volume origin and scroll offsets for each axis once it overflows
for (int32 axis = 0; axis < 3; axis++)
{
if (cascade.ProbeScrollOffsets.Raw[axis] != 0 && (cascade.ProbeScrollOffsets.Raw[axis] % ddgiData.ProbeCounts.Raw[axis] == 0))
@@ -423,7 +444,7 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext,
const float value = translation.Raw[axis] / cascade.ProbesSpacing;
const int32 scroll = value >= 0.0f ? (int32)Math::Floor(value) : (int32)Math::Ceil(value);
cascade.ProbeScrollOffsets.Raw[axis] += scroll;
cascade.ProbeScrollClear[axis] = scroll != 0;
cascade.ProbeScrollClears.Raw[axis] = scroll;
cascade.ProbeScrollDirections.Raw[axis] = translation.Raw[axis] >= 0.0f ? 1 : -1;
}
}
@@ -437,13 +458,11 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext,
for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++)
{
auto& cascade = ddgiData.Cascades[cascadeIndex];
int32 probeScrollClear = cascade.ProbeScrollClear[0] + cascade.ProbeScrollClear[1] * 2 + cascade.ProbeScrollClear[2] * 4; // Pack clear flags into bits
ddgiData.Result.Constants.ProbesOriginAndSpacing[cascadeIndex] = Float4(cascade.ProbesOrigin, cascade.ProbesSpacing);
ddgiData.Result.Constants.ProbesScrollOffsets[cascadeIndex] = Int4(cascade.ProbeScrollOffsets, probeScrollClear);
ddgiData.Result.Constants.ProbeScrollDirections[cascadeIndex] = Int4(cascade.ProbeScrollDirections, 0);
ddgiData.Result.Constants.ProbesScrollOffsets[cascadeIndex] = Int4(cascade.ProbeScrollOffsets, 0);
}
ddgiData.Result.Constants.RayMaxDistance = 10000.0f; // TODO: adjust to match perf/quality ratio (make it based on Global SDF and Global Surface Atlas distance)
ddgiData.Result.Constants.ViewDir = renderContext.View.Direction;
ddgiData.Result.Constants.ViewPos = renderContext.View.Position;
ddgiData.Result.Constants.RaysCount = probeRaysCount;
ddgiData.Result.Constants.ProbeHistoryWeight = probeHistoryWeight;
ddgiData.Result.Constants.IrradianceGamma = 5.0f;
@@ -465,6 +484,11 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext,
data.GlobalSDF = bindingDataSDF.Constants;
data.GlobalSurfaceAtlas = bindingDataSurfaceAtlas.Constants;
data.ResetBlend = clear ? 1.0f : 0.0f;
for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++)
{
auto& cascade = ddgiData.Cascades[cascadeIndex];
data.ProbeScrollClears[cascadeIndex] = Int4(cascade.ProbeScrollClears, 0);
}
if (renderContext.List->Settings.AntiAliasing.Mode == AntialiasingMode::TemporalAntialiasing)
{
// Use temporal offset in the dithering factor (gets cleaned out by TAA)
@@ -482,26 +506,6 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext,
context->BindCB(0, _cb0);
}
// Classify probes (activation/deactivation and relocation)
{
PROFILE_GPU_CPU("Probes Classification");
uint32 threadGroups = Math::DivideAndRoundUp(probesCountCascade, DDGI_PROBE_CLASSIFY_GROUP_SIZE);
bindingDataSDF.BindCascades(context, 0);
bindingDataSDF.BindCascadeMips(context, 4);
context->BindUA(0, ddgiData.Result.ProbesState);
for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++)
{
if (cascadeSkipUpdate[cascadeIndex])
continue;
Data1 data;
data.CascadeIndex = cascadeIndex;
context->UpdateCB(_cb1, &data);
context->BindCB(1, _cb1);
context->Dispatch(_csClassify, threadGroups, 1, 1);
}
context->ResetUA();
}
// Update probes
{
PROFILE_GPU_CPU("Probes Update");
@@ -513,10 +517,38 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext,
continue;
anyDirty = true;
// Classify probes (activation/deactivation and relocation)
{
PROFILE_GPU_CPU("Classify Probes");
uint32 activeProbesCount = 0;
context->UpdateBuffer(ddgiData.ActiveProbes, &activeProbesCount, sizeof(uint32), 0);
threadGroupsX = Math::DivideAndRoundUp(probesCountCascade, DDGI_PROBE_CLASSIFY_GROUP_SIZE);
context->BindSR(0, bindingDataSDF.Texture ? bindingDataSDF.Texture->ViewVolume() : nullptr);
context->BindSR(1, bindingDataSDF.TextureMip ? bindingDataSDF.TextureMip->ViewVolume() : nullptr);
context->BindUA(0, ddgiData.Result.ProbesState);
context->BindUA(1, ddgiData.ActiveProbes->View());
Data1 data;
data.CascadeIndex = cascadeIndex;
context->UpdateCB(_cb1, &data);
context->BindCB(1, _cb1);
context->Dispatch(_csClassify, threadGroupsX, 1, 1);
context->ResetUA();
context->ResetSR();
}
// Build indirect args for probes updating (loop over active-only probes)
{
PROFILE_GPU_CPU("Init Args");
context->BindSR(0, ddgiData.ActiveProbes->View());
context->BindUA(0, ddgiData.UpdateProbesInitArgs->View());
context->Dispatch(_csUpdateProbesInitArgs, 1, 1, 1);
context->ResetUA();
}
// Update probes in batches so ProbesTrace texture can be smaller
uint32 arg = 0;
for (int32 probesOffset = 0; probesOffset < probesCountCascade; probesOffset += DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT)
{
uint32 probesBatchSize = Math::Min(probesCountCascade - probesOffset, DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT);
Data1 data;
data.CascadeIndex = cascadeIndex;
data.ProbeIndexOffset = probesOffset;
@@ -528,26 +560,20 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext,
PROFILE_GPU_CPU("Trace Rays");
// Global SDF with Global Surface Atlas software raytracing (thread X - per probe ray, thread Y - per probe)
ASSERT_LOW_LAYER((probeRaysCount % DDGI_TRACE_RAYS_GROUP_SIZE_X) == 0);
bindingDataSDF.BindCascades(context, 0);
bindingDataSDF.BindCascadeMips(context, 4);
context->BindSR(8, bindingDataSurfaceAtlas.Chunks ? bindingDataSurfaceAtlas.Chunks->View() : nullptr);
context->BindSR(9, bindingDataSurfaceAtlas.CulledObjects ? bindingDataSurfaceAtlas.CulledObjects->View() : nullptr);
context->BindSR(10, bindingDataSurfaceAtlas.AtlasDepth->View());
context->BindSR(11, bindingDataSurfaceAtlas.AtlasLighting->View());
context->BindSR(12, ddgiData.Result.ProbesState);
context->BindSR(13, skybox);
context->BindSR(0, bindingDataSDF.Texture ? bindingDataSDF.Texture->ViewVolume() : nullptr);
context->BindSR(1, bindingDataSDF.TextureMip ? bindingDataSDF.TextureMip->ViewVolume() : nullptr);
context->BindSR(2, bindingDataSurfaceAtlas.Chunks ? bindingDataSurfaceAtlas.Chunks->View() : nullptr);
context->BindSR(3, bindingDataSurfaceAtlas.CulledObjects ? bindingDataSurfaceAtlas.CulledObjects->View() : nullptr);
context->BindSR(4, bindingDataSurfaceAtlas.Objects ? bindingDataSurfaceAtlas.Objects->View() : nullptr);
context->BindSR(5, bindingDataSurfaceAtlas.AtlasDepth->View());
context->BindSR(6, bindingDataSurfaceAtlas.AtlasLighting->View());
context->BindSR(7, ddgiData.Result.ProbesState);
context->BindSR(8, skybox);
context->BindSR(9, ddgiData.ActiveProbes->View());
context->BindUA(0, ddgiData.ProbesTrace->View());
context->Dispatch(_csTraceRays, probeRaysCount / DDGI_TRACE_RAYS_GROUP_SIZE_X, probesBatchSize, 1);
context->DispatchIndirect(_csTraceRays[(int32)Graphics::GIQuality], ddgiData.UpdateProbesInitArgs, arg);
context->ResetUA();
context->ResetSR();
#if 0
// Probes trace debug preview
context->SetViewportAndScissors(renderContext.View.ScreenSize.X, renderContext.View.ScreenSize.Y);
context->SetRenderTarget(lightBuffer);
context->Draw(ddgiData.ProbesTrace);
return false;
#endif
}
// Update probes irradiance and distance textures (one thread-group per probe)
@@ -555,11 +581,16 @@ bool DynamicDiffuseGlobalIlluminationPass::Render(RenderContext& renderContext,
PROFILE_GPU_CPU("Update Probes");
context->BindSR(0, ddgiData.Result.ProbesState);
context->BindSR(1, ddgiData.ProbesTrace->View());
context->BindSR(2, ddgiData.ActiveProbes->View());
context->BindUA(0, ddgiData.Result.ProbesIrradiance);
context->Dispatch(_csUpdateProbesIrradiance, probesBatchSize, 1, 1);
context->DispatchIndirect(_csUpdateProbesIrradiance, ddgiData.UpdateProbesInitArgs, arg);
context->BindUA(0, ddgiData.Result.ProbesDistance);
context->Dispatch(_csUpdateProbesDistance, probesBatchSize, 1, 1);
context->DispatchIndirect(_csUpdateProbesDistance, ddgiData.UpdateProbesInitArgs, arg);
context->ResetUA();
context->ResetSR();
}
arg += sizeof(GPUDispatchIndirectArgs);
}
}

View File

@@ -17,7 +17,6 @@ public:
{
Float4 ProbesOriginAndSpacing[4];
Int4 ProbesScrollOffsets[4];
Int4 ProbeScrollDirections[4];
uint32 ProbesCounts[3];
uint32 CascadesCount;
float IrradianceGamma;
@@ -25,7 +24,7 @@ public:
float RayMaxDistance;
float IndirectLightingIntensity;
Float4 RaysRotation;
Float3 ViewDir;
Float3 ViewPos;
uint32 RaysCount;
Float3 FallbackIrradiance;
float Padding0;
@@ -46,7 +45,8 @@ private:
GPUConstantBuffer* _cb0 = nullptr;
GPUConstantBuffer* _cb1 = nullptr;
GPUShaderProgramCS* _csClassify;
GPUShaderProgramCS* _csTraceRays;
GPUShaderProgramCS* _csUpdateProbesInitArgs;
GPUShaderProgramCS* _csTraceRays[4];
GPUShaderProgramCS* _csUpdateProbesIrradiance;
GPUShaderProgramCS* _csUpdateProbesDistance;
GPUShaderProgramCS* _csUpdateBordersIrradianceRow;

View File

@@ -10,8 +10,8 @@
#include "Engine/Core/Math/OrientedBoundingBox.h"
#include "Engine/Engine/Engine.h"
#include "Engine/Content/Content.h"
#include "Engine/Core/Config/GraphicsSettings.h"
#include "Engine/Graphics/GPUDevice.h"
#include "Engine/Graphics/Graphics.h"
#include "Engine/Graphics/RenderTask.h"
#include "Engine/Graphics/RenderBuffers.h"
#include "Engine/Graphics/RenderTargetPool.h"
@@ -34,7 +34,7 @@
#define GLOBAL_SURFACE_ATLAS_TILE_PROJ_PLANE_OFFSET 0.1f // Small offset to prevent clipping with the closest triangles (shifts near and far planes)
#define GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES 0 // Forces to redraw all object tiles every frame
#define GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_OBJECTS 0 // Debug draws object bounds on redraw (and tile draw projection locations)
#define GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_CHUNKS 0 // Debug draws culled chunks bounds (non-empty
#define GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_CHUNKS 0 // Debug draws culled chunks bounds (non-empty)
#if GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_OBJECTS || GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_CHUNKS
#include "Engine/Debug/DebugDraw.h"
@@ -86,7 +86,8 @@ struct GlobalSurfaceAtlasTile : RectPack<GlobalSurfaceAtlasTile, uint16>
struct GlobalSurfaceAtlasObject
{
uint64 LastFrameUsed;
uint64 LastFrameDirty;
uint64 LastFrameUpdated;
uint64 LightingUpdateFrame; // Index of the frame to update lighting for this object (calculated when object gets dirty or overriden by dynamic lights)
Actor* Actor;
GlobalSurfaceAtlasTile* Tiles[6];
float Radius;
@@ -120,6 +121,12 @@ struct GlobalSurfaceAtlasObject
}
};
// Per-light bookkeeping entry stored in the surface atlas Lights dictionary.
// Both frame indices start at zero, so a freshly-created entry is treated as never used and never updated.
struct GlobalSurfaceAtlasLight
{
    // Frame index at which this light was last marked as used.
    uint64 LastFrameUsed{ 0 };
    // Frame index at which this light last had its lighting updated (set to 0 to force a redraw).
    uint64 LastFrameUpdated{ 0 };
};
class GlobalSurfaceAtlasCustomBuffer : public RenderBuffers::CustomBuffer, public ISceneRenderingListener
{
public:
@@ -134,10 +141,12 @@ public:
GPUTexture* AtlasLighting = nullptr;
GPUBuffer* ChunksBuffer = nullptr;
GPUBuffer* CulledObjectsBuffer = nullptr;
DynamicTypedBuffer ObjectsBuffer;
int32 CulledObjectsCounterIndex = -1;
GlobalSurfaceAtlasPass::BindingData Result;
GlobalSurfaceAtlasTile* AtlasTiles = nullptr; // TODO: optimize with a single allocation for atlas tiles
Dictionary<void*, GlobalSurfaceAtlasObject> Objects;
Dictionary<Guid, GlobalSurfaceAtlasLight> Lights;
// Cached data to be reused during RasterizeActor
uint64 CurrentFrame;
@@ -148,12 +157,18 @@ public:
float DistanceScalingEnd;
float DistanceScaling;
// Constructs the custom buffer and its per-instance ObjectsBuffer, which uses the R32G32B32A32_Float format
// and the "GlobalSurfaceAtlas.ObjectsBuffer" debug name.
// NOTE(review): the first argument is presumably the initial capacity, sized for 256 objects
// (object data stride plus a share of the tile data stride) - confirm against the DynamicTypedBuffer constructor.
GlobalSurfaceAtlasCustomBuffer()
: ObjectsBuffer(256 * (GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE + GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE * 3 / 4), PixelFormat::R32G32B32A32_Float, false, TEXT("GlobalSurfaceAtlas.ObjectsBuffer"))
{
}
// Drops all cached per-object and per-light atlas state.
FORCE_INLINE void ClearObjects()
{
// -1 marks that there is no culled objects counter slot to read back
CulledObjectsCounterIndex = -1;
// Stamp the defragmentation time with the current engine frame
LastFrameAtlasDefragmentation = Engine::FrameCount;
// Delete the atlas tiles before clearing the objects that point at them
SAFE_DELETE(AtlasTiles);
Objects.Clear();
Lights.Clear();
}
FORCE_INLINE void Clear()
@@ -188,7 +203,13 @@ public:
if (object)
{
// Dirty object to redraw
object->LastFrameDirty = 0;
object->LastFrameUpdated = 0;
}
GlobalSurfaceAtlasLight* light = Lights.TryGet(a->GetID());
if (light)
{
// Dirty light to redraw
light->LastFrameUpdated = 0;
}
}
}
@@ -265,12 +286,20 @@ bool GlobalSurfaceAtlasPass::setupResources()
if (_psClear->Init(psDesc))
return true;
}
if (!_psDirectLighting0)
{
_psDirectLighting0 = device->CreatePipelineState();
psDesc.DepthTestEnable = false;
psDesc.DepthWriteEnable = false;
psDesc.DepthFunc = ComparisonFunc::Never;
if (!_psClearLighting)
{
_psClearLighting = device->CreatePipelineState();
psDesc.VS = shader->GetVS("VS_Atlas");
psDesc.PS = shader->GetPS("PS_ClearLighting");
if (_psClearLighting->Init(psDesc))
return true;
}
if (!_psDirectLighting0)
{
_psDirectLighting0 = device->CreatePipelineState();
psDesc.BlendMode = BlendingMode::Add;
psDesc.BlendMode.RenderTargetWriteMask = BlendingMode::ColorWrite::RGB;
psDesc.PS = shader->GetPS("PS_Lighting", 0);
@@ -294,6 +323,7 @@ bool GlobalSurfaceAtlasPass::setupResources()
void GlobalSurfaceAtlasPass::OnShaderReloading(Asset* obj)
{
SAFE_DELETE_GPU_RESOURCE(_psClear);
SAFE_DELETE_GPU_RESOURCE(_psClearLighting);
SAFE_DELETE_GPU_RESOURCE(_psDirectLighting0);
SAFE_DELETE_GPU_RESOURCE(_psDirectLighting1);
SAFE_DELETE_GPU_RESOURCE(_psIndirectLighting);
@@ -309,9 +339,9 @@ void GlobalSurfaceAtlasPass::Dispose()
// Cleanup
SAFE_DELETE(_vertexBuffer);
SAFE_DELETE(_objectsBuffer);
SAFE_DELETE_GPU_RESOURCE(_culledObjectsSizeBuffer);
SAFE_DELETE_GPU_RESOURCE(_psClear);
SAFE_DELETE_GPU_RESOURCE(_psClearLighting);
SAFE_DELETE_GPU_RESOURCE(_psDirectLighting0);
SAFE_DELETE_GPU_RESOURCE(_psDirectLighting1);
SAFE_DELETE_GPU_RESOURCE(_psIndirectLighting);
@@ -345,8 +375,9 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
PROFILE_GPU_CPU("Global Surface Atlas");
// Setup options
const int32 resolution = Math::Clamp(Graphics::GlobalSurfaceAtlasResolution, 256, GPU_MAX_TEXTURE_SIZE);
const float resolutionInv = 1.0f / resolution;
auto* graphicsSettings = GraphicsSettings::Get();
const int32 resolution = Math::Clamp(graphicsSettings->GlobalSurfaceAtlasResolution, 256, GPU_MAX_TEXTURE_SIZE);
const float resolutionInv = 1.0f / (float)resolution;
auto& giSettings = renderContext.List->Settings.GlobalIllumination;
const float distance = giSettings.Distance;
@@ -394,8 +425,6 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
surfaceAtlasData.AtlasTiles = New<GlobalSurfaceAtlasTile>(0, 0, resolution, resolution);
if (!_vertexBuffer)
_vertexBuffer = New<DynamicVertexBuffer>(0u, (uint32)sizeof(AtlasTileVertex), TEXT("GlobalSurfaceAtlas.VertexBuffer"));
if (!_objectsBuffer)
_objectsBuffer = New<DynamicTypedBuffer>(256 * (GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE + GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE * 3 / 4), PixelFormat::R32G32B32A32_Float, false, TEXT("GlobalSurfaceAtlas.ObjectsBuffer"));
// Utility for writing into tiles vertex buffer
const Float2 posToClipMul(2.0f * resolutionInv, -2.0f * resolutionInv);
@@ -430,7 +459,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
// Add objects into the atlas
{
PROFILE_CPU_NAMED("Draw");
_objectsBuffer->Clear();
surfaceAtlasData.ObjectsBuffer.Clear();
_dirtyObjectsBuffer.Clear();
_surfaceAtlasData = &surfaceAtlasData;
renderContext.View.Pass = DrawPass::GlobalSurfaceAtlas;
@@ -589,7 +618,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
// Send objects data to the GPU
{
PROFILE_GPU_CPU("Update Objects");
_objectsBuffer->Flush(context);
surfaceAtlasData.ObjectsBuffer.Flush(context);
}
// Init constants
@@ -607,10 +636,10 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
// Each chunk (ChunksBuffer) contains uint with address of the culled objects data start in CulledObjectsBuffer.
// If chunk has address=0 then it's unused/empty.
// Chunk [0,0,0] is unused and its address=0 is used for atomic counter for writing into CulledObjectsBuffer.
// Each chunk data contains objects count + all objects with tiles copied into buffer.
// This allows to quickly convert world-space position into chunk, then read chunk data start and loop over culled objects (less objects and data already in place).
// Each chunk data contains objects count + all objects addresses.
// This allows to quickly convert world-space position into chunk, then read chunk data start and loop over culled objects.
PROFILE_GPU_CPU("Cull Objects");
uint32 objectsBufferCapacity = (uint32)((float)_objectsBuffer->Data.Count() * 1.3f);
uint32 objectsBufferCapacity = (uint32)((float)surfaceAtlasData.Objects.Count() * 1.3f);
// Copy counter from ChunksBuffer into staging buffer to access current chunks memory usage to adapt dynamically to the scene complexity
if (surfaceAtlasData.ChunksBuffer)
@@ -634,7 +663,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
_culledObjectsSizeBuffer->Unmap();
if (counter > 0)
{
objectsBufferCapacity = counter * sizeof(Float4);
objectsBufferCapacity = counter;
notReady = false;
}
}
@@ -652,28 +681,28 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
}
}
}
if (surfaceAtlasData.CulledObjectsCounterIndex != -1)
if (surfaceAtlasData.CulledObjectsCounterIndex != -1 && surfaceAtlasData.CulledObjectsBuffer)
{
// Copy current counter value
_culledObjectsSizeFrames[surfaceAtlasData.CulledObjectsCounterIndex] = currentFrame;
context->CopyBuffer(_culledObjectsSizeBuffer, surfaceAtlasData.ChunksBuffer, sizeof(uint32), surfaceAtlasData.CulledObjectsCounterIndex * sizeof(uint32), 0);
context->CopyBuffer(_culledObjectsSizeBuffer, surfaceAtlasData.CulledObjectsBuffer, sizeof(uint32), surfaceAtlasData.CulledObjectsCounterIndex * sizeof(uint32), 0);
}
}
// Allocate buffer for culled objects (estimated size)
objectsBufferCapacity = Math::Min(Math::AlignUp(objectsBufferCapacity, 4096u), (uint32)MAX_int32);
objectsBufferCapacity = Math::Min(Math::AlignUp<uint32>(objectsBufferCapacity * sizeof(uint32), 4096u), (uint32)MAX_int32);
if (!surfaceAtlasData.CulledObjectsBuffer)
surfaceAtlasData.CulledObjectsBuffer = GPUDevice::Instance->CreateBuffer(TEXT("GlobalSurfaceAtlas.CulledObjectsBuffer"));
if (surfaceAtlasData.CulledObjectsBuffer->GetSize() < objectsBufferCapacity)
{
const GPUBufferDescription desc = GPUBufferDescription::Buffer(objectsBufferCapacity, GPUBufferFlags::UnorderedAccess | GPUBufferFlags::ShaderResource, PixelFormat::R32G32B32A32_Float, nullptr, sizeof(Float4));
const auto desc = GPUBufferDescription::Raw(objectsBufferCapacity, GPUBufferFlags::UnorderedAccess | GPUBufferFlags::ShaderResource);
if (surfaceAtlasData.CulledObjectsBuffer->Init(desc))
return true;
}
// Clear chunks counter (chunk at 0 is used for a counter so chunks buffer is aligned)
uint32 counter = 1; // Indicate that 1st float4 is used so value 0 can be used as invalid chunk address
context->UpdateBuffer(surfaceAtlasData.ChunksBuffer, &counter, sizeof(counter), 0);
// Clear chunks counter (uint at 0 is used for a counter)
uint32 counter = 1; // Move write location for culled objects after counter
context->UpdateBuffer(surfaceAtlasData.CulledObjectsBuffer, &counter, sizeof(counter), 0);
// Cull objects into chunks (1 thread per chunk)
Data0 data;
@@ -686,7 +715,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
context->BindCB(0, _cb0);
static_assert(GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION % GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE == 0, "Invalid chunks resolution/groups setting.");
const int32 chunkDispatchGroups = GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION / GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE;
context->BindSR(0, _objectsBuffer->GetBuffer()->View());
context->BindSR(0, surfaceAtlasData.ObjectsBuffer.GetBuffer()->View());
context->BindUA(0, surfaceAtlasData.ChunksBuffer->View());
context->BindUA(1, surfaceAtlasData.CulledObjectsBuffer->View());
context->Dispatch(_csCullObjects, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups);
@@ -700,11 +729,11 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
{
for (int32 x = 0; x < GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION; x++)
{
Float3 chunkCoord(x, y, z);
Float3 chunkMin = result.GlobalSurfaceAtlas.ViewPos + (chunkCoord - (GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION * 0.5f)) * result.GlobalSurfaceAtlas.ChunkSize;
Float3 chunkMax = chunkMin + result.GlobalSurfaceAtlas.ChunkSize;
Float3 chunkCoord((float)x, (float)y, (float)z);
Float3 chunkMin = result.Constants.ViewPos + (chunkCoord - (GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION * 0.5f)) * result.Constants.ChunkSize;
Float3 chunkMax = chunkMin + result.Constants.ChunkSize;
BoundingBox chunkBounds(chunkMin, chunkMax);
if (Float3::Distance(chunkBounds.GetCenter(), result.GlobalSurfaceAtlas.ViewPos) >= 2000.0f)
if (Float3::Distance(chunkBounds.GetCenter(), result.Constants.ViewPos) >= 2000.0f)
continue;
int32 count = 0;
@@ -733,37 +762,136 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
result.Atlas[4] = surfaceAtlasData.AtlasLighting;
result.Chunks = surfaceAtlasData.ChunksBuffer;
result.CulledObjects = surfaceAtlasData.CulledObjectsBuffer;
result.Objects = surfaceAtlasData.ObjectsBuffer.GetBuffer();
surfaceAtlasData.Result = result;
// Render direct lighting into atlas
if (surfaceAtlasData.Objects.Count() != 0)
{
PROFILE_GPU_CPU("Direct Lighting");
// Copy emissive light into the final direct lighting atlas
// TODO: test perf diff when manually copying only dirty object tiles and dirty light tiles together with indirect lighting
{
PROFILE_GPU_CPU("Copy Emissive");
context->CopyTexture(surfaceAtlasData.AtlasLighting, 0, 0, 0, 0, surfaceAtlasData.AtlasEmissive, 0);
}
context->SetViewportAndScissors(Viewport(0, 0, (float)resolution, (float)resolution));
context->SetRenderTarget(surfaceAtlasData.AtlasLighting->View());
context->BindSR(0, surfaceAtlasData.AtlasGBuffer0->View());
context->BindSR(1, surfaceAtlasData.AtlasGBuffer1->View());
context->BindSR(2, surfaceAtlasData.AtlasGBuffer2->View());
context->BindSR(3, surfaceAtlasData.AtlasDepth->View());
context->BindSR(4, _objectsBuffer->GetBuffer()->View());
bindingDataSDF.BindCascades(context, 5);
bindingDataSDF.BindCascadeMips(context, 9);
context->BindSR(4, surfaceAtlasData.ObjectsBuffer.GetBuffer()->View());
context->BindSR(5, bindingDataSDF.Texture ? bindingDataSDF.Texture->ViewVolume() : nullptr);
context->BindSR(6, bindingDataSDF.TextureMip ? bindingDataSDF.TextureMip->ViewVolume() : nullptr);
context->BindCB(0, _cb0);
Data0 data;
data.ViewWorldPos = renderContext.View.Position;
data.GlobalSDF = bindingDataSDF.Constants;
data.GlobalSurfaceAtlas = result.Constants;
// Collect objects to update lighting this frame (dirty objects and dirty lights)
bool allLightingDirty = false;
for (auto& light : renderContext.List->DirectionalLights)
{
GlobalSurfaceAtlasLight& lightData = surfaceAtlasData.Lights[light.ID];
lightData.LastFrameUsed = currentFrame;
uint32 redrawFramesCount = (light.StaticFlags & StaticFlags::Lightmap) ? 120 : 4;
if (surfaceAtlasData.CurrentFrame - lightData.LastFrameUpdated < (redrawFramesCount + (light.ID.D & redrawFramesCount)))
continue;
lightData.LastFrameUpdated = currentFrame;
// Mark all objects to shade
allLightingDirty = true;
}
if (renderContext.View.Flags & ViewFlags::GI && (renderContext.List->DirectionalLights.Count() != 1 || renderContext.List->DirectionalLights[0].StaticFlags & StaticFlags::Lightmap))
{
switch (renderContext.List->Settings.GlobalIllumination.Mode)
{
case GlobalIlluminationMode::DDGI:
{
DynamicDiffuseGlobalIlluminationPass::BindingData bindingDataDDGI;
if (!DynamicDiffuseGlobalIlluminationPass::Instance()->Get(renderContext.Buffers, bindingDataDDGI))
{
GlobalSurfaceAtlasLight& lightData = surfaceAtlasData.Lights[Guid(0, 0, 0, 1)];
lightData.LastFrameUsed = currentFrame;
uint32 redrawFramesCount = 4; // GI Bounce redraw minimum frequency
if (surfaceAtlasData.CurrentFrame - lightData.LastFrameUpdated < redrawFramesCount)
break;
lightData.LastFrameUpdated = currentFrame;
// Mark all objects to shade
allLightingDirty = true;
}
break;
}
}
}
for (auto& light : renderContext.List->PointLights)
{
GlobalSurfaceAtlasLight& lightData = surfaceAtlasData.Lights[light.ID];
lightData.LastFrameUsed = currentFrame;
uint32 redrawFramesCount = (light.StaticFlags & StaticFlags::Lightmap) ? 120 : 4;
if (surfaceAtlasData.CurrentFrame - lightData.LastFrameUpdated < (redrawFramesCount + (light.ID.D & redrawFramesCount)))
continue;
lightData.LastFrameUpdated = currentFrame;
if (!allLightingDirty)
{
// Mark objects to shade
for (auto& e : surfaceAtlasData.Objects)
{
auto& object = e.Value;
Float3 lightToObject = object.Bounds.GetCenter() - light.Position;
if (lightToObject.LengthSquared() >= Math::Square(object.Radius + light.Radius))
continue;
object.LightingUpdateFrame = currentFrame;
}
}
}
for (auto& light : renderContext.List->SpotLights)
{
GlobalSurfaceAtlasLight& lightData = surfaceAtlasData.Lights[light.ID];
lightData.LastFrameUsed = currentFrame;
uint32 redrawFramesCount = (light.StaticFlags & StaticFlags::Lightmap) ? 120 : 4;
if (surfaceAtlasData.CurrentFrame - lightData.LastFrameUpdated < (redrawFramesCount + (light.ID.D & redrawFramesCount)))
continue;
lightData.LastFrameUpdated = currentFrame;
if (!allLightingDirty)
{
// Mark objects to shade
for (auto& e : surfaceAtlasData.Objects)
{
auto& object = e.Value;
Float3 lightToObject = object.Bounds.GetCenter() - light.Position;
if (lightToObject.LengthSquared() >= Math::Square(object.Radius + light.Radius))
continue;
object.LightingUpdateFrame = currentFrame;
}
}
}
// Copy emissive light into the final direct lighting atlas
{
PROFILE_GPU_CPU("Copy Emissive");
_vertexBuffer->Clear();
for (const auto& e : surfaceAtlasData.Objects)
{
const auto& object = e.Value;
if (!allLightingDirty && object.LightingUpdateFrame != currentFrame)
continue;
for (int32 tileIndex = 0; tileIndex < 6; tileIndex++)
{
auto* tile = object.Tiles[tileIndex];
if (!tile)
continue;
VB_WRITE_TILE(tile);
}
}
if (_vertexBuffer->Data.Count() != 0)
{
context->BindSR(7, surfaceAtlasData.AtlasEmissive);
context->SetState(_psClearLighting);
VB_DRAW();
}
}
// Shade object tiles influenced by lights to calculate direct lighting
// TODO: reduce redraw frequency for static lights (StaticFlags::Lightmap)
for (auto& light : renderContext.List->DirectionalLights)
{
// Collect tiles to shade
@@ -771,6 +899,8 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
for (const auto& e : surfaceAtlasData.Objects)
{
const auto& object = e.Value;
if (!allLightingDirty && object.LightingUpdateFrame != currentFrame)
continue;
for (int32 tileIndex = 0; tileIndex < 6; tileIndex++)
{
auto* tile = object.Tiles[tileIndex];
@@ -779,8 +909,11 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
VB_WRITE_TILE(tile);
}
}
if (_vertexBuffer->Data.Count() == 0)
continue;
// Draw light
PROFILE_GPU_CPU("Directional Light");
const bool useShadow = CanRenderShadow(renderContext.View, light);
// TODO: test perf/quality when using Shadow Map for directional light (ShadowsPass::Instance()->LastDirLightShadowMap) instead of Global SDF trace
light.SetupLightData(&data.Light, useShadow);
@@ -797,6 +930,8 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
for (const auto& e : surfaceAtlasData.Objects)
{
const auto& object = e.Value;
if (!allLightingDirty && object.LightingUpdateFrame != currentFrame)
continue;
Float3 lightToObject = object.Bounds.GetCenter() - light.Position;
if (lightToObject.LengthSquared() >= Math::Square(object.Radius + light.Radius))
continue;
@@ -808,8 +943,11 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
VB_WRITE_TILE(tile);
}
}
if (_vertexBuffer->Data.Count() == 0)
continue;
// Draw light
PROFILE_GPU_CPU("Point Light");
const bool useShadow = CanRenderShadow(renderContext.View, light);
light.SetupLightData(&data.Light, useShadow);
data.Light.Color *= light.IndirectLightingIntensity;
@@ -825,6 +963,8 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
for (const auto& e : surfaceAtlasData.Objects)
{
const auto& object = e.Value;
if (!allLightingDirty && object.LightingUpdateFrame != currentFrame)
continue;
Float3 lightToObject = object.Bounds.GetCenter() - light.Position;
if (lightToObject.LengthSquared() >= Math::Square(object.Radius + light.Radius))
continue;
@@ -836,8 +976,11 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
VB_WRITE_TILE(tile);
}
}
if (_vertexBuffer->Data.Count() == 0)
continue;
// Draw light
PROFILE_GPU_CPU("Spot Light");
const bool useShadow = CanRenderShadow(renderContext.View, light);
light.SetupLightData(&data.Light, useShadow);
data.Light.Color *= light.IndirectLightingIntensity;
@@ -846,9 +989,17 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
context->SetState(_psDirectLighting1);
VB_DRAW();
}
// Remove unused lights
for (auto it = surfaceAtlasData.Lights.Begin(); it.IsNotEnd(); ++it)
{
if (it->Value.LastFrameUsed != currentFrame)
surfaceAtlasData.Lights.Remove(it);
}
// Draw indirect light from Global Illumination
if (renderContext.View.Flags & ViewFlags::GI)
{
// Draw indirect light from Global Illumination
switch (renderContext.List->Settings.GlobalIllumination.Mode)
{
case GlobalIlluminationMode::DDGI:
@@ -860,6 +1011,8 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
for (const auto& e : surfaceAtlasData.Objects)
{
const auto& object = e.Value;
if (!allLightingDirty && object.LightingUpdateFrame != currentFrame)
continue;
for (int32 tileIndex = 0; tileIndex < 6; tileIndex++)
{
auto* tile = object.Tiles[tileIndex];
@@ -868,6 +1021,9 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
VB_WRITE_TILE(tile);
}
}
if (_vertexBuffer->Data.Count() == 0)
break;
PROFILE_GPU_CPU("DDGI");
data.DDGI = bindingDataDDGI.Constants;
context->BindSR(5, bindingDataDDGI.ProbesState);
context->BindSR(6, bindingDataDDGI.ProbesDistance);
@@ -927,12 +1083,13 @@ void GlobalSurfaceAtlasPass::RenderDebug(RenderContext& renderContext, GPUContex
context->UpdateCB(_cb0, &data);
context->BindCB(0, _cb0);
}
bindingDataSDF.BindCascades(context, 0);
bindingDataSDF.BindCascadeMips(context, 4);
context->BindSR(8, bindingData.Chunks ? bindingData.Chunks->View() : nullptr);
context->BindSR(9, bindingData.CulledObjects ? bindingData.CulledObjects->View() : nullptr);
context->BindSR(10, bindingData.AtlasDepth->View());
context->BindSR(12, skybox);
context->BindSR(0, bindingDataSDF.Texture ? bindingDataSDF.Texture->ViewVolume() : nullptr);
context->BindSR(1, bindingDataSDF.TextureMip ? bindingDataSDF.TextureMip->ViewVolume() : nullptr);
context->BindSR(2, bindingData.Chunks ? bindingData.Chunks->View() : nullptr);
context->BindSR(3, bindingData.CulledObjects ? bindingData.CulledObjects->View() : nullptr);
context->BindSR(4, bindingData.Objects ? bindingData.Objects->View() : nullptr);
context->BindSR(6, bindingData.AtlasDepth->View());
context->BindSR(7, skybox);
context->SetState(_psDebug);
{
Float2 outputSizeThird = outputSize * 0.333f;
@@ -943,7 +1100,7 @@ void GlobalSurfaceAtlasPass::RenderDebug(RenderContext& renderContext, GPUContex
context->SetRenderTarget(tempBuffer->View());
// Full screen - direct light
context->BindSR(11, bindingData.AtlasLighting->View());
context->BindSR(5, bindingData.AtlasLighting->View());
context->SetViewport(outputSize.X, outputSize.Y);
context->SetScissor(Rectangle(0, 0, outputSizeTwoThird.X, outputSize.Y));
context->DrawFullscreenTriangle();
@@ -957,12 +1114,13 @@ void GlobalSurfaceAtlasPass::RenderDebug(RenderContext& renderContext, GPUContex
context->ResetRenderTarget();
// Rebind resources
bindingDataSDF.BindCascades(context, 0);
bindingDataSDF.BindCascadeMips(context, 4);
context->BindSR(8, bindingData.Chunks ? bindingData.Chunks->View() : nullptr);
context->BindSR(9, bindingData.CulledObjects ? bindingData.CulledObjects->View() : nullptr);
context->BindSR(10, bindingData.AtlasDepth->View());
context->BindSR(12, skybox);
context->BindSR(0, bindingDataSDF.Texture ? bindingDataSDF.Texture->ViewVolume() : nullptr);
context->BindSR(1, bindingDataSDF.TextureMip ? bindingDataSDF.TextureMip->ViewVolume() : nullptr);
context->BindSR(2, bindingData.Chunks ? bindingData.Chunks->View() : nullptr);
context->BindSR(3, bindingData.CulledObjects ? bindingData.CulledObjects->View() : nullptr);
context->BindSR(4, bindingData.Objects ? bindingData.Objects->View() : nullptr);
context->BindSR(6, bindingData.AtlasDepth->View());
context->BindSR(7, skybox);
context->BindCB(0, _cb0);
context->SetState(_psDebug);
context->SetRenderTarget(output->View());
@@ -972,23 +1130,23 @@ void GlobalSurfaceAtlasPass::RenderDebug(RenderContext& renderContext, GPUContex
context->UpdateCB(_cb0, &data);
// Bottom left - diffuse
context->BindSR(11, bindingData.AtlasGBuffer0->View());
context->BindSR(5, bindingData.AtlasGBuffer0->View());
context->SetViewportAndScissors(Viewport(outputSizeTwoThird.X, 0, outputSizeThird.X, outputSizeThird.Y));
context->DrawFullscreenTriangle();
// Bottom middle - normals
context->BindSR(11, bindingData.AtlasGBuffer1->View());
context->BindSR(5, bindingData.AtlasGBuffer1->View());
context->SetViewportAndScissors(Viewport(outputSizeTwoThird.X, outputSizeThird.Y, outputSizeThird.X, outputSizeThird.Y));
context->DrawFullscreenTriangle();
// Bottom right - roughness/metalness/ao
context->BindSR(11, bindingData.AtlasGBuffer2->View());
context->BindSR(5, bindingData.AtlasGBuffer2->View());
context->SetViewportAndScissors(Viewport(outputSizeTwoThird.X, outputSizeTwoThird.Y, outputSizeThird.X, outputSizeThird.Y));
context->DrawFullscreenTriangle();
}
}
void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, const BoundingSphere& actorObjectBounds, const Matrix& localToWorld, const BoundingBox& localBounds, uint32 tilesMask)
void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, const BoundingSphere& actorObjectBounds, const Matrix& localToWorld, const BoundingBox& localBounds, uint32 tilesMask, bool useVisibility)
{
GlobalSurfaceAtlasCustomBuffer& surfaceAtlasData = *_surfaceAtlasData;
Float3 boundsSize = localBounds.GetSize() * actor->GetScale();
@@ -1058,7 +1216,7 @@ void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, con
// Redraw objects from time-to-time (dynamic objects can be animated, static objects can have textures streamed)
uint32 redrawFramesCount = actor->HasStaticFlag(StaticFlags::Lightmap) ? 120 : 4;
if (surfaceAtlasData.CurrentFrame - object->LastFrameDirty >= (redrawFramesCount + (actor->GetID().D & redrawFramesCount)))
if (surfaceAtlasData.CurrentFrame - object->LastFrameUpdated >= (redrawFramesCount + (actor->GetID().D & redrawFramesCount)))
dirty = true;
// Mark object as used
@@ -1069,7 +1227,8 @@ void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, con
object->Radius = (float)actorObjectBounds.Radius;
if (dirty || GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES)
{
object->LastFrameDirty = surfaceAtlasData.CurrentFrame;
object->LastFrameUpdated = surfaceAtlasData.CurrentFrame;
object->LightingUpdateFrame = surfaceAtlasData.CurrentFrame;
_dirtyObjectsBuffer.Add(actorObject);
}
@@ -1078,14 +1237,14 @@ void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, con
object->Bounds.Transformation.GetWorld(localToWorldBounds);
Matrix worldToLocalBounds;
Matrix::Invert(localToWorldBounds, worldToLocalBounds);
uint32 objectAddress = _objectsBuffer->Data.Count() / sizeof(Float4);
auto* objectData = _objectsBuffer->WriteReserve<Float4>(GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE);
uint32 objectAddress = surfaceAtlasData.ObjectsBuffer.Data.Count() / sizeof(Float4);
auto* objectData = surfaceAtlasData.ObjectsBuffer.WriteReserve<Float4>(GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE);
objectData[0] = *(Float4*)&actorObjectBounds;
objectData[1] = Float4::Zero; // w unused
objectData[1] = Float4::Zero;
objectData[2] = Float4(worldToLocalBounds.M11, worldToLocalBounds.M12, worldToLocalBounds.M13, worldToLocalBounds.M41);
objectData[3] = Float4(worldToLocalBounds.M21, worldToLocalBounds.M22, worldToLocalBounds.M23, worldToLocalBounds.M42);
objectData[4] = Float4(worldToLocalBounds.M31, worldToLocalBounds.M32, worldToLocalBounds.M33, worldToLocalBounds.M43);
objectData[5] = Float4(object->Bounds.Extents, 0.0f); // w unused
objectData[5] = Float4(object->Bounds.Extents, useVisibility ? 1.0f : 0.0f);
auto tileOffsets = reinterpret_cast<uint16*>(&objectData[1]); // xyz used for tile offsets packed into uint16
auto objectDataSize = reinterpret_cast<uint32*>(&objectData[1].W); // w used for object size (count of Float4s for object+tiles)
*objectDataSize = GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE;
@@ -1130,7 +1289,7 @@ void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, con
// Per-tile data
const float tileWidth = (float)tile->Width - GLOBAL_SURFACE_ATLAS_TILE_PADDING;
const float tileHeight = (float)tile->Height - GLOBAL_SURFACE_ATLAS_TILE_PADDING;
auto* tileData = _objectsBuffer->WriteReserve<Float4>(GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE);
auto* tileData = surfaceAtlasData.ObjectsBuffer.WriteReserve<Float4>(GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE);
tileData[0] = Float4(tile->X, tile->Y, tileWidth, tileHeight) * surfaceAtlasData.ResolutionInv;
tileData[1] = Float4(tile->ViewMatrix.M11, tile->ViewMatrix.M12, tile->ViewMatrix.M13, tile->ViewMatrix.M41);
tileData[2] = Float4(tile->ViewMatrix.M21, tile->ViewMatrix.M22, tile->ViewMatrix.M23, tile->ViewMatrix.M42);

View File

@@ -38,6 +38,7 @@ public:
};
GPUBuffer* Chunks;
GPUBuffer* CulledObjects;
GPUBuffer* Objects;
ConstantsData Constants;
};
@@ -45,6 +46,7 @@ private:
bool _supported = false;
AssetReference<Shader> _shader;
GPUPipelineState* _psClear = nullptr;
GPUPipelineState* _psClearLighting = nullptr;
GPUPipelineState* _psDirectLighting0 = nullptr;
GPUPipelineState* _psDirectLighting1 = nullptr;
GPUPipelineState* _psIndirectLighting = nullptr;
@@ -54,7 +56,6 @@ private:
// Cache
class GPUBuffer* _culledObjectsSizeBuffer = nullptr;
class DynamicTypedBuffer* _objectsBuffer = nullptr;
class DynamicVertexBuffer* _vertexBuffer = nullptr;
class GlobalSurfaceAtlasCustomBuffer* _surfaceAtlasData;
Array<void*> _dirtyObjectsBuffer;
@@ -79,7 +80,7 @@ public:
void RenderDebug(RenderContext& renderContext, GPUContext* context, GPUTexture* output);
// Rasterize actor into the Global Surface Atlas. Call it from actor Draw() method during DrawPass::GlobalSurfaceAtlas.
void RasterizeActor(Actor* actor, void* actorObject, const BoundingSphere& actorObjectBounds, const Matrix& localToWorld, const BoundingBox& localBounds, uint32 tilesMask = MAX_uint32);
void RasterizeActor(Actor* actor, void* actorObject, const BoundingSphere& actorObjectBounds, const Matrix& localToWorld, const BoundingBox& localBounds, uint32 tilesMask = MAX_uint32, bool useVisibility = true);
private:
#if COMPILE_WITH_DEV_ENV

View File

@@ -61,14 +61,18 @@ PACK_STRUCT(struct ModelsRasterizeData
Int3 ChunkCoord;
float MaxDistance;
Float3 CascadeCoordToPosMul;
int ObjectsCount;
int32 ObjectsCount;
Float3 CascadeCoordToPosAdd;
int32 CascadeResolution;
float Padding0;
int32 CascadeIndex;
float CascadeVoxelSize;
int32 CascadeMipResolution;
int32 CascadeMipFactor;
uint32 Objects[GLOBAL_SDF_RASTERIZE_MODEL_MAX_COUNT];
uint32 GenerateMipTexResolution;
uint32 GenerateMipCoordScale;
uint32 GenerateMipTexOffsetX;
uint32 GenerateMipMipOffsetX;
});
struct RasterizeModel
@@ -125,8 +129,6 @@ uint32 GetHash(const RasterizeChunkKey& key)
struct CascadeData
{
GPUTexture* Texture = nullptr;
GPUTexture* Mip = nullptr;
Float3 Position;
float VoxelSize;
BoundingBox Bounds;
@@ -163,18 +165,14 @@ struct CascadeData
}
}
}
// Hands the cascade's Texture and Mip back through RenderTargetPool::Release when the cascade is destroyed.
~CascadeData()
{
RenderTargetPool::Release(Texture);
RenderTargetPool::Release(Mip);
}
};
class GlobalSignDistanceFieldCustomBuffer : public RenderBuffers::CustomBuffer, public ISceneRenderingListener
{
public:
int32 Resolution = 0;
GPUTexture* Texture = nullptr;
GPUTexture* TextureMip = nullptr;
Array<CascadeData, FixedAllocation<4>> Cascades;
HashSet<ScriptingTypeHandle> ObjectTypes;
HashSet<GPUTexture*> SDFTextures;
@@ -187,6 +185,8 @@ public:
e.Item->Deleted.Unbind<GlobalSignDistanceFieldCustomBuffer, &GlobalSignDistanceFieldCustomBuffer::OnSDFTextureDeleted>(this);
e.Item->ResidentMipsChanged.Unbind<GlobalSignDistanceFieldCustomBuffer, &GlobalSignDistanceFieldCustomBuffer::OnSDFTextureResidentMipsChanged>(this);
}
RenderTargetPool::Release(Texture);
RenderTargetPool::Release(TextureMip);
}
void OnSDFTextureDeleted(ScriptingObject* object)
@@ -300,8 +300,7 @@ bool GlobalSignDistanceFieldPass::setupResources()
_csRasterizeModel1 = shader->GetCS("CS_RasterizeModel", 1);
_csRasterizeHeightfield = shader->GetCS("CS_RasterizeHeightfield");
_csClearChunk = shader->GetCS("CS_ClearChunk");
_csGenerateMip0 = shader->GetCS("CS_GenerateMip", 0);
_csGenerateMip1 = shader->GetCS("CS_GenerateMip", 1);
_csGenerateMip = shader->GetCS("CS_GenerateMip");
// Init buffer
if (!_objectsBuffer)
@@ -329,8 +328,7 @@ void GlobalSignDistanceFieldPass::OnShaderReloading(Asset* obj)
_csRasterizeModel1 = nullptr;
_csRasterizeHeightfield = nullptr;
_csClearChunk = nullptr;
_csGenerateMip0 = nullptr;
_csGenerateMip1 = nullptr;
_csGenerateMip = nullptr;
_cb0 = nullptr;
_cb1 = nullptr;
invalidateResources();
@@ -351,18 +349,6 @@ void GlobalSignDistanceFieldPass::Dispose()
ChunksCache.SetCapacity(0);
}
void GlobalSignDistanceFieldPass::BindingData::BindCascades(GPUContext* context, int32 srvSlot)
{
for (int32 i = 0; i < 4; i++)
context->BindSR(srvSlot + i, Cascades[i] ? Cascades[i]->ViewVolume() : nullptr);
}
void GlobalSignDistanceFieldPass::BindingData::BindCascadeMips(GPUContext* context, int32 srvSlot)
{
for (int32 i = 0; i < 4; i++)
context->BindSR(srvSlot + i, CascadeMips[i] ? CascadeMips[i]->ViewVolume() : nullptr);
}
bool GlobalSignDistanceFieldPass::Get(const RenderBuffers* buffers, BindingData& result)
{
auto* sdfData = buffers ? buffers->FindCustomBuffer<GlobalSignDistanceFieldCustomBuffer>(TEXT("GlobalSignDistanceField")) : nullptr;
@@ -428,14 +414,13 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
sdfData.Cascades.Resize(cascadesCount);
sdfData.Resolution = resolution;
updated = true;
auto desc = GPUTextureDescription::New3D(resolution, resolution, resolution, GLOBAL_SDF_FORMAT, GPUTextureFlags::ShaderResource | GPUTextureFlags::UnorderedAccess, 1);
for (auto& cascade : sdfData.Cascades)
auto desc = GPUTextureDescription::New3D(resolution * cascadesCount, resolution, resolution, GLOBAL_SDF_FORMAT, GPUTextureFlags::ShaderResource | GPUTextureFlags::UnorderedAccess, 1);
{
GPUTexture*& texture = cascade.Texture;
GPUTexture*& texture = sdfData.Texture;
if (texture && texture->Width() != desc.Width)
{
RenderTargetPool::Release(texture);
texture = nullptr;
sdfData.Texture = nullptr;
}
if (!texture)
{
@@ -444,10 +429,11 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
return true;
}
}
desc.Width = desc.Height = desc.Depth = resolutionMip;
desc.Width = resolutionMip * cascadesCount;
desc.Height = desc.Depth = resolutionMip;
for (auto& cascade : sdfData.Cascades)
{
GPUTexture*& texture = cascade.Mip;
GPUTexture*& texture = sdfData.TextureMip;
if (texture && texture->Width() != desc.Width)
{
RenderTargetPool::Release(texture);
@@ -469,10 +455,12 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
{
cascade.NonEmptyChunks.Clear();
cascade.StaticChunks.Clear();
context->ClearUA(cascade.Texture, Float4::One);
context->ClearUA(cascade.Mip, Float4::One);
}
LOG(Info, "Global SDF memory usage: {0} MB", (sdfData.Cascades[0].Texture->GetMemoryUsage() + sdfData.Cascades[0].Mip->GetMemoryUsage()) * ARRAY_COUNT(sdfData.Cascades) / 1024 / 1024);
uint64 memoryUsage = sdfData.Texture->GetMemoryUsage();
context->ClearUA(sdfData.Texture, Float4::One);
memoryUsage += sdfData.TextureMip->GetMemoryUsage();
context->ClearUA(sdfData.TextureMip, Float4::One);
LOG(Info, "Global SDF memory usage: {0} MB", memoryUsage / 1024 / 1024);
}
for (SceneRendering* scene : renderContext.List->Scenes)
sdfData.ListenSceneRendering(scene);
@@ -498,6 +486,8 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
bool anyDraw = false;
const uint64 cascadeFrequencies[] = { 2, 3, 5, 11 };
//const uint64 cascadeFrequencies[] = { 1, 1, 1, 1 };
GPUTextureView* textureView = sdfData.Texture->ViewVolume();
GPUTextureView* textureMipView = sdfData.TextureMip->ViewVolume();
for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++)
{
// Reduce frequency of the updates
@@ -514,8 +504,6 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
BoundingBox cascadeBounds(center - cascadeDistance, center + cascadeDistance);
// TODO: add scene detail scale factor to PostFx settings (eg. to increase or decrease scene details and quality)
const float minObjectRadius = Math::Max(20.0f, cascadeVoxelSize * 0.5f); // Skip too small objects for this cascade
GPUTextureView* cascadeView = cascade.Texture->ViewVolume();
GPUTextureView* cascadeMipView = cascade.Mip->ViewVolume();
// Clear cascade before rasterization
{
@@ -539,6 +527,8 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
_objectsBufferCount = 0;
_voxelSize = cascadeVoxelSize;
_cascadeBounds = cascadeBounds;
_cascadeBounds.Minimum += 0.1f; // Adjust to prevent overflowing chunk keys (cascade bounds are used for clamping object bounds)
_cascadeBounds.Maximum -= 0.1f; // Adjust to prevent overflowing chunk keys (cascade bounds are used for clamping object bounds)
_cascadeIndex = cascadeIndex;
_sdfData = &sdfData;
{
@@ -556,24 +546,18 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
}
// Perform batched chunks rasterization
if (!anyDraw)
{
anyDraw = true;
context->ResetSR();
auto desc = GPUTextureDescription::New3D(resolution, resolution, resolution, GLOBAL_SDF_FORMAT, GPUTextureFlags::ShaderResource | GPUTextureFlags::UnorderedAccess, 1);
tmpMip = RenderTargetPool::Get(desc);
if (!tmpMip)
return true;
}
ModelsRasterizeData data;
data.CascadeCoordToPosMul = (Float3)cascadeBounds.GetSize() / (float)resolution;
data.CascadeCoordToPosAdd = (Float3)cascadeBounds.Minimum + cascadeVoxelSize * 0.5f;
data.MaxDistance = cascadeMaxDistance;
data.CascadeResolution = resolution;
data.CascadeMipResolution = resolutionMip;
data.CascadeIndex = cascadeIndex;
data.CascadeMipFactor = GLOBAL_SDF_RASTERIZE_MIP_FACTOR;
data.CascadeVoxelSize = cascadeVoxelSize;
context->BindUA(0, cascadeView);
context->BindUA(0, textureView);
context->BindCB(1, _cb1);
const int32 chunkDispatchGroups = GLOBAL_SDF_RASTERIZE_CHUNK_SIZE / GLOBAL_SDF_RASTERIZE_GROUP_SIZE;
bool anyChunkDispatch = false;
@@ -738,25 +722,55 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
if (updated || anyChunkDispatch)
{
PROFILE_GPU_CPU("Generate Mip");
context->UpdateCB(_cb1, &data);
context->ResetUA();
context->BindSR(0, cascadeView);
context->BindUA(0, cascadeMipView);
const int32 mipDispatchGroups = Math::DivideAndRoundUp(resolutionMip, GLOBAL_SDF_MIP_GROUP_SIZE);
static_assert((GLOBAL_SDF_MIP_FLOODS % 2) == 1, "Invalid Global SDF mip flood iterations count.");
int32 floodFillIterations = chunks.Count() == 0 ? 1 : GLOBAL_SDF_MIP_FLOODS;
context->Dispatch(_csGenerateMip0, mipDispatchGroups, mipDispatchGroups, mipDispatchGroups);
context->UnBindSR(0);
if (!tmpMip)
{
// Use temporary texture to flood fill mip
auto desc = GPUTextureDescription::New3D(resolutionMip, resolutionMip, resolutionMip, GLOBAL_SDF_FORMAT, GPUTextureFlags::ShaderResource | GPUTextureFlags::UnorderedAccess, 1);
tmpMip = RenderTargetPool::Get(desc);
if (!tmpMip)
return true;
}
GPUTextureView* tmpMipView = tmpMip->ViewVolume();
// Tex -> Mip
// TODO: use push constants on DX12/Vulkan to provide those 4 uints to the shader
data.GenerateMipTexResolution = data.CascadeResolution;
data.GenerateMipCoordScale = data.CascadeMipFactor;
data.GenerateMipTexOffsetX = data.CascadeIndex * data.CascadeResolution;
data.GenerateMipMipOffsetX = data.CascadeIndex * data.CascadeMipResolution;
context->UpdateCB(_cb1, &data);
context->BindSR(0, textureView);
context->BindUA(0, textureMipView);
context->Dispatch(_csGenerateMip, mipDispatchGroups, mipDispatchGroups, mipDispatchGroups);
data.GenerateMipTexResolution = data.CascadeMipResolution;
data.GenerateMipCoordScale = 1;
for (int32 i = 1; i < floodFillIterations; i++)
{
context->ResetUA();
context->BindSR(0, cascadeMipView);
if ((i & 1) == 1)
{
// Mip -> Tmp
context->BindSR(0, textureMipView);
context->BindUA(0, tmpMipView);
context->Dispatch(_csGenerateMip1, mipDispatchGroups, mipDispatchGroups, mipDispatchGroups);
Swap(tmpMipView, cascadeMipView);
data.GenerateMipTexOffsetX = data.CascadeIndex * data.CascadeMipResolution;
data.GenerateMipMipOffsetX = 0;
}
else
{
// Tmp -> Mip
context->BindSR(0, tmpMipView);
context->BindUA(0, textureMipView);
data.GenerateMipTexOffsetX = 0;
data.GenerateMipMipOffsetX = data.CascadeIndex * data.CascadeMipResolution;
}
context->UpdateCB(_cb1, &data);
context->Dispatch(_csGenerateMip, mipDispatchGroups, mipDispatchGroups, mipDispatchGroups);
}
if (floodFillIterations % 2 == 0)
Swap(tmpMipView, cascadeMipView);
}
}
@@ -771,26 +785,22 @@ bool GlobalSignDistanceFieldPass::Render(RenderContext& renderContext, GPUContex
}
// Copy results
static_assert(ARRAY_COUNT(result.Cascades) == ARRAY_COUNT(sdfData.Cascades), "Invalid cascades count.");
static_assert(ARRAY_COUNT(result.CascadeMips) == ARRAY_COUNT(sdfData.Cascades), "Invalid cascades count.");
result.Texture = sdfData.Texture;
result.TextureMip = sdfData.TextureMip;
for (int32 cascadeIndex = 0; cascadeIndex < cascadesCount; cascadeIndex++)
{
auto& cascade = sdfData.Cascades[cascadeIndex];
const float cascadeDistance = distanceExtent * cascadesDistanceScales[cascadeIndex];
const float cascadeMaxDistance = cascadeDistance * 2;
const float cascadeVoxelSize = cascadeMaxDistance / resolution;
const float cascadeVoxelSize = cascadeMaxDistance / (float)resolution;
const Float3 center = cascade.Position;
result.Constants.CascadePosDistance[cascadeIndex] = Float4(center, cascadeDistance);
result.Constants.CascadePosDistance[cascadeIndex] = Vector4(center, cascadeDistance);
result.Constants.CascadeVoxelSize.Raw[cascadeIndex] = cascadeVoxelSize;
result.Cascades[cascadeIndex] = cascade.Texture;
result.CascadeMips[cascadeIndex] = cascade.Mip;
}
for (int32 cascadeIndex = cascadesCount; cascadeIndex < 4; cascadeIndex++)
{
result.Constants.CascadePosDistance[cascadeIndex] = result.Constants.CascadePosDistance[cascadesCount - 1];
result.Constants.CascadeVoxelSize.Raw[cascadeIndex] = result.Constants.CascadeVoxelSize.Raw[cascadesCount - 1];
result.Cascades[cascadeIndex] = nullptr;
result.CascadeMips[cascadeIndex] = nullptr;
}
result.Constants.Resolution = (float)resolution;
result.Constants.CascadesCount = cascadesCount;
@@ -820,8 +830,8 @@ void GlobalSignDistanceFieldPass::RenderDebug(RenderContext& renderContext, GPUC
context->UpdateCB(_cb0, &data);
context->BindCB(0, _cb0);
}
bindingData.BindCascades(context, 0);
bindingData.BindCascadeMips(context, 4);
context->BindSR(0, bindingData.Texture ? bindingData.Texture->ViewVolume() : nullptr);
context->BindSR(1, bindingData.TextureMip ? bindingData.TextureMip->ViewVolume() : nullptr);
context->SetState(_psDebug);
context->SetRenderTarget(output->View());
context->SetViewportAndScissors(outputSize.X, outputSize.Y);

View File

@@ -23,12 +23,9 @@ public:
// Binding data for the GPU.
struct BindingData
{
GPUTexture* Cascades[4];
GPUTexture* CascadeMips[4];
GPUTexture* Texture;
GPUTexture* TextureMip;
ConstantsData Constants;
void BindCascades(GPUContext* context, int32 srvSlot);
void BindCascadeMips(GPUContext* context, int32 srvSlot);
};
private:
@@ -39,8 +36,7 @@ private:
GPUShaderProgramCS* _csRasterizeModel1 = nullptr;
GPUShaderProgramCS* _csRasterizeHeightfield = nullptr;
GPUShaderProgramCS* _csClearChunk = nullptr;
GPUShaderProgramCS* _csGenerateMip0 = nullptr;
GPUShaderProgramCS* _csGenerateMip1 = nullptr;
GPUShaderProgramCS* _csGenerateMip = nullptr;
GPUConstantBuffer* _cb0 = nullptr;
GPUConstantBuffer* _cb1 = nullptr;

View File

@@ -34,6 +34,7 @@ struct RendererDirectionalLightData
float ShadowsSharpness;
float VolumetricScatteringIntensity;
StaticFlags StaticFlags;
float IndirectLightingIntensity;
int8 CastVolumetricShadow : 1;
int8 RenderedVolumetricFog : 1;
@@ -43,6 +44,8 @@ struct RendererDirectionalLightData
float ContactShadowsLength;
ShadowsCastingMode ShadowsMode;
Guid ID;
void SetupLightData(LightData* data, bool useShadow) const;
};
@@ -76,11 +79,13 @@ struct RendererSpotLightData
float IndirectLightingIntensity;
ShadowsCastingMode ShadowsMode;
StaticFlags StaticFlags;
int8 CastVolumetricShadow : 1;
int8 RenderedVolumetricFog : 1;
int8 UseInverseSquaredFalloff : 1;
GPUTexture* IESTexture;
Guid ID;
void SetupLightData(LightData* data, bool useShadow) const;
};
@@ -111,11 +116,13 @@ struct RendererPointLightData
float IndirectLightingIntensity;
ShadowsCastingMode ShadowsMode;
StaticFlags StaticFlags;
int8 CastVolumetricShadow : 1;
int8 RenderedVolumetricFog : 1;
int8 UseInverseSquaredFalloff : 1;
GPUTexture* IESTexture;
Guid ID;
void SetupLightData(LightData* data, bool useShadow) const;
};
@@ -131,10 +138,12 @@ struct RendererSkyLightData
Float3 AdditiveColor;
float IndirectLightingIntensity;
StaticFlags StaticFlags;
int8 CastVolumetricShadow : 1;
int8 RenderedVolumetricFog : 1;
CubeTexture* Image;
Guid ID;
void SetupLightData(LightData* data, bool useShadow) const;
};
@@ -211,7 +220,6 @@ struct DrawBatch
class RenderListAllocation
{
public:
static FLAXENGINE_API void* Allocate(uintptr size);
static FLAXENGINE_API void Free(void* ptr, uintptr size);
@@ -222,7 +230,6 @@ public:
uintptr _size;
public:
FORCE_INLINE Data()
{
}
@@ -349,7 +356,6 @@ DECLARE_SCRIPTING_TYPE(RenderList);
static void CleanupCache();
public:
/// <summary>
/// All scenes for rendering.
/// </summary>
@@ -458,11 +464,9 @@ public:
Float3 FrustumCornersVs[8];
private:
DynamicVertexBuffer _instanceBuffer;
public:
/// <summary>
/// Blends the postprocessing settings into the final options.
/// </summary>
@@ -527,7 +531,6 @@ public:
}
public:
/// <summary>
/// Init cache for given task
/// </summary>
@@ -540,7 +543,6 @@ public:
void Clear();
public:
/// <summary>
/// Adds the draw call to the draw lists.
/// </summary>

View File

@@ -7,6 +7,7 @@
#include "Engine/Platform/Platform.h"
#include "Engine/Threading/Threading.h"
#include "Engine/Serialization/MemoryWriteStream.h"
#include "Engine/Graphics/Config.h"
#include "Engine/GraphicsDevice/Vulkan/Types.h"
// Use glslang for HLSL to SPIR-V translation
@@ -682,6 +683,10 @@ bool ShaderCompilerVulkan::CompileShader(ShaderFunctionMeta& meta, WritePermutat
{
auto& descriptor = descriptorsCollector.Descriptors[i];
// Skip cases (eg. AppendStructuredBuffer counter buffer)
if (descriptor.Slot == MAX_uint16)
continue;
auto& d = header.DescriptorInfo.DescriptorTypes[header.DescriptorInfo.DescriptorTypesCount++];
d.Binding = descriptor.Binding;
d.Set = stageSet;
@@ -694,12 +699,15 @@ bool ShaderCompilerVulkan::CompileShader(ShaderFunctionMeta& meta, WritePermutat
switch (descriptor.BindingType)
{
case SpirvShaderResourceBindingType::CB:
ASSERT_LOW_LAYER(descriptor.Slot >= 0 && descriptor.Slot < GPU_MAX_CB_BINDED);
bindings.UsedCBsMask |= 1 << descriptor.Slot;
break;
case SpirvShaderResourceBindingType::SRV:
ASSERT_LOW_LAYER(descriptor.Slot >= 0 && descriptor.Slot < GPU_MAX_SR_BINDED);
bindings.UsedSRsMask |= 1 << descriptor.Slot;
break;
case SpirvShaderResourceBindingType::UAV:
ASSERT_LOW_LAYER(descriptor.Slot >= 0 && descriptor.Slot < GPU_MAX_UA_BINDED);
bindings.UsedUAsMask |= 1 << descriptor.Slot;
break;
}

View File

@@ -542,7 +542,7 @@ void Terrain::Draw(RenderContext& renderContext)
Matrix::Invert(chunk->GetWorld(), worldToLocal);
BoundingBox::Transform(chunk->GetBounds(), worldToLocal, localBounds);
BoundingSphere::FromBox(chunk->GetBounds(), chunkSphere);
GlobalSurfaceAtlasPass::Instance()->RasterizeActor(this, chunk, chunkSphere, chunk->GetWorld(), localBounds, 1 << 2);
GlobalSurfaceAtlasPass::Instance()->RasterizeActor(this, chunk, chunkSphere, chunk->GetWorld(), localBounds, 1 << 2, false);
}
}
return;

View File

@@ -170,8 +170,7 @@ const Char* ShaderGraphUtilities::GenerateShaderResources(TextWriterUnicode& wri
format = TEXT("Texture3D {0} : register(t{1});");
break;
case MaterialParameterType::GlobalSDF:
format = TEXT("Texture3D<float> {0}_Tex[4] : register(t{1});");
registers = 4;
format = TEXT("Texture3D<float> {0}_Tex : register(t{1});");
zeroOffset = false;
break;
}

View File

@@ -242,7 +242,6 @@ float3 inscatter(inout float3 x, inout float t, float3 v, float3 s, out float r,
float muHoriz = -sqrt(1.0 - (RadiusGround / r) * (RadiusGround / r));
if (abs(mu - muHoriz) < epsilon)
{
mu = muHoriz - epsilon;
r0 = sqrt(r * r + t * t + 2.0 * r * t * mu);
mu0 = (r * mu + t) / r0;
@@ -299,7 +298,7 @@ bool intersectAtmosphere(in float3 viewPosition, in float3 d, out float offset,
maxPathLength = s + q;
return true;
}
else if (s >= 0)
if (s >= 0)
{
// ray starts outside in front of sphere, hit is possible
float m2 = l2 - (s * s);

View File

@@ -134,8 +134,8 @@ SamplerComparisonState ShadowSamplerPCF : register(s5);
// Structure that contains information about GBuffer
struct GBufferData
{
float4 ViewInfo; // x-1/Projection[0,0] y-1/Projection[1,1] z-(Far / (Far - Near) w-(-Far * Near) / (Far - Near) / Far)
float4 ScreenSize; // x-Width y-Height z-1/Width w-1/Height
float4 ViewInfo; // x-1/Projection[0,0], y-1/Projection[1,1], z-(Far / (Far - Near), w-(-Far * Near) / (Far - Near) / Far)
float4 ScreenSize; // x-Width, y-Height, z-1/Width, w-1/Height
float3 ViewPos; // view position (in world space)
float ViewFar; // view far plane distance (in world space)
float4x4 InvViewMatrix; // inverse view matrix (4 rows by 4 columns)

View File

@@ -13,8 +13,9 @@
#include "./Flax/Math.hlsl"
#include "./Flax/Octahedral.hlsl"
#define DDGI_PROBE_STATE_ACTIVE 0
#define DDGI_PROBE_STATE_INACTIVE 1
#define DDGI_PROBE_STATE_INACTIVE 0.0f
#define DDGI_PROBE_STATE_ACTIVATED 0.2f
#define DDGI_PROBE_STATE_ACTIVE 1.0f
#define DDGI_PROBE_RESOLUTION_IRRADIANCE 6 // Resolution (in texels) for probe irradiance data (excluding 1px padding on each side)
#define DDGI_PROBE_RESOLUTION_DISTANCE 14 // Resolution (in texels) for probe distance data (excluding 1px padding on each side)
#define DDGI_SRGB_BLENDING 1 // Enables blending in sRGB color space, otherwise irradiance blending is done in linear space
@@ -23,8 +24,7 @@
struct DDGIData
{
float4 ProbesOriginAndSpacing[4];
int4 ProbesScrollOffsets[4];
int4 ProbeScrollDirections[4];
int4 ProbesScrollOffsets[4]; // w unused
uint3 ProbesCounts;
uint CascadesCount;
float IrradianceGamma;
@@ -32,7 +32,7 @@ struct DDGIData
float RayMaxDistance;
float IndirectLightingIntensity;
float4 RaysRotation;
float3 ViewDir;
float3 ViewPos;
uint RaysCount;
float3 FallbackIrradiance;
float Padding0;
@@ -77,7 +77,7 @@ uint2 GetDDGIProbeTexelCoords(DDGIData data, uint cascadeIndex, uint probeIndex)
// Maps grid-space probe coordinates to the probe index after applying the per-cascade scroll offsets.
// data - DDGI constants (probes counts and per-cascade scroll offsets)
// cascadeIndex - index of the cascade whose scroll offsets are applied
// probeCoords - probe coordinates within the probes grid
uint GetDDGIScrollingProbeIndex(DDGIData data, uint cascadeIndex, uint3 probeCoords)
{
    // Probes are scrolled on edges to stabilize GI when camera moves.
    // ProbesScrollOffsets is signed (int4) while probeCoords/ProbesCounts are unsigned, so the wrap-around is
    // evaluated explicitly in signed integers; ProbesCounts is added before the modulo to offset negative scroll values.
    return GetDDGIProbeIndex(data, ((int3)probeCoords + data.ProbesScrollOffsets[cascadeIndex].xyz + (int3)data.ProbesCounts) % (int3)data.ProbesCounts);
}
float3 GetDDGIProbeWorldPosition(DDGIData data, uint cascadeIndex, uint3 probeCoords)
@@ -90,7 +90,7 @@ float3 GetDDGIProbeWorldPosition(DDGIData data, uint cascadeIndex, uint3 probeCo
}
// Loads probe state
float LoadDDGIProbeState(DDGIData data, Texture2D<float4> probesState, uint cascadeIndex, uint probeIndex)
float LoadDDGIProbeState(DDGIData data, Texture2D<snorm float4> probesState, uint cascadeIndex, uint probeIndex)
{
int2 probeDataCoords = GetDDGIProbeTexelCoords(data, cascadeIndex, probeIndex);
float4 probeState = probesState.Load(int3(probeDataCoords, 0));
@@ -98,11 +98,12 @@ float LoadDDGIProbeState(DDGIData data, Texture2D<float4> probesState, uint casc
}
// Loads probe world-space position (XYZ) and probe state (W).
// data - DDGI constants (per-cascade origin/spacing and probes layout)
// probesState - probes state texture; XYZ holds the probe offset, W holds the probe state
// cascadeIndex - index of the cascade the probe belongs to
// probeIndex - probe index used to locate the probe texel in the state texture
// probeCoords - probe coordinates within the probes grid, used to compute the grid position
float4 LoadDDGIProbePositionAndState(DDGIData data, Texture2D<snorm float4> probesState, uint cascadeIndex, uint probeIndex, uint3 probeCoords)
{
    int2 probeDataCoords = GetDDGIProbeTexelCoords(data, cascadeIndex, probeIndex);
    float4 probeState = probesState.Load(int3(probeDataCoords, 0));
    // The stored offset is normalized, so scale it by the cascade probes spacing first and only then
    // add the grid position (adding the grid position before the scale would scale the position as well).
    probeState.xyz *= data.ProbesOriginAndSpacing[cascadeIndex].w; // Probe offset is [-1;1] within probes spacing
    probeState.xyz += GetDDGIProbeWorldPosition(data, cascadeIndex, probeCoords); // Place probe on a grid
    return probeState;
}
@@ -119,8 +120,9 @@ float2 GetDDGIProbeUV(DDGIData data, uint cascadeIndex, uint probeIndex, float2
}
// Samples DDGI probes volume at the given world-space position and returns the irradiance.
// rand - randomized per-pixel value in range 0-1, used to smooth dithering for cascades blending
float3 SampleDDGIIrradiance(DDGIData data, Texture2D<float4> probesState, Texture2D<float4> probesDistance, Texture2D<float4> probesIrradiance, float3 worldPosition, float3 worldNormal, float bias, float dither = 0.0f)
// bias - scales the bias vector to the initial sample point to reduce self-shading artifacts
// dither - randomized per-pixel value in range 0-1, used to smooth dithering for cascades blending
float3 SampleDDGIIrradiance(DDGIData data, Texture2D<snorm float4> probesState, Texture2D<float4> probesDistance, Texture2D<float4> probesIrradiance, float3 worldPosition, float3 worldNormal, float bias = 0.2f, float dither = 0.0f)
{
// Select the highest cascade that contains the sample location
uint cascadeIndex = 0;
@@ -145,7 +147,7 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D<float4> probesState, Textur
uint probeIndex = GetDDGIScrollingProbeIndex(data, cascadeIndex, probeCoords);
float4 probeState = probesState.Load(int3(GetDDGIProbeTexelCoords(data, cascadeIndex, probeIndex), 0));
probeStates[i] = probeState;
if (probeState.w == DDGI_PROBE_STATE_ACTIVE)
if (probeState.w != DDGI_PROBE_STATE_INACTIVE)
activeCount++;
}
@@ -162,7 +164,8 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D<float4> probesState, Textur
float3 probesExtent = (data.ProbesCounts - 1) * (probesSpacing * 0.5f);
// Bias the world-space position to reduce artifacts
float3 surfaceBias = (worldNormal * bias) + (data.ViewDir * (bias * -4.0f));
float3 viewDir = normalize(data.ViewPos - worldPosition);
float3 surfaceBias = (worldNormal * 0.2f + viewDir * 0.8f) * (0.75f * probesSpacing * bias);
float3 biasedWorldPosition = worldPosition + surfaceBias;
// Get the grid coordinates of the probe nearest the biased world position
@@ -182,13 +185,14 @@ float3 SampleDDGIIrradiance(DDGIData data, Texture2D<float4> probesState, Textur
float4 probeState = probeStates[i];
if (probeState.w == DDGI_PROBE_STATE_INACTIVE)
continue;
probeState.xyz *= probesSpacing; // Probe offset is [-1;1] within probes spacing
float3 probeBasePosition = baseProbeWorldPosition + ((probeCoords - baseProbeCoords) * probesSpacing);
float3 probePosition = probeBasePosition + probeState.xyz;
// Calculate the distance and direction from the (biased and non-biased) shading point and the probe
float3 worldPosToProbe = normalize(probePosition - worldPosition);
float3 biasedPosToProbe = normalize(probePosition - biasedWorldPosition);
float biasedPosToProbeDist = length(probePosition - biasedWorldPosition);
float biasedPosToProbeDist = length(probePosition - biasedWorldPosition) * 0.95f;
// Smooth backface test
float weight = Square(dot(worldPosToProbe, worldNormal) * 0.5f + 0.5f);

View File

@@ -17,8 +17,8 @@
#include "./Flax/GI/DDGI.hlsl"
// This must match C++
#define DDGI_TRACE_RAYS_LIMIT 512 // Limit of rays per-probe (runtime value can be smaller)
#define DDGI_TRACE_RAYS_GROUP_SIZE_X 32
#define DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT 4096 // Maximum amount of probes to update at once during rays tracing and blending
#define DDGI_TRACE_RAYS_LIMIT 256 // Limit of rays per-probe (runtime value can be smaller)
#define DDGI_PROBE_UPDATE_BORDERS_GROUP_SIZE 8
#define DDGI_PROBE_CLASSIFY_GROUP_SIZE 32
@@ -30,6 +30,7 @@ GBufferData GBuffer;
float2 Padding0;
float ResetBlend;
float TemporalTime;
int4 ProbeScrollClears[4];
META_CB_END
META_CB_BEGIN(1, Data1)
@@ -56,12 +57,19 @@ float3 GetProbeRayDirection(DDGIData data, uint rayIndex)
return normalize(QuaternionRotate(data.RaysRotation, direction));
}
// Compares two probe state values and returns true when they differ by less than 0.05.
// NOTE(review): presumably the tolerance absorbs precision loss of the stored state value - confirm.
bool GetProbeState(float a, float b)
{
    float stateDelta = abs(a - b);
    return stateDelta < 0.05f;
}
#ifdef _CS_Classify
RWTexture2D<float4> RWProbesState : register(u0);
RWTexture2D<snorm float4> RWProbesState : register(u0);
RWByteAddressBuffer RWActiveProbes : register(u1);
Texture3D<float> GlobalSDFTex[4] : register(t0);
Texture3D<float> GlobalSDFMip[4] : register(t4);
Texture3D<float> GlobalSDFTex : register(t0);
Texture3D<float> GlobalSDFMip : register(t1);
// Compute shader for updating probes state between active and inactive.
META_CS(true, FEATURE_LEVEL_SM5)
@@ -79,9 +87,10 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID)
// Load probe state and position
float4 probeState = RWProbesState[probeDataCoords];
probeState.xyz *= probesSpacing; // Probe offset is [-1;1] within probes spacing
float3 probeBasePosition = GetDDGIProbeWorldPosition(DDGI, CascadeIndex, probeCoords);
float3 probePosition = probeBasePosition + probeState.xyz;
probeState.w = DDGI_PROBE_STATE_ACTIVE;
float4 probeStateOld = probeState;
// Use Global SDF to quickly get distance and direction to the scene geometry
float sdf;
@@ -127,9 +136,61 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID)
// Reset relocation
probeState.xyz = float3(0, 0, 0);
}
// Check if probe was scrolled
int3 probeScrollClears = ProbeScrollClears[CascadeIndex].xyz;
bool wasScrolled = false;
UNROLL
for (uint planeIndex = 0; planeIndex < 3; planeIndex++)
{
int probeCount = (int)DDGI.ProbesCounts[planeIndex];
int newCord = (int)probeCoords[planeIndex] + probeScrollClears[planeIndex];
if (newCord < 0 || newCord >= probeCount)
{
wasScrolled = true;
}
}
// If probe was in different location or was inactive last frame then mark it as activated
bool wasInactive = probeStateOld.w == DDGI_PROBE_STATE_INACTIVE;
bool wasRelocated = distance(probeState.xyz, probeStateOld.xyz) > 1.0f;
probeState.w = wasInactive || wasScrolled || wasRelocated ? DDGI_PROBE_STATE_ACTIVATED : DDGI_PROBE_STATE_ACTIVE;
}
probeState.xyz /= probesSpacing;
RWProbesState[probeDataCoords] = probeState;
// Collect active probes
if (probeState.w != DDGI_PROBE_STATE_INACTIVE)
{
uint activeProbeIndex;
RWActiveProbes.InterlockedAdd(0, 1, activeProbeIndex); // Counter at 0
RWActiveProbes.Store(activeProbeIndex * 4 + 4, DispatchThreadId.x);
}
}
#endif
#ifdef _CS_UpdateProbesInitArgs
RWBuffer<uint> UpdateProbesInitArgs : register(u0);
ByteAddressBuffer ActiveProbes : register(t0);
// Compute shader for building indirect dispatch arguments for CS_TraceRays and CS_UpdateProbes.
// Reads the active probes counter (first uint of ActiveProbes) and splits it into batches of up to
// DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT probes. Each batch writes three consecutive uints into
// UpdateProbesInitArgs: (batch size, 1, 1).
// NOTE(review): entries for batches beyond the active probes count are not written by this shader - confirm the caller does not consume them.
META_CS(true, FEATURE_LEVEL_SM5)
[numthreads(1, 1, 1)]
void CS_UpdateProbesInitArgs()
{
    uint activeProbesCount = ActiveProbes.Load(0); // Counter at 0
    uint arg = 0;
    for (uint probesOffset = 0; probesOffset < activeProbesCount; probesOffset += DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT)
    {
        // The last batch may be smaller than the limit
        uint probesBatchSize = min(activeProbesCount - probesOffset, DDGI_TRACE_RAYS_PROBES_COUNT_LIMIT);
        UpdateProbesInitArgs[arg++] = probesBatchSize;
        UpdateProbesInitArgs[arg++] = 1;
        UpdateProbesInitArgs[arg++] = 1;
    }
}
#endif
@@ -138,22 +199,28 @@ void CS_Classify(uint3 DispatchThreadId : SV_DispatchThreadID)
RWTexture2D<float4> RWProbesTrace : register(u0);
Texture3D<float> GlobalSDFTex[4] : register(t0);
Texture3D<float> GlobalSDFMip[4] : register(t4);
ByteAddressBuffer GlobalSurfaceAtlasChunks : register(t8);
Buffer<float4> GlobalSurfaceAtlasCulledObjects : register(t9);
Texture2D GlobalSurfaceAtlasDepth : register(t10);
Texture2D GlobalSurfaceAtlasTex : register(t11);
Texture2D<float4> ProbesState : register(t12);
TextureCube Skybox : register(t13);
Texture3D<float> GlobalSDFTex : register(t0);
Texture3D<float> GlobalSDFMip : register(t1);
ByteAddressBuffer GlobalSurfaceAtlasChunks : register(t2);
ByteAddressBuffer RWGlobalSurfaceAtlasCulledObjects : register(t3);
Buffer<float4> GlobalSurfaceAtlasObjects : register(t4);
Texture2D GlobalSurfaceAtlasDepth : register(t5);
Texture2D GlobalSurfaceAtlasTex : register(t6);
Texture2D<snorm float4> ProbesState : register(t7);
TextureCube Skybox : register(t8);
ByteAddressBuffer ActiveProbes : register(t9);
// Compute shader for tracing rays for probes using Global SDF and Global Surface Atlas.
META_CS(true, FEATURE_LEVEL_SM5)
[numthreads(DDGI_TRACE_RAYS_GROUP_SIZE_X, 1, 1)]
META_PERMUTATION_1(DDGI_TRACE_RAYS_COUNT=96)
META_PERMUTATION_1(DDGI_TRACE_RAYS_COUNT=128)
META_PERMUTATION_1(DDGI_TRACE_RAYS_COUNT=192)
META_PERMUTATION_1(DDGI_TRACE_RAYS_COUNT=256)
[numthreads(1, DDGI_TRACE_RAYS_COUNT, 1)]
void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID)
{
uint rayIndex = DispatchThreadId.x;
uint probeIndex = DispatchThreadId.y + ProbeIndexOffset;
uint rayIndex = DispatchThreadId.y;
uint probeIndex = ActiveProbes.Load((DispatchThreadId.x + ProbeIndexOffset + 1) * 4);
uint3 probeCoords = GetDDGIProbeCoords(DDGI, probeIndex);
probeIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, probeCoords);
@@ -182,7 +249,7 @@ void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID)
// Sample Global Surface Atlas to get the lighting at the hit location
float3 hitPosition = hit.GetHitPosition(trace);
float surfaceThreshold = GetGlobalSurfaceAtlasThreshold(GlobalSDF, hit);
float4 surfaceColor = SampleGlobalSurfaceAtlas(GlobalSurfaceAtlas, GlobalSurfaceAtlasChunks, GlobalSurfaceAtlasCulledObjects, GlobalSurfaceAtlasDepth, GlobalSurfaceAtlasTex, hitPosition, -probeRayDirection, surfaceThreshold);
float4 surfaceColor = SampleGlobalSurfaceAtlas(GlobalSurfaceAtlas, GlobalSurfaceAtlasChunks, RWGlobalSurfaceAtlasCulledObjects, GlobalSurfaceAtlasObjects, GlobalSurfaceAtlasDepth, GlobalSurfaceAtlasTex, hitPosition, -probeRayDirection, surfaceThreshold);
radiance = float4(surfaceColor.rgb, hit.HitTime);
// Add some bias to prevent self occlusion artifacts in Chebyshev due to Global SDF being very incorrect in small scale
@@ -197,7 +264,7 @@ void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID)
}
// Write into probes trace results
RWProbesTrace[uint2(rayIndex, DispatchThreadId.y)] = radiance;
RWProbesTrace[uint2(rayIndex, DispatchThreadId.x)] = radiance;
}
#endif
@@ -207,17 +274,19 @@ void CS_TraceRays(uint3 DispatchThreadId : SV_DispatchThreadID)
#if DDGI_PROBE_UPDATE_MODE == 0
// Update irradiance
#define DDGI_PROBE_RESOLUTION DDGI_PROBE_RESOLUTION_IRRADIANCE
groupshared float4 CachedProbesTraceRadiance[DDGI_TRACE_RAYS_LIMIT];
#else
// Update distance
#define DDGI_PROBE_RESOLUTION DDGI_PROBE_RESOLUTION_DISTANCE
groupshared float CachedProbesTraceDistance[DDGI_TRACE_RAYS_LIMIT];
#endif
groupshared float4 CachedProbesTraceRadiance[DDGI_TRACE_RAYS_LIMIT];
groupshared float3 CachedProbesTraceDirection[DDGI_TRACE_RAYS_LIMIT];
RWTexture2D<float4> RWOutput : register(u0);
Texture2D<float4> ProbesState : register(t0);
Texture2D<snorm float4> ProbesState : register(t0);
Texture2D<float4> ProbesTrace : register(t1);
ByteAddressBuffer ActiveProbes : register(t2);
// Compute shader for updating probes irradiance or distance texture.
META_CS(true, FEATURE_LEVEL_SM5)
@@ -229,13 +298,9 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_
// GroupThreadId.xy - coordinates of the probe texel: [0; DDGI_PROBE_RESOLUTION)
// GroupId.x - index of the thread group which is probe index within a batch: [0; batchSize)
// GroupIndex.x - index of the thread within a thread group: [0; DDGI_PROBE_RESOLUTION * DDGI_PROBE_RESOLUTION)
// Get probe index and its location in the atlas
uint probeIndex = GroupId.x + ProbeIndexOffset;
uint probeIndex = ActiveProbes.Load((GroupId.x + ProbeIndexOffset + 1) * 4);
uint3 probeCoords = GetDDGIProbeCoords(DDGI, probeIndex);
probeIndex = GetDDGIScrollingProbeIndex(DDGI, CascadeIndex, probeCoords);
probeCoords = GetDDGIProbeCoords(DDGI, probeIndex);
uint2 outputCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex) * (DDGI_PROBE_RESOLUTION + 2) + 1 + GroupThreadId.xy;
// Skip disabled probes
bool skip = false;
@@ -243,6 +308,15 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_
if (probeState == DDGI_PROBE_STATE_INACTIVE)
skip = true;
#if DDGI_PROBE_UPDATE_MODE == 0
uint backfacesCount = 0;
uint backfacesLimit = uint(DDGI.RaysCount * 0.1f);
#else
float probesSpacing = DDGI.ProbesOriginAndSpacing[CascadeIndex].w;
float distanceLimit = length(probesSpacing) * 1.5f;
#endif
BRANCH
if (!skip)
{
// Load trace rays results into shared memory to reuse across whole thread group (raysCount per thread)
@@ -252,36 +326,20 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_
for (uint i = 0; i < raysCount; i++)
{
uint rayIndex = raysStart + i;
#if DDGI_PROBE_UPDATE_MODE == 0
CachedProbesTraceRadiance[rayIndex] = ProbesTrace[uint2(rayIndex, GroupId.x)];
#else
float rayDistance = ProbesTrace[uint2(rayIndex, GroupId.x)].w;
CachedProbesTraceDistance[rayIndex] = min(abs(rayDistance), distanceLimit);
#endif
CachedProbesTraceDirection[rayIndex] = GetProbeRayDirection(DDGI, rayIndex);
}
}
GroupMemoryBarrierWithGroupSync();
if (skip)
return;
// Clear probes that have been scrolled to new positions
int3 probesScrollOffsets = DDGI.ProbesScrollOffsets[CascadeIndex].xyz;
int probeScrollClear = DDGI.ProbesScrollOffsets[CascadeIndex].w;
int3 probeScrollDirections = DDGI.ProbeScrollDirections[CascadeIndex].xyz;
bool scrolled = false;
UNROLL
for (uint planeIndex = 0; planeIndex < 3; planeIndex++)
{
if (probeScrollClear & (1 << planeIndex))
{
int scrollOffset = probesScrollOffsets[planeIndex];
int scrollDirection = probeScrollDirections[planeIndex];
uint probeCount = DDGI.ProbesCounts[planeIndex];
uint coord = (probeCount + (scrollDirection ? (scrollOffset - 1) : (scrollOffset % probeCount))) % probeCount;
if (probeCoords[planeIndex] == coord)
scrolled = true;
}
}
if (scrolled)
{
RWOutput[outputCoords] = float4(0, 0, 0, 0);
}
probeCoords = GetDDGIProbeCoords(DDGI, probeIndex);
uint2 outputCoords = GetDDGIProbeTexelCoords(DDGI, CascadeIndex, probeIndex) * (DDGI_PROBE_RESOLUTION + 2) + 1 + GroupThreadId.xy;
// Calculate octahedral projection for probe (unwraps spherical projection into a square)
float2 octahedralCoords = GetOctahedralCoords(GroupThreadId.xy, DDGI_PROBE_RESOLUTION);
@@ -289,21 +347,14 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_
// Loop over rays
float4 result = float4(0, 0, 0, 0);
#if DDGI_PROBE_UPDATE_MODE == 0
uint backfacesCount = 0;
uint backfacesLimit = uint(DDGI.RaysCount * 0.1f);
#else
float probesSpacing = DDGI.ProbesOriginAndSpacing[CascadeIndex].w;
float distanceLimit = length(probesSpacing) * 1.5f;
#endif
LOOP
for (uint rayIndex = 0; rayIndex < DDGI.RaysCount; rayIndex++)
{
float3 rayDirection = CachedProbesTraceDirection[rayIndex];
float rayWeight = max(dot(octahedralDirection, rayDirection), 0.0f);
float4 rayRadiance = CachedProbesTraceRadiance[rayIndex];
#if DDGI_PROBE_UPDATE_MODE == 0
float4 rayRadiance = CachedProbesTraceRadiance[rayIndex];
if (rayRadiance.w < 0.0f)
{
// Count backface hits
@@ -325,7 +376,7 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_
rayWeight = pow(rayWeight, 10.0f);
// Add distance (R), distance^2 (G) and weight (A)
float rayDistance = min(abs(rayRadiance.w), distanceLimit);
float rayDistance = CachedProbesTraceDistance[rayIndex];
result += float4(rayDistance * rayWeight, (rayDistance * rayDistance) * rayWeight, 0.0f, rayWeight);
#endif
}
@@ -334,11 +385,16 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_
float epsilon = (float)DDGI.RaysCount * 1e-9f;
result.rgb *= 1.0f / (2.0f * max(result.a, epsilon));
// Blend current value with the previous probe data
// Load current probe value
float3 previous = RWOutput[outputCoords].rgb;
bool wasActivated = GetProbeState(probeState, DDGI_PROBE_STATE_ACTIVATED);
if (ResetBlend || wasActivated)
previous = float3(0, 0, 0);
// Blend current value with the previous probe data
float historyWeight = DDGI.ProbeHistoryWeight;
//historyWeight = 0.0f;
if (ResetBlend || scrolled || dot(previous, previous) == 0)
if (ResetBlend || wasActivated || dot(previous, previous) == 0)
historyWeight = 0.0f;
#if DDGI_PROBE_UPDATE_MODE == 0
result *= DDGI.IndirectLightingIntensity;
@@ -351,18 +407,18 @@ void CS_UpdateProbes(uint3 GroupThreadId : SV_GroupThreadID, uint3 GroupId : SV_
if (irradianceDeltaMax > 0.2f)
{
// Reduce history weight after significant lighting change
historyWeight = max(historyWeight - 0.7f, 0.0f);
historyWeight = max(historyWeight - 0.9f, 0.0f);
}
if (irradianceDeltaLen > 2.0f)
{
// Reduce flickering during rapid brightness changes
result.rgb = previous + (irradianceDelta * 0.25f);
//result.rgb = previous + (irradianceDelta * 0.25f);
}
float3 resultDelta = (1.0f - historyWeight) * irradianceDelta;
if (Max3(result.rgb) < Max3(previous))
resultDelta = min(max(abs(resultDelta), 1.0f / 1024.0f), abs(irradianceDelta)) * sign(resultDelta);
result = float4(previous + resultDelta, 1.0f);
//result = float4(lerp(result.rgb, previous.rgb, historyWeight), 1.0f);
//result = float4(previous + resultDelta, 1.0f);
result = float4(lerp(result.rgb, previous.rgb, historyWeight), 1.0f);
#else
result = float4(lerp(result.rg, previous.rg, historyWeight), 0.0f, 1.0f);
#endif
@@ -445,7 +501,7 @@ void CS_UpdateBorders(uint3 DispatchThreadId : SV_DispatchThreadID)
#include "./Flax/Random.hlsl"
#include "./Flax/LightingCommon.hlsl"
Texture2D<float4> ProbesState : register(t4);
Texture2D<snorm float4> ProbesState : register(t4);
Texture2D<float4> ProbesDistance : register(t5);
Texture2D<float4> ProbesIrradiance : register(t6);
@@ -467,7 +523,7 @@ void PS_IndirectLighting(Quad_VS2PS input, out float4 output : SV_Target0)
}
// Sample irradiance
float bias = 1.0f;
float bias = 0.2f;
float dither = RandN2(input.TexCoord + TemporalTime).x;
float3 irradiance = SampleDDGIIrradiance(DDGI, ProbesState, ProbesDistance, ProbesIrradiance, gBuffer.WorldPos, gBuffer.Normal, bias, dither);

View File

@@ -7,7 +7,8 @@
#define GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION 40 // Amount of chunks (in each direction) to split atlas draw distance for objects culling
#define GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE 4
#define GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE 5 // Amount of float4s per-tile
#define GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD_ENABLED 1 // Enables using tile normal threshold to prevent sampling pixels behind the view point (but might cause back artifacts)
#define GLOBAL_SURFACE_ATLAS_TILE_NORMAL_WEIGHT_ENABLED 1 // Enables using tile normal to weight the samples
#define GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD_ENABLED 0 // Enables using tile normal threshold to prevent sampling pixels behind the view point (but might cause back artifacts)
#define GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD 0.05f // Cut-off value for tiles transitions blending during sampling
#define GLOBAL_SURFACE_ATLAS_TILE_PROJ_PLANE_OFFSET 0.1f // Small offset to prevent clipping with the closest triangles (shifts near and far planes)
@@ -24,6 +25,7 @@ struct GlobalSurfaceObject
float BoundsRadius;
float4x4 WorldToLocal;
float3 Extent;
bool UseVisibility;
uint TileOffsets[6];
uint DataSize; // count of float4s for object+tiles
};
@@ -48,7 +50,7 @@ GlobalSurfaceObject LoadGlobalSurfaceAtlasObject(Buffer<float4> objects, uint ob
float4 vector2 = objects.Load(objectAddress + 2);
float4 vector3 = objects.Load(objectAddress + 3);
float4 vector4 = objects.Load(objectAddress + 4);
float4 vector5 = objects.Load(objectAddress + 5); // w unused
float4 vector5 = objects.Load(objectAddress + 5);
GlobalSurfaceObject object = (GlobalSurfaceObject)0;
object.BoundsPosition = vector0.xyz;
object.BoundsRadius = vector0.w;
@@ -57,6 +59,7 @@ GlobalSurfaceObject LoadGlobalSurfaceAtlasObject(Buffer<float4> objects, uint ob
object.WorldToLocal[2] = float4(vector4.xyz, 0.0f);
object.WorldToLocal[3] = float4(vector2.w, vector3.w, vector4.w, 1.0f);
object.Extent = vector5.xyz;
object.UseVisibility = vector5.w > 0.5f;
uint vector1x = asuint(vector1.x);
uint vector1y = asuint(vector1.y);
uint vector1z = asuint(vector1.z);
@@ -107,14 +110,14 @@ float3 SampleGlobalSurfaceAtlasTex(Texture2D atlas, float2 atlasUV, float4 bilin
return float3(dot(sampleX, bilinearWeights), dot(sampleY, bilinearWeights), dot(sampleZ, bilinearWeights));
}
float4 SampleGlobalSurfaceAtlasTile(const GlobalSurfaceAtlasData data, GlobalSurfaceTile tile, Texture2D depth, Texture2D atlas, float3 worldPosition, float3 worldNormal, float surfaceThreshold)
float4 SampleGlobalSurfaceAtlasTile(const GlobalSurfaceAtlasData data, GlobalSurfaceObject object, GlobalSurfaceTile tile, Texture2D depth, Texture2D atlas, float3 worldPosition, float3 worldNormal, float surfaceThreshold)
{
#if GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD_ENABLED
#if GLOBAL_SURFACE_ATLAS_TILE_NORMAL_WEIGHT_ENABLED
// Tile normal weight based on the sampling angle
float3 tileNormal = normalize(mul(worldNormal, (float3x3)tile.WorldToLocal));
float normalWeight = saturate(dot(float3(0, 0, -1), tileNormal));
normalWeight = (normalWeight - GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD) / (1.0f - GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD);
if (normalWeight <= 0.0f)
if (normalWeight <= 0.0f && object.UseVisibility)
return 0;
#endif
@@ -123,6 +126,7 @@ float4 SampleGlobalSurfaceAtlasTile(const GlobalSurfaceAtlasData data, GlobalSur
float tileDepth = tilePosition.z / tile.ViewBoundsSize.z;
float2 tileUV = saturate((tilePosition.xy / tile.ViewBoundsSize.xy) + 0.5f);
tileUV.y = 1.0 - tileUV.y;
tileUV = min(tileUV, 0.999999f);
float2 atlasUV = tileUV * tile.AtlasRectUV.zw + tile.AtlasRectUV.xy;
// Calculate bilinear weights
@@ -145,12 +149,13 @@ float4 SampleGlobalSurfaceAtlasTile(const GlobalSurfaceAtlasData data, GlobalSur
depthVisibility[i] = 0.0f;
}
float sampleWeight = dot(depthVisibility, bilinearWeights);
#if GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD_ENABLED
#if GLOBAL_SURFACE_ATLAS_TILE_NORMAL_WEIGHT_ENABLED
if (object.UseVisibility)
sampleWeight *= normalWeight;
#endif
if (sampleWeight <= 0.0f)
return 0;
bilinearWeights = depthVisibility * bilinearWeights;
bilinearWeights *= depthVisibility;
//bilinearWeights = normalize(bilinearWeights);
// Sample atlas texture
@@ -163,7 +168,7 @@ float4 SampleGlobalSurfaceAtlasTile(const GlobalSurfaceAtlasData data, GlobalSur
// Samples the Global Surface Atlas and returns the lighting (with opacity) at the given world location (and direction).
// surfaceThreshold - Additional threshold (in world-units) between object or tile size compared with input data (error due to SDF or LOD incorrect appearance)
float4 SampleGlobalSurfaceAtlas(const GlobalSurfaceAtlasData data, ByteAddressBuffer chunks, Buffer<float4> culledObjects, Texture2D depth, Texture2D atlas, float3 worldPosition, float3 worldNormal, float surfaceThreshold = 20.0f)
float4 SampleGlobalSurfaceAtlas(const GlobalSurfaceAtlasData data, ByteAddressBuffer chunks, ByteAddressBuffer culledObjects, Buffer<float4> objects, Texture2D depth, Texture2D atlas, float3 worldPosition, float3 worldNormal, float surfaceThreshold = 20.0f)
{
float4 result = float4(0, 0, 0, 0);
@@ -178,24 +183,22 @@ float4 SampleGlobalSurfaceAtlas(const GlobalSurfaceAtlasData data, ByteAddressBu
}
// Read objects counter
float4 chunkHeader = culledObjects[objectsStart];
objectsStart++;
uint objectsCount = asuint(chunkHeader.x);
uint objectsCount = culledObjects.Load(objectsStart * 4);
if (objectsCount > data.ObjectsCount) // Prevents crashing - don't know why the data is invalid here (rare issue when moving fast though scene with terrain)
return result;
objectsStart++;
// Loop over culled objects inside the chunk
LOOP
for (uint objectIndex = 0; objectIndex < objectsCount; objectIndex++)
{
// Cull point vs sphere
uint objectAddress = objectsStart;
float4 objectBounds = LoadGlobalSurfaceAtlasObjectBounds(culledObjects, objectAddress);
uint objectSize = LoadGlobalSurfaceAtlasObjectDataSize(culledObjects, objectAddress);
objectsStart += objectSize;
uint objectAddress = culledObjects.Load(objectsStart * 4);
objectsStart++;
float4 objectBounds = LoadGlobalSurfaceAtlasObjectBounds(objects, objectAddress);
if (distance(objectBounds.xyz, worldPosition) > objectBounds.w)
continue;
GlobalSurfaceObject object = LoadGlobalSurfaceAtlasObject(culledObjects, objectAddress);
GlobalSurfaceObject object = LoadGlobalSurfaceAtlasObject(objects, objectAddress);
float3 localPosition = mul(float4(worldPosition, 1), object.WorldToLocal).xyz;
float3 localExtent = object.Extent + surfaceThreshold;
if (any(localPosition > localExtent) || any(localPosition < -localExtent))
@@ -221,57 +224,57 @@ float4 SampleGlobalSurfaceAtlas(const GlobalSurfaceAtlasData data, ByteAddressBu
uint tileOffset = object.TileOffsets[localNormal.x > 0.0f ? 0 : 1];
if (localNormalSq.x > GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD * GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD && tileOffset != 0)
{
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset);
result += SampleGlobalSurfaceAtlasTile(data, object, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
}
tileOffset = object.TileOffsets[localNormal.y > 0.0f ? 2 : 3];
if (localNormalSq.y > GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD * GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD && tileOffset != 0)
{
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset);
result += SampleGlobalSurfaceAtlasTile(data, object, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
}
tileOffset = object.TileOffsets[localNormal.z > 0.0f ? 4 : 5];
if (localNormalSq.z > GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD * GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD && tileOffset != 0)
{
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset);
result += SampleGlobalSurfaceAtlasTile(data, object, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
}
#else
uint tileOffset = object.TileOffsets[0];
if (tileOffset != 0)
{
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset);
result += SampleGlobalSurfaceAtlasTile(data, object, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
}
tileOffset = object.TileOffsets[1];
if (tileOffset != 0)
{
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset);
result += SampleGlobalSurfaceAtlasTile(data, object, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
}
tileOffset = object.TileOffsets[2];
if (tileOffset != 0)
{
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset);
result += SampleGlobalSurfaceAtlasTile(data, object, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
}
tileOffset = object.TileOffsets[3];
if (tileOffset != 0)
{
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset);
result += SampleGlobalSurfaceAtlasTile(data, object, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
}
tileOffset = object.TileOffsets[4];
if (tileOffset != 0)
{
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset);
result += SampleGlobalSurfaceAtlasTile(data, object, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
}
tileOffset = object.TileOffsets[5];
if (tileOffset != 0)
{
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(objects, objectAddress + tileOffset);
result += SampleGlobalSurfaceAtlasTile(data, object, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
}
#endif
}

View File

@@ -62,6 +62,22 @@ void PS_Clear(out float4 Light : SV_Target0, out float4 RT0 : SV_Target1, out fl
RT2 = float4(1, 0, 0, 0);
}
#ifdef _PS_ClearLighting
Buffer<float4> GlobalSurfaceAtlasObjects : register(t4);
Texture2D Texture : register(t7);
// Pixel shader for Global Surface Atlas clearing.
// Loads the tile stored at input.TileAddress, remaps input.TileUV into the tile's atlas rectangle
// (AtlasRectUV.zw used as scale, AtlasRectUV.xy used as offset) and returns the point-sampled Texture value from that location.
META_PS(true, FEATURE_LEVEL_SM5)
float4 PS_ClearLighting(AtlasVertexOutput input) : SV_Target
{
    GlobalSurfaceTile clearTile = LoadGlobalSurfaceAtlasTile(GlobalSurfaceAtlasObjects, input.TileAddress);
    return Texture.Sample(SamplerPointClamp, input.TileUV * clearTile.AtlasRectUV.zw + clearTile.AtlasRectUV.xy);
}
#endif
#ifdef _PS_Lighting
#include "./Flax/GBuffer.hlsl"
@@ -71,15 +87,15 @@ void PS_Clear(out float4 Light : SV_Target0, out float4 RT0 : SV_Target1, out fl
// GBuffer+Depth at 0-3 slots
Buffer<float4> GlobalSurfaceAtlasObjects : register(t4);
#if INDIRECT_LIGHT
Texture2D<float4> ProbesState : register(t5);
Texture2D<snorm float4> ProbesState : register(t5);
Texture2D<float4> ProbesDistance : register(t6);
Texture2D<float4> ProbesIrradiance : register(t7);
#else
Texture3D<float> GlobalSDFTex[4] : register(t5);
Texture3D<float> GlobalSDFMip[4] : register(t9);
Texture3D<float> GlobalSDFTex : register(t5);
Texture3D<float> GlobalSDFMip : register(t6);
#endif
// Pixel shader for Global Surface Atlas shading with direct light contribution
// Pixel shader for Global Surface Atlas shading
META_PS(true, FEATURE_LEVEL_SM5)
META_PERMUTATION_1(RADIAL_LIGHT=0)
META_PERMUTATION_1(RADIAL_LIGHT=1)
@@ -125,7 +141,7 @@ float4 PS_Lighting(AtlasVertexOutput input) : SV_Target
// Calculate lighting
float3 diffuseColor = GetDiffuseColor(gBuffer);
diffuseColor = min(diffuseColor, 0.9f); // Nothing reflects diffuse like perfectly in the real world (ensure to have energy loss at each light bounce)
diffuseColor = min(diffuseColor, 0.98f); // Nothing reflects diffuse like perfectly in the real world (ensure to have energy loss at each light bounce)
float3 diffuse = Diffuse_Lambert(diffuseColor);
float4 light = float4(diffuse * irradiance * gBuffer.AO, 1);
#else
@@ -159,7 +175,7 @@ float4 PS_Lighting(AtlasVertexOutput input) : SV_Target
// Shoot a ray from texel into the light to see if there is any occluder
GlobalSDFTrace trace;
trace.Init(gBuffer.WorldPos + gBuffer.Normal * shadowBias, L, bias, toLightDst - bias);
GlobalSDFHit hit = RayTraceGlobalSDF(GlobalSDF, GlobalSDFTex, GlobalSDFMip, trace);
GlobalSDFHit hit = RayTraceGlobalSDF(GlobalSDF, GlobalSDFTex, GlobalSDFMip, trace, 2.0f);
shadowMask = hit.IsHit() ? LightShadowsStrength : 1;
}
else
@@ -187,24 +203,25 @@ float4 PS_Lighting(AtlasVertexOutput input) : SV_Target
#include "./Flax/Collisions.hlsl"
RWByteAddressBuffer RWGlobalSurfaceAtlasChunks : register(u0);
RWBuffer<float4> RWGlobalSurfaceAtlasCulledObjects : register(u1);
RWByteAddressBuffer RWGlobalSurfaceAtlasCulledObjects : register(u1);
Buffer<float4> GlobalSurfaceAtlasObjects : register(t0);
#define GLOBAL_SURFACE_ATLAS_CULL_LOCAL_SIZE 32 // Amount of objects to cache locally per-thread for culling
// Compute shader for culling objects into chunks
META_CS(true, FEATURE_LEVEL_SM5)
[numthreads(GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE, GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE, GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE)]
void CS_CullObjects(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_DispatchThreadID, uint3 GroupThreadId : SV_GroupThreadID)
void CS_CullObjects(uint3 DispatchThreadId : SV_DispatchThreadID)
{
uint3 chunkCoord = DispatchThreadId;
uint chunkAddress = (chunkCoord.z * (GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION * GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION) + chunkCoord.y * GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION + chunkCoord.x) * 4;
if (chunkAddress == 0)
return; // Skip chunk at 0,0,0 (used for counter)
float3 chunkMin = GlobalSurfaceAtlas.ViewPos + (chunkCoord - (GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION * 0.5f)) * GlobalSurfaceAtlas.ChunkSize;
float3 chunkMax = chunkMin + GlobalSurfaceAtlas.ChunkSize;
// Count objects data size in this chunk (amount of float4s)
uint objectsSize = 0, objectAddress = 0, objectsCount = 0;
// TODO: maybe cache 20-30 culled object indices in thread memory to skip culling them again when copying data (maybe reduce chunk size to get smaller objects count per chunk)?
// Count objects in this chunk
uint objectAddress = 0, objectsCount = 0;
// TODO: pre-cull objects within a thread group
uint localCulledObjects[GLOBAL_SURFACE_ATLAS_CULL_LOCAL_SIZE];
LOOP
for (uint objectIndex = 0; objectIndex < GlobalSurfaceAtlas.ObjectsCount; objectIndex++)
{
@@ -212,22 +229,22 @@ void CS_CullObjects(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_Disp
uint objectSize = LoadGlobalSurfaceAtlasObjectDataSize(GlobalSurfaceAtlasObjects, objectAddress);
if (BoxIntersectsSphere(chunkMin, chunkMax, objectBounds.xyz, objectBounds.w))
{
objectsSize += objectSize;
localCulledObjects[objectsCount % GLOBAL_SURFACE_ATLAS_CULL_LOCAL_SIZE] = objectAddress;
objectsCount++;
}
objectAddress += objectSize;
}
if (objectsSize == 0)
if (objectsCount == 0)
{
// Empty chunk
RWGlobalSurfaceAtlasChunks.Store(chunkAddress, 0);
return;
}
objectsSize++; // Include objects count before actual objects data
// Allocate object data size in the buffer
uint objectsStart;
RWGlobalSurfaceAtlasChunks.InterlockedAdd(0, objectsSize, objectsStart);
uint objectsSize = objectsCount + 1; // Include objects count before actual objects data
RWGlobalSurfaceAtlasCulledObjects.InterlockedAdd(0, objectsSize, objectsStart); // Counter at 0
if (objectsStart + objectsSize > CulledObjectsCapacity)
{
// Not enough space in the buffer
@@ -238,11 +255,24 @@ void CS_CullObjects(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_Disp
// Write object data start
RWGlobalSurfaceAtlasChunks.Store(chunkAddress, objectsStart);
// Write objects count before actual objects data
RWGlobalSurfaceAtlasCulledObjects[objectsStart] = float4(asfloat(objectsCount), 0, 0, 0);
objectsStart++;
// Write objects count before actual objects indices
RWGlobalSurfaceAtlasCulledObjects.Store(objectsStart * 4, objectsCount);
// Copy objects data in this chunk
if (objectsCount <= GLOBAL_SURFACE_ATLAS_CULL_LOCAL_SIZE)
{
// Reuse locally cached objects
LOOP
for (uint objectIndex = 0; objectIndex < objectsCount; objectIndex++)
{
objectAddress = localCulledObjects[objectIndex];
objectsStart++;
RWGlobalSurfaceAtlasCulledObjects.Store(objectsStart * 4, objectAddress);
}
}
else
{
// Brute-force culling
objectAddress = 0;
LOOP
for (uint objectIndex = 0; objectIndex < GlobalSurfaceAtlas.ObjectsCount; objectIndex++)
@@ -251,27 +281,26 @@ void CS_CullObjects(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_Disp
uint objectSize = LoadGlobalSurfaceAtlasObjectDataSize(GlobalSurfaceAtlasObjects, objectAddress);
if (BoxIntersectsSphere(chunkMin, chunkMax, objectBounds.xyz, objectBounds.w))
{
for (uint i = 0; i < objectSize; i++)
{
RWGlobalSurfaceAtlasCulledObjects[objectsStart + i] = GlobalSurfaceAtlasObjects[objectAddress + i];
}
objectsStart += objectSize;
objectsStart++;
RWGlobalSurfaceAtlasCulledObjects.Store(objectsStart * 4, objectAddress);
}
objectAddress += objectSize;
}
}
}
#endif
#ifdef _PS_Debug
Texture3D<float> GlobalSDFTex[4] : register(t0);
Texture3D<float> GlobalSDFMip[4] : register(t4);
ByteAddressBuffer GlobalSurfaceAtlasChunks : register(t8);
Buffer<float4> GlobalSurfaceAtlasCulledObjects : register(t9);
Texture2D GlobalSurfaceAtlasDepth : register(t10);
Texture2D GlobalSurfaceAtlasTex : register(t11);
TextureCube Skybox : register(t12);
Texture3D<float> GlobalSDFTex : register(t0);
Texture3D<float> GlobalSDFMip : register(t1);
ByteAddressBuffer GlobalSurfaceAtlasChunks : register(t2);
ByteAddressBuffer GlobalSurfaceAtlasCulledObjects : register(t3);
Buffer<float4> GlobalSurfaceAtlasObjects : register(t4);
Texture2D GlobalSurfaceAtlasTex : register(t5);
Texture2D GlobalSurfaceAtlasDepth : register(t6);
TextureCube Skybox : register(t7);
// Pixel shader for Global Surface Atlas debug drawing
META_PS(true, FEATURE_LEVEL_SM5)
@@ -295,7 +324,7 @@ float4 PS_Debug(Quad_VS2PS input) : SV_Target
{
// Sample Global Surface Atlas at the hit location
float surfaceThreshold = GetGlobalSurfaceAtlasThreshold(GlobalSDF, hit);
color = SampleGlobalSurfaceAtlas(GlobalSurfaceAtlas, GlobalSurfaceAtlasChunks, GlobalSurfaceAtlasCulledObjects, GlobalSurfaceAtlasDepth, GlobalSurfaceAtlasTex, hit.GetHitPosition(trace), -viewRay, surfaceThreshold).rgb;
color = SampleGlobalSurfaceAtlas(GlobalSurfaceAtlas, GlobalSurfaceAtlasChunks, GlobalSurfaceAtlasCulledObjects, GlobalSurfaceAtlasObjects, GlobalSurfaceAtlasDepth, GlobalSurfaceAtlasTex, hit.GetHitPosition(trace), -viewRay, surfaceThreshold).rgb;
//color = hit.HitNormal * 0.5f + 0.5f;
}
else

View File

@@ -59,20 +59,27 @@ struct GlobalSDFHit
}
};
// Resolves the sampling coordinates of a world-space position within a single Global SDF cascade.
// cascadeMaxDistance - receives twice the w component of the cascade's CascadePosDistance entry
// cascadeUV - receives the position relative to the cascade origin (xyz of CascadePosDistance), scaled by cascadeMaxDistance, re-centered by 0.5 and clamped to [0; 1]
// textureUV - receives cascadeUV with the X coordinate remapped into the slice of this cascade
// NOTE(review): cascadeUV is saturated here, so range tests performed on it by callers (cascadeUV < 0, cascadeUV > 1) cannot fail - confirm this is intended.
void GetGlobalSDFCascadeUV(const GlobalSDFData data, uint cascade, float3 worldPosition, out float cascadeMaxDistance, out float3 cascadeUV, out float3 textureUV)
{
    float4 posDistance = data.CascadePosDistance[cascade];
    cascadeMaxDistance = posDistance.w * 2;
    float3 cascadeOffset = worldPosition - posDistance.xyz;
    cascadeUV = saturate(cascadeOffset / cascadeMaxDistance + 0.5f);

    // Cascades are placed next to each other on X axis
    float textureU = ((float)cascade + cascadeUV.x) / (float)data.CascadesCount;
    textureUV = float3(textureU, cascadeUV.y, cascadeUV.z);
}
// Samples the Global SDF and returns the distance to the closest surface (in world units) at the given world location.
float SampleGlobalSDF(const GlobalSDFData data, Texture3D<float> tex[4], float3 worldPosition)
float SampleGlobalSDF(const GlobalSDFData data, Texture3D<float> tex, float3 worldPosition)
{
float distance = data.CascadePosDistance[3].w * 2.0f;
if (distance <= 0.0f)
return GLOBAL_SDF_WORLD_SIZE;
UNROLL
for (uint cascade = 0; cascade < data.CascadesCount; cascade++)
{
float4 cascadePosDistance = data.CascadePosDistance[cascade];
float cascadeMaxDistance = cascadePosDistance.w * 2;
float3 posInCascade = worldPosition - cascadePosDistance.xyz;
float3 cascadeUV = posInCascade / cascadeMaxDistance + 0.5f;
float cascadeDistance = tex[cascade].SampleLevel(SamplerLinearClamp, cascadeUV, 0);
float cascadeMaxDistance;
float3 cascadeUV, textureUV;
GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeMaxDistance, cascadeUV, textureUV);
float cascadeDistance = tex.SampleLevel(SamplerLinearClamp, textureUV, 0);
if (cascadeDistance < 1.0f && !any(cascadeUV < 0) && !any(cascadeUV > 1))
{
distance = cascadeDistance * cascadeMaxDistance;
@@ -83,29 +90,27 @@ float SampleGlobalSDF(const GlobalSDFData data, Texture3D<float> tex[4], float3
}
// Samples the Global SDF and returns the gradient vector (derivative) at the given world location. Normalize it to get normal vector.
float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D<float> tex[4], float3 worldPosition, out float distance)
float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D<float> tex, float3 worldPosition, out float distance)
{
float3 gradient = float3(0, 0.00001f, 0);
distance = GLOBAL_SDF_WORLD_SIZE;
if (data.CascadePosDistance[3].w <= 0.0f)
return gradient;
UNROLL
for (uint cascade = 0; cascade < data.CascadesCount; cascade++)
{
float4 cascadePosDistance = data.CascadePosDistance[cascade];
float cascadeMaxDistance = cascadePosDistance.w * 2;
float3 posInCascade = worldPosition - cascadePosDistance.xyz;
float3 cascadeUV = posInCascade / cascadeMaxDistance + 0.5f;
float cascadeDistance = tex[cascade].SampleLevel(SamplerLinearClamp, cascadeUV, 0);
float cascadeMaxDistance;
float3 cascadeUV, textureUV;
GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeMaxDistance, cascadeUV, textureUV);
float cascadeDistance = tex.SampleLevel(SamplerLinearClamp, textureUV, 0);
if (cascadeDistance < 0.9f && !any(cascadeUV < 0) && !any(cascadeUV > 1))
{
float texelOffset = 1.0f / data.Resolution;
float xp = tex[cascade].SampleLevel(SamplerLinearClamp, float3(cascadeUV.x + texelOffset, cascadeUV.y, cascadeUV.z), 0).x;
float xn = tex[cascade].SampleLevel(SamplerLinearClamp, float3(cascadeUV.x - texelOffset, cascadeUV.y, cascadeUV.z), 0).x;
float yp = tex[cascade].SampleLevel(SamplerLinearClamp, float3(cascadeUV.x, cascadeUV.y + texelOffset, cascadeUV.z), 0).x;
float yn = tex[cascade].SampleLevel(SamplerLinearClamp, float3(cascadeUV.x, cascadeUV.y - texelOffset, cascadeUV.z), 0).x;
float zp = tex[cascade].SampleLevel(SamplerLinearClamp, float3(cascadeUV.x, cascadeUV.y, cascadeUV.z + texelOffset), 0).x;
float zn = tex[cascade].SampleLevel(SamplerLinearClamp, float3(cascadeUV.x, cascadeUV.y, cascadeUV.z - texelOffset), 0).x;
float xp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x + texelOffset, textureUV.y, textureUV.z), 0).x;
float xn = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x - texelOffset, textureUV.y, textureUV.z), 0).x;
float yp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y + texelOffset, textureUV.z), 0).x;
float yn = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y - texelOffset, textureUV.z), 0).x;
float zp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y, textureUV.z + texelOffset), 0).x;
float zn = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y, textureUV.z - texelOffset), 0).x;
gradient = float3(xp - xn, yp - yn, zp - zn) * cascadeMaxDistance;
distance = cascadeDistance * cascadeMaxDistance;
break;
@@ -115,7 +120,7 @@ float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D<float> tex[4]
}
// Samples the Global SDF and returns the gradient vector (derivative) at the given world location. Normalize it to get normal vector.
float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D<float> tex[4], Texture3D<float> mips[4], float3 worldPosition, out float distance)
float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D<float> tex, Texture3D<float> mip, float3 worldPosition, out float distance)
{
float3 gradient = float3(0, 0.00001f, 0);
distance = GLOBAL_SDF_WORLD_SIZE;
@@ -123,28 +128,26 @@ float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D<float> tex[4]
return gradient;
float chunkSizeDistance = (float)GLOBAL_SDF_RASTERIZE_CHUNK_SIZE / data.Resolution; // Size of the chunk in SDF distance (0-1)
float chunkMarginDistance = (float)GLOBAL_SDF_RASTERIZE_CHUNK_MARGIN / data.Resolution; // Size of the chunk margin in SDF distance (0-1)
UNROLL
for (uint cascade = 0; cascade < data.CascadesCount; cascade++)
{
float4 cascadePosDistance = data.CascadePosDistance[cascade];
float cascadeMaxDistance = cascadePosDistance.w * 2;
float3 posInCascade = worldPosition - cascadePosDistance.xyz;
float3 cascadeUV = posInCascade / cascadeMaxDistance + 0.5f;
float cascadeDistance = mips[cascade].SampleLevel(SamplerLinearClamp, cascadeUV, 0);
float cascadeMaxDistance;
float3 cascadeUV, textureUV;
GetGlobalSDFCascadeUV(data, cascade, worldPosition, cascadeMaxDistance, cascadeUV, textureUV);
float cascadeDistance = mip.SampleLevel(SamplerLinearClamp, textureUV, 0);
if (cascadeDistance < chunkSizeDistance && !any(cascadeUV < 0) && !any(cascadeUV > 1))
{
float cascadeDistanceTex = tex[cascade].SampleLevel(SamplerLinearClamp, cascadeUV, 0);
float cascadeDistanceTex = tex.SampleLevel(SamplerLinearClamp, textureUV, 0);
if (cascadeDistanceTex < chunkMarginDistance * 2)
{
cascadeDistance = cascadeDistanceTex;
}
float texelOffset = 1.0f / data.Resolution;
float xp = tex[cascade].SampleLevel(SamplerLinearClamp, float3(cascadeUV.x + texelOffset, cascadeUV.y, cascadeUV.z), 0).x;
float xn = tex[cascade].SampleLevel(SamplerLinearClamp, float3(cascadeUV.x - texelOffset, cascadeUV.y, cascadeUV.z), 0).x;
float yp = tex[cascade].SampleLevel(SamplerLinearClamp, float3(cascadeUV.x, cascadeUV.y + texelOffset, cascadeUV.z), 0).x;
float yn = tex[cascade].SampleLevel(SamplerLinearClamp, float3(cascadeUV.x, cascadeUV.y - texelOffset, cascadeUV.z), 0).x;
float zp = tex[cascade].SampleLevel(SamplerLinearClamp, float3(cascadeUV.x, cascadeUV.y, cascadeUV.z + texelOffset), 0).x;
float zn = tex[cascade].SampleLevel(SamplerLinearClamp, float3(cascadeUV.x, cascadeUV.y, cascadeUV.z - texelOffset), 0).x;
float xp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x + texelOffset, textureUV.y, textureUV.z), 0).x;
float xn = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x - texelOffset, textureUV.y, textureUV.z), 0).x;
float yp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y + texelOffset, textureUV.z), 0).x;
float yn = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y - texelOffset, textureUV.z), 0).x;
float zp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y, textureUV.z + texelOffset), 0).x;
float zn = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y, textureUV.z - texelOffset), 0).x;
gradient = float3(xp - xn, yp - yn, zp - zn) * cascadeMaxDistance;
distance = cascadeDistance * cascadeMaxDistance;
break;
@@ -154,7 +157,8 @@ float3 SampleGlobalSDFGradient(const GlobalSDFData data, Texture3D<float> tex[4]
}
// Ray traces the Global SDF.
GlobalSDFHit RayTraceGlobalSDF(const GlobalSDFData data, Texture3D<float> tex[4], Texture3D<float> mips[4], const GlobalSDFTrace trace)
// cascadeTraceStartBias - scales the trace start position offset (along the trace direction) by cascade voxel size (reduces artifacts on far cascades). Use it for shadow rays to prevent self-occlusion when tracing from an object surface that loses quality in far cascades.
GlobalSDFHit RayTraceGlobalSDF(const GlobalSDFData data, Texture3D<float> tex, Texture3D<float> mip, const GlobalSDFTrace trace, float cascadeTraceStartBias = 0.0f)
{
GlobalSDFHit hit = (GlobalSDFHit)0;
hit.HitTime = -1.0f;
@@ -163,17 +167,16 @@ GlobalSDFHit RayTraceGlobalSDF(const GlobalSDFData data, Texture3D<float> tex[4]
float nextIntersectionStart = 0.0f;
float traceMaxDistance = min(trace.MaxDistance, data.CascadePosDistance[3].w * 2);
float3 traceEndPosition = trace.WorldPosition + trace.WorldDirection * traceMaxDistance;
UNROLL
for (uint cascade = 0; cascade < data.CascadesCount && hit.HitTime < 0.0f; cascade++)
{
float4 cascadePosDistance = data.CascadePosDistance[cascade];
float cascadeMaxDistance = cascadePosDistance.w * 2;
float voxelSize = data.CascadeVoxelSize[cascade];
float voxelExtent = voxelSize * 0.5f;
float cascadeMinStep = voxelSize;
float3 worldPosition = trace.WorldPosition + trace.WorldDirection * (voxelSize * cascadeTraceStartBias);
// Hit the cascade bounds to find the intersection points
float2 intersections = LineHitBox(trace.WorldPosition, traceEndPosition, cascadePosDistance.xyz - cascadePosDistance.www, cascadePosDistance.xyz + cascadePosDistance.www);
float2 intersections = LineHitBox(worldPosition, traceEndPosition, cascadePosDistance.xyz - cascadePosDistance.www, cascadePosDistance.xyz + cascadePosDistance.www);
intersections.xy *= traceMaxDistance;
intersections.x = max(intersections.x, nextIntersectionStart);
float stepTime = intersections.x;
@@ -193,15 +196,16 @@ GlobalSDFHit RayTraceGlobalSDF(const GlobalSDFData data, Texture3D<float> tex[4]
LOOP
for (; step < 250 && stepTime < intersections.y; step++)
{
float3 stepPosition = trace.WorldPosition + trace.WorldDirection * stepTime;
float3 stepPosition = worldPosition + trace.WorldDirection * stepTime;
// Sample SDF
float3 posInCascade = stepPosition - cascadePosDistance.xyz;
float3 cascadeUV = posInCascade / cascadeMaxDistance + 0.5f;
float stepDistance = mips[cascade].SampleLevel(SamplerLinearClamp, cascadeUV, 0);
float cascadeMaxDistance;
float3 cascadeUV, textureUV;
GetGlobalSDFCascadeUV(data, cascade, stepPosition, cascadeMaxDistance, cascadeUV, textureUV);
float stepDistance = mip.SampleLevel(SamplerLinearClamp, textureUV, 0);
if (stepDistance < chunkSizeDistance)
{
float stepDistanceTex = tex[cascade].SampleLevel(SamplerLinearClamp, cascadeUV, 0);
float stepDistanceTex = tex.SampleLevel(SamplerLinearClamp, textureUV, 0);
if (stepDistanceTex < chunkMarginDistance * 2)
{
stepDistance = stepDistanceTex;
@@ -226,12 +230,12 @@ GlobalSDFHit RayTraceGlobalSDF(const GlobalSDFData data, Texture3D<float> tex[4]
{
// Calculate hit normal from SDF gradient
float texelOffset = 1.0f / data.Resolution;
float xp = tex[cascade].SampleLevel(SamplerLinearClamp, float3(cascadeUV.x + texelOffset, cascadeUV.y, cascadeUV.z), 0).x;
float xn = tex[cascade].SampleLevel(SamplerLinearClamp, float3(cascadeUV.x - texelOffset, cascadeUV.y, cascadeUV.z), 0).x;
float yp = tex[cascade].SampleLevel(SamplerLinearClamp, float3(cascadeUV.x, cascadeUV.y + texelOffset, cascadeUV.z), 0).x;
float yn = tex[cascade].SampleLevel(SamplerLinearClamp, float3(cascadeUV.x, cascadeUV.y - texelOffset, cascadeUV.z), 0).x;
float zp = tex[cascade].SampleLevel(SamplerLinearClamp, float3(cascadeUV.x, cascadeUV.y, cascadeUV.z + texelOffset), 0).x;
float zn = tex[cascade].SampleLevel(SamplerLinearClamp, float3(cascadeUV.x, cascadeUV.y, cascadeUV.z - texelOffset), 0).x;
float xp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x + texelOffset, textureUV.y, textureUV.z), 0).x;
float xn = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x - texelOffset, textureUV.y, textureUV.z), 0).x;
float yp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y + texelOffset, textureUV.z), 0).x;
float yn = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y - texelOffset, textureUV.z), 0).x;
float zp = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y, textureUV.z + texelOffset), 0).x;
float zn = tex.SampleLevel(SamplerLinearClamp, float3(textureUV.x, textureUV.y, textureUV.z - texelOffset), 0).x;
hit.HitNormal = normalize(float3(xp - xn, yp - yn, zp - zn));
}
break;

View File

@@ -37,11 +37,15 @@ float3 CascadeCoordToPosMul;
int ObjectsCount;
float3 CascadeCoordToPosAdd;
int CascadeResolution;
float Padding0;
int CascadeIndex;
float CascadeVoxelSize;
int CascadeMipResolution;
int CascadeMipFactor;
uint4 Objects[GLOBAL_SDF_RASTERIZE_MODEL_MAX_COUNT / 4];
uint GenerateMipTexResolution;
uint GenerateMipCoordScale;
uint GenerateMipTexOffsetX;
uint GenerateMipMipOffsetX;
META_CB_END
float CombineDistanceToSDF(float sdf, float distanceToSDF)
@@ -97,10 +101,11 @@ META_CS(true, FEATURE_LEVEL_SM5)
META_PERMUTATION_1(READ_SDF=0)
META_PERMUTATION_1(READ_SDF=1)
[numthreads(GLOBAL_SDF_RASTERIZE_GROUP_SIZE, GLOBAL_SDF_RASTERIZE_GROUP_SIZE, GLOBAL_SDF_RASTERIZE_GROUP_SIZE)]
void CS_RasterizeModel(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_DispatchThreadID, uint3 GroupThreadId : SV_GroupThreadID)
void CS_RasterizeModel(uint3 DispatchThreadId : SV_DispatchThreadID)
{
uint3 voxelCoord = ChunkCoord + DispatchThreadId;
float3 voxelWorldPos = voxelCoord * CascadeCoordToPosMul + CascadeCoordToPosAdd;
voxelCoord.x += CascadeIndex * CascadeResolution;
float minDistance = MaxDistance;
#if READ_SDF
minDistance *= GlobalSDFTex[voxelCoord];
@@ -123,10 +128,11 @@ Texture2D<float4> ObjectsTextures[GLOBAL_SDF_RASTERIZE_HEIGHTFIELD_MAX_COUNT] :
// Compute shader for rasterizing heightfield into Global SDF
META_CS(true, FEATURE_LEVEL_SM5)
[numthreads(GLOBAL_SDF_RASTERIZE_GROUP_SIZE, GLOBAL_SDF_RASTERIZE_GROUP_SIZE, GLOBAL_SDF_RASTERIZE_GROUP_SIZE)]
void CS_RasterizeHeightfield(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_DispatchThreadID, uint3 GroupThreadId : SV_GroupThreadID)
void CS_RasterizeHeightfield(uint3 DispatchThreadId : SV_DispatchThreadID)
{
uint3 voxelCoord = ChunkCoord + DispatchThreadId;
float3 voxelWorldPos = voxelCoord * CascadeCoordToPosMul + CascadeCoordToPosAdd;
voxelCoord.x += CascadeIndex * CascadeResolution;
float minDistance = MaxDistance * GlobalSDFTex[voxelCoord];
float thickness = CascadeVoxelSize * -8;
for (uint i = 0; i < ObjectsCount; i++)
@@ -167,9 +173,10 @@ RWTexture3D<float> GlobalSDFTex : register(u0);
// Compute shader for clearing Global SDF chunk
META_CS(true, FEATURE_LEVEL_SM5)
[numthreads(GLOBAL_SDF_RASTERIZE_GROUP_SIZE, GLOBAL_SDF_RASTERIZE_GROUP_SIZE, GLOBAL_SDF_RASTERIZE_GROUP_SIZE)]
void CS_ClearChunk(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_DispatchThreadID, uint3 GroupThreadId : SV_GroupThreadID)
void CS_ClearChunk(uint3 DispatchThreadId : SV_DispatchThreadID)
{
uint3 voxelCoord = ChunkCoord + DispatchThreadId;
voxelCoord.x += CascadeIndex * CascadeResolution;
GlobalSDFTex[voxelCoord] = 1.0f;
}
@@ -182,21 +189,13 @@ Texture3D<float> GlobalSDFTex : register(t0);
float SampleSDF(uint3 voxelCoordMip, int3 offset)
{
#if SAMPLE_MIP
// Sampling Global SDF Mip
float resolution = CascadeMipResolution;
#else
// Sampling Global SDF Tex
voxelCoordMip *= CascadeMipFactor;
float resolution = CascadeResolution;
#endif
// Sample SDF
voxelCoordMip = (uint3)clamp((int3)voxelCoordMip + offset, 0, resolution - 1);
voxelCoordMip = (uint3)clamp((int3)voxelCoordMip * GenerateMipCoordScale + offset, 0, GenerateMipTexResolution - 1);
voxelCoordMip.x += GenerateMipTexOffsetX;
float result = GlobalSDFTex[voxelCoordMip].r;
// Extend by distance to the sampled texel location
float distanceInWorldUnits = length(offset) * (MaxDistance / resolution);
float distanceInWorldUnits = length(offset) * (MaxDistance / (float)GenerateMipTexResolution);
float distanceToVoxel = distanceInWorldUnits / MaxDistance;
result = CombineDistanceToSDF(result, distanceToVoxel);
@@ -205,10 +204,8 @@ float SampleSDF(uint3 voxelCoordMip, int3 offset)
// Compute shader for generating mip for Global SDF (uses flood fill algorithm)
META_CS(true, FEATURE_LEVEL_SM5)
META_PERMUTATION_1(SAMPLE_MIP=0)
META_PERMUTATION_1(SAMPLE_MIP=1)
[numthreads(GLOBAL_SDF_MIP_GROUP_SIZE, GLOBAL_SDF_MIP_GROUP_SIZE, GLOBAL_SDF_MIP_GROUP_SIZE)]
void CS_GenerateMip(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_DispatchThreadID, uint3 GroupThreadId : SV_GroupThreadID)
void CS_GenerateMip(uint3 DispatchThreadId : SV_DispatchThreadID)
{
uint3 voxelCoordMip = DispatchThreadId;
float minDistance = SampleSDF(voxelCoordMip, int3(0, 0, 0));
@@ -221,6 +218,7 @@ void CS_GenerateMip(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_Disp
minDistance = min(minDistance, SampleSDF(voxelCoordMip, int3(0, -1, 0)));
minDistance = min(minDistance, SampleSDF(voxelCoordMip, int3(0, 0, -1)));
voxelCoordMip.x += GenerateMipMipOffsetX;
GlobalSDFMip[voxelCoordMip] = minDistance;
}
@@ -228,8 +226,8 @@ void CS_GenerateMip(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_Disp
#ifdef _PS_Debug
Texture3D<float> GlobalSDFTex[4] : register(t0);
Texture3D<float> GlobalSDFMip[4] : register(t4);
Texture3D<float> GlobalSDFTex : register(t0);
Texture3D<float> GlobalSDFMip : register(t1);
// Pixel shader for Global SDF debug drawing
META_PS(true, FEATURE_LEVEL_SM5)
@@ -241,6 +239,7 @@ float4 PS_Debug(Quad_VS2PS input) : SV_Target
float mip = 0;
uint cascade = 0;
float distance01 = GlobalSDFTex[cascade].SampleLevel(SamplerLinearClamp, float3(input.TexCoord, zSlice), mip).x;
//float distance01 = GlobalSDFTex[cascade].SampleLevel(SamplerLinearClamp, float3((input.TexCoord.x + cascade) / (float)GlobalSDF.CascadesCount, input.TexCoord.y, zSlice), mip).x;
//float distance01 = GlobalSDFMip[cascade].SampleLevel(SamplerLinearClamp, float3(input.TexCoord, zSlice), mip).x;
float distance = distance01 * GlobalSDF.CascadePosDistance[cascade].w;
if (abs(distance) < 1)

View File

@@ -49,13 +49,12 @@ float3x3 EulerMatrix(float3 angles)
{
float3 s, c;
sincos(angles, s, c);
return float3x3(c.y * c.z + s.x * s.y * s.z, c.z * s.x * s.y - c.y * s.z, c.x * s.y,
c.x * s.z, c.x * c.z, -s.x,
-c.z * s.y + c.y * s.x * s.z, c.y * c.z * s.x + s.y * s.z, c.x * c.y);
return float3x3(c.y * c.z + s.x * s.y * s.z, c.z * s.x * s.y - c.y * s.z, c.x * s.y, c.x * s.z, c.x * c.z, -s.x, -c.z * s.y + c.y * s.x * s.z, c.y * c.z * s.x + s.y * s.z, c.x * c.y);
}
float4x4 QuaternionToMatrix(float4 q)
{
// @formatter:off
float x2 = q.x + q.x; float y2 = q.y + q.y; float z2 = q.z + q.z;
float xx = q.x * x2; float xy = q.x * y2; float xz = q.x * z2;
float yy = q.y * y2; float yz = q.y * z2; float zz = q.z * z2;
@@ -69,6 +68,7 @@ float4x4 QuaternionToMatrix(float4 q)
0.0f, 0.0f, 0.0f, 1.0f
};
return result;
// @formatter:on
}
#endif

View File

@@ -63,17 +63,18 @@ void ConvertSH3ToHBasis(in float3 sh[9], out float3 hBasis[4])
const float rt152 = sqrt(15.0f / 2.0f);
const float convMatrix[4 * 9] =
{
// @formatter:off
1.0f / rt2, 0, 0.5f * rt32, 0, 0, 0, 0, 0, 0,
0, 1.0f / rt2, 0, 0, 0, (3.0f / 8.0f) * rt52, 0, 0, 0,
0, 0, 1.0f / (2.0f * rt2), 0, 0, 0, 0.25f * rt152, 0, 0,
0, 0, 0, 1.0f / rt2, 0, 0, 0, (3.0f / 8.0f) * rt52, 0
// @formatter:on
};
UNROLL
for (uint row = 0; row < 4; row++)
{
hBasis[row] = 0.0f;
UNROLL
for (uint col = 0; col < 9; col++)
hBasis[row] += convMatrix[row * 9 + col] * sh[col];

View File

@@ -103,12 +103,9 @@ float3 TraceSceenSpaceReflection(float2 uv, GBufferSample gBuffer, Texture2D dep
{
break;
}
else
{
currOffset -= rayStep;
rayStep *= 0.5;
}
}
// Move forward
currOffset += rayStep;

View File

@@ -286,7 +286,7 @@ Texture3D<float4> LightScatteringHistory : register(t2);
Texture3D<float4> LocalShadowedLightScattering : register(t3);
Texture2DArray ShadowMapCSM : register(t4);
#if USE_DDGI
Texture2D<float4> ProbesState : register(t5);
Texture2D<snorm float4> ProbesState : register(t5);
Texture2D<float4> ProbesDistance : register(t6);
Texture2D<float4> ProbesIrradiance : register(t7);
#else
@@ -337,7 +337,7 @@ void CS_LightScattering(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_
#if USE_DDGI
// Dynamic Diffuse Global Illumination
float3 irradiance = SampleDDGIIrradiance(DDGI, ProbesState, ProbesDistance, ProbesIrradiance, positionWS, cameraVectorNormalized, 1.0f);
float3 irradiance = SampleDDGIIrradiance(DDGI, ProbesState, ProbesDistance, ProbesIrradiance, positionWS, cameraVectorNormalized, 0.0f, cellOffset.x);
lightScattering += float4(irradiance, 1);
#else
// Sky light