diff --git a/Content/Shaders/GI/GlobalSurfaceAtlas.flax b/Content/Shaders/GI/GlobalSurfaceAtlas.flax
index d3f164f56..7963c2a71 100644
--- a/Content/Shaders/GI/GlobalSurfaceAtlas.flax
+++ b/Content/Shaders/GI/GlobalSurfaceAtlas.flax
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:5ef0f096465bb267138c7f10ec745e171a6fd642a22801f339eb6da260665f0b
-size 12626
+oid sha256:5bccb119c58a4fcec267e452bdf6026b7e14531ffcf60680026ce964945457cb
+size 12461
diff --git a/Content/Shaders/SDF.flax b/Content/Shaders/SDF.flax
new file mode 100644
index 000000000..9f9a6a261
--- /dev/null
+++ b/Content/Shaders/SDF.flax
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd485ffce3c1d002621d795968cfda9c68555600157332dde91618d75881207e
+size 7903
diff --git a/Source/Editor/Cooker/Steps/DeployDataStep.cpp b/Source/Editor/Cooker/Steps/DeployDataStep.cpp
index e9e41a8ea..0b4f5d939 100644
--- a/Source/Editor/Cooker/Steps/DeployDataStep.cpp
+++ b/Source/Editor/Cooker/Steps/DeployDataStep.cpp
@@ -344,6 +344,7 @@ bool DeployDataStep::Perform(CookingData& data)
data.AddRootEngineAsset(TEXT("Shaders/Sky"));
data.AddRootEngineAsset(TEXT("Shaders/SSAO"));
data.AddRootEngineAsset(TEXT("Shaders/SSR"));
+ data.AddRootEngineAsset(TEXT("Shaders/SDF"));
data.AddRootEngineAsset(TEXT("Shaders/VolumetricFog"));
data.AddRootEngineAsset(TEXT("Engine/DefaultMaterial"));
data.AddRootEngineAsset(TEXT("Engine/DefaultDeformableMaterial"));
diff --git a/Source/Editor/Windows/Assets/ModelWindow.cs b/Source/Editor/Windows/Assets/ModelWindow.cs
index 51a2fa612..876736c55 100644
--- a/Source/Editor/Windows/Assets/ModelWindow.cs
+++ b/Source/Editor/Windows/Assets/ModelWindow.cs
@@ -298,11 +298,12 @@ namespace FlaxEditor.Windows.Assets
proxy.Window.Enabled = false;
Task.Run(() =>
{
- proxy.Asset.GenerateSDF(proxy.Window._importSettings.Settings.SDFResolution, _sdfModelLodIndex.Value, true, proxy.Window._backfacesThreshold);
+ bool failed = proxy.Asset.GenerateSDF(proxy.Window._importSettings.Settings.SDFResolution, _sdfModelLodIndex.Value, true, proxy.Window._backfacesThreshold);
FlaxEngine.Scripting.InvokeOnUpdate(() =>
{
proxy.Window.Enabled = true;
- proxy.Window.MarkAsEdited();
+ if (!failed)
+ proxy.Window.MarkAsEdited();
Presenter.BuildLayoutOnUpdate();
});
});
diff --git a/Source/Engine/Content/AssetReference.h b/Source/Engine/Content/AssetReference.h
index dfa52c742..170c1b67c 100644
--- a/Source/Engine/Content/AssetReference.h
+++ b/Source/Engine/Content/AssetReference.h
@@ -93,7 +93,7 @@ public:
/// The asset to set.
AssetReference(T* asset)
{
- OnSet(asset);
+ OnSet((Asset*)asset);
}
///
@@ -215,7 +215,7 @@ public:
/// The asset.
void Set(T* asset)
{
- OnSet(asset);
+ OnSet((Asset*)asset);
}
};
diff --git a/Source/Engine/Content/Assets/Model.cpp b/Source/Engine/Content/Assets/Model.cpp
index 3bcaf4330..33d244d0c 100644
--- a/Source/Engine/Content/Assets/Model.cpp
+++ b/Source/Engine/Content/Assets/Model.cpp
@@ -650,7 +650,7 @@ bool Model::Save(bool withMeshDataFromGpu, const StringView& path)
#endif
-bool Model::GenerateSDF(float resolutionScale, int32 lodIndex, bool cacheData, float backfacesThreshold)
+bool Model::GenerateSDF(float resolutionScale, int32 lodIndex, bool cacheData, float backfacesThreshold, bool useGPU)
{
if (EnableModelSDF == 2)
return true; // Not supported
@@ -673,7 +673,10 @@ bool Model::GenerateSDF(float resolutionScale, int32 lodIndex, bool cacheData, f
#else
class MemoryWriteStream* outputStream = nullptr;
#endif
- if (ModelTool::GenerateModelSDF(this, nullptr, resolutionScale, lodIndex, &SDF, outputStream, GetPath(), backfacesThreshold))
+ Locker.Unlock();
+ const bool failed = ModelTool::GenerateModelSDF(this, nullptr, resolutionScale, lodIndex, &SDF, outputStream, GetPath(), backfacesThreshold, useGPU);
+ Locker.Lock();
+ if (failed)
return true;
#if USE_EDITOR
diff --git a/Source/Engine/Content/Assets/Model.h b/Source/Engine/Content/Assets/Model.h
index c02a3bdcf..5c5310bb9 100644
--- a/Source/Engine/Content/Assets/Model.h
+++ b/Source/Engine/Content/Assets/Model.h
@@ -227,8 +227,9 @@ public:
/// The index of the LOD to use for the SDF building.
/// If true, the generated SDF texture data will be cached on CPU (in asset chunk storage) to allow saving it later, otherwise it will be runtime for GPU-only. Ignored for virtual assets or in build.
/// Custom threshold (in range 0-1) for adjusting mesh internals detection based on the percentage of test rays hit triangle backfaces. Use lower value for more dense mesh.
+ /// Enables using GPU for SDF generation, otherwise CPU will be used (async via Job System).
/// True if failed, otherwise false.
- API_FUNCTION() bool GenerateSDF(float resolutionScale = 1.0f, int32 lodIndex = 6, bool cacheData = true, float backfacesThreshold = 0.6f);
+ API_FUNCTION() bool GenerateSDF(float resolutionScale = 1.0f, int32 lodIndex = 6, bool cacheData = true, float backfacesThreshold = 0.6f, bool useGPU = true);
///
/// Sets set SDF data (releases the current one).
diff --git a/Source/Engine/Tools/ModelTool/ModelTool.cpp b/Source/Engine/Tools/ModelTool/ModelTool.cpp
index cbf080aaa..99c98bb0e 100644
--- a/Source/Engine/Tools/ModelTool/ModelTool.cpp
+++ b/Source/Engine/Tools/ModelTool/ModelTool.cpp
@@ -8,15 +8,20 @@
#include "Engine/Core/RandomStream.h"
#include "Engine/Core/Math/Vector3.h"
#include "Engine/Core/Math/Ray.h"
-#include "Engine/Profiler/ProfilerCPU.h"
+#include "Engine/Platform/ConditionVariable.h"
+#include "Engine/Profiler/Profiler.h"
#include "Engine/Threading/JobSystem.h"
+#include "Engine/Graphics/GPUDevice.h"
+#include "Engine/Graphics/GPUBuffer.h"
#include "Engine/Graphics/RenderTools.h"
#include "Engine/Graphics/Async/GPUTask.h"
+#include "Engine/Graphics/Shaders/GPUShader.h"
#include "Engine/Graphics/Textures/GPUTexture.h"
#include "Engine/Graphics/Textures/TextureData.h"
#include "Engine/Graphics/Models/ModelData.h"
#include "Engine/Content/Assets/Model.h"
#include "Engine/Content/Content.h"
+#include "Engine/Content/Assets/Shader.h"
#include "Engine/Serialization/MemoryWriteStream.h"
#include "Engine/Engine/Units.h"
#if USE_EDITOR
@@ -71,7 +76,261 @@ ModelSDFMip::ModelSDFMip(int32 mipIndex, const TextureMipData& mip)
{
}
-bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float resolutionScale, int32 lodIndex, ModelBase::SDFData* outputSDF, MemoryWriteStream* outputStream, const StringView& assetName, float backfacesThreshold)
+class GPUModelSDFTask : public GPUTask // GPU task that builds a model SDF volume with the "Shaders/SDF" compute shader and notifies a condition variable when it ends
+{
+ ConditionVariable* _signal; // Notified from OnSync, OnFail and OnCancel
+ AssetReference _shader; // Shader asset requested from "Shaders/SDF" in the constructor
+ Model* _inputModel; // Geometry source checked first in run()
+ ModelData* _modelData; // Geometry source used only when _inputModel is null
+ int32 _lodIndex; // Requested LOD (clamped to the available range in run())
+ Int3 _resolution; // Voxel volume dimensions
+ ModelBase::SDFData* _sdf; // Provides MaxDistance and WorldUnitsPerVoxel for the shader constants
+ GPUBuffer *_sdfSrc, *_sdfDst; // Two voxel buffers swapped after every flood-fill pass
+ GPUTexture* _sdfResult; // Optional texture initialized as staging readback that receives the encoded volume
+ Float3 _xyzToLocalMul, _xyzToLocalAdd; // Passed to the shader as VoxelToPosMul/VoxelToPosAdd
+
+ const uint32 ThreadGroupSize = 64; // Used to compute dispatch sizes; SDF.shader defines THREAD_GROUP_SIZE as 64 as well
+ PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data // Constant buffer contents; field order matches the Data constant buffer declared in SDF.shader
+ {
+ Int3 Resolution;
+ uint32 ResolutionSize;
+ float MaxDistance;
+ uint32 VertexStride;
+ int32 Index16bit;
+ uint32 TriangleCount;
+ Float3 VoxelToPosMul;
+ float WorldUnitsPerVoxel;
+ Float3 VoxelToPosAdd;
+ uint32 ThreadGroupsX;
+ });
+
+public:
+ GPUModelSDFTask(ConditionVariable& signal, Model* inputModel, ModelData* modelData, int32 lodIndex, const Int3& resolution, ModelBase::SDFData* sdf, GPUTexture* sdfResult, const Float3& xyzToLocalMul, const Float3& xyzToLocalAdd) // Stores the inputs, requests the shader asset and creates the two (still uninitialized) voxel buffers
+ : GPUTask(Type::Custom)
+ , _signal(&signal)
+ , _shader(Content::LoadAsyncInternal(TEXT("Shaders/SDF")))
+ , _inputModel(inputModel)
+ , _modelData(modelData)
+ , _lodIndex(lodIndex)
+ , _resolution(resolution)
+ , _sdf(sdf)
+ , _sdfSrc(GPUBuffer::New())
+ , _sdfDst(GPUBuffer::New())
+ , _sdfResult(sdfResult)
+ , _xyzToLocalMul(xyzToLocalMul)
+ , _xyzToLocalAdd(xyzToLocalAdd)
+ {
+ }
+
+ ~GPUModelSDFTask() // Releases both voxel buffers
+ {
+ SAFE_DELETE_GPU_RESOURCE(_sdfSrc);
+ SAFE_DELETE_GPU_RESOURCE(_sdfDst);
+ }
+
+ Result run(GPUTasksContext* tasksContext) override // Records the whole pipeline: init, triangle rasterization, resolve, flood fill, encode and optional readback copy
+ {
+ PROFILE_GPU_CPU("GPUModelSDFTask");
+ GPUContext* context = tasksContext->GPU;
+
+ // Allocate resources
+ if (_shader == nullptr || _shader->WaitForLoaded()) // A missing asset or a true result from WaitForLoaded is treated as failure
+ return Result::Failed;
+ GPUShader* shader = _shader->GetShader();
+ const uint32 resolutionSize = _resolution.X * _resolution.Y * _resolution.Z; // Total voxel count
+ auto desc = GPUBufferDescription::Typed(resolutionSize, PixelFormat::R32_UInt, true); // One R32_UInt element per voxel
+ // TODO: use transient texture (single frame)
+ if (_sdfSrc->Init(desc) || _sdfDst->Init(desc))
+ return Result::Failed;
+ auto cb = shader->GetCB(0);
+ Data data; // NOTE(review): VertexStride, Index16bit and TriangleCount are not assigned before the first UpdateCB below - confirm the init pass never reads them
+ data.Resolution = _resolution;
+ data.ResolutionSize = resolutionSize;
+ data.MaxDistance = _sdf->MaxDistance;
+ data.WorldUnitsPerVoxel = _sdf->WorldUnitsPerVoxel;
+ data.VoxelToPosMul = _xyzToLocalMul;
+ data.VoxelToPosAdd = _xyzToLocalAdd;
+
+ // Dispatch in 1D and fallback to 2D when using large resolution
+ Int3 threadGroups(Math::CeilToInt((float)resolutionSize / ThreadGroupSize), 1, 1);
+ if (threadGroups.X > GPU_MAX_CS_DISPATCH_THREAD_GROUPS)
+ {
+ const uint32 groups = threadGroups.X;
+ threadGroups.X = Math::CeilToInt(Math::Sqrt((float)groups)); // Roughly square X*Y grid...
+ threadGroups.Y = Math::CeilToInt((float)groups / threadGroups.X); // ...with enough rows to cover all groups
+ }
+ data.ThreadGroupsX = threadGroups.X; // The shader uses this to flatten the 2D group id back into a linear voxel index
+
+ // Init SDF volume
+ context->BindCB(0, cb);
+ context->UpdateCB(cb, &data);
+ context->BindUA(0, _sdfSrc->View());
+ context->Dispatch(shader->GetCS("CS_Init"), threadGroups.X, threadGroups.Y, threadGroups.Z);
+
+ // Rendering input triangles into the SDF volume
+ if (_inputModel)
+ {
+ PROFILE_GPU_CPU_NAMED("Rasterize");
+ const ModelLOD& lod = _inputModel->LODs[Math::Clamp(_lodIndex, _inputModel->HighestResidentLODIndex(), _inputModel->LODs.Count() - 1)]; // LOD index clamped to [HighestResidentLODIndex, last LOD]
+ GPUBuffer *vbTemp = nullptr, *ibTemp = nullptr; // Created lazily when a mesh buffer cannot be bound directly
+ for (int32 i = 0; i < lod.Meshes.Count(); i++)
+ {
+ const Mesh& mesh = lod.Meshes[i];
+ const MaterialSlot& materialSlot = _inputModel->MaterialSlots[mesh.GetMaterialSlotIndex()];
+ if (materialSlot.Material && !materialSlot.Material->WaitForLoaded()) // Blend mode is inspected only when a material is set and WaitForLoaded returns false
+ {
+ // Skip transparent materials
+ if (materialSlot.Material->GetInfo().BlendMode != MaterialBlendMode::Opaque)
+ continue;
+ }
+
+ GPUBuffer* vb = mesh.GetVertexBuffer(0); // NOTE(review): vb/ib are dereferenced below without a null check
+ GPUBuffer* ib = mesh.GetIndexBuffer();
+ data.Index16bit = mesh.Use16BitIndexBuffer() ? 1 : 0;
+ data.VertexStride = vb->GetStride();
+ data.TriangleCount = mesh.GetTriangleCount();
+ const uint32 groups = Math::CeilToInt((float)data.TriangleCount / ThreadGroupSize); // One thread per triangle
+ if (groups > GPU_MAX_CS_DISPATCH_THREAD_GROUPS)
+ {
+ // TODO: support larger meshes via 2D dispatch
+ LOG(Error, "Not supported mesh with {} triangles.", data.TriangleCount);
+ continue;
+ }
+ context->UpdateCB(cb, &data);
+ if (!EnumHasAllFlags(vb->GetDescription().Flags, GPUBufferFlags::RawBuffer | GPUBufferFlags::ShaderResource)) // Vertex buffer lacks the RawBuffer|ShaderResource flags: copy it into a temporary raw buffer
+ {
+ desc = GPUBufferDescription::Raw(vb->GetSize(), GPUBufferFlags::ShaderResource);
+ // TODO: use transient buffer (single frame)
+ if (!vbTemp)
+ vbTemp = GPUBuffer::New();
+ vbTemp->Init(desc); // NOTE(review): Init result is not checked here
+ context->CopyBuffer(vbTemp, vb, desc.Size);
+ vb = vbTemp;
+ }
+ if (!EnumHasAllFlags(ib->GetDescription().Flags, GPUBufferFlags::RawBuffer | GPUBufferFlags::ShaderResource)) // Same fallback for the index buffer
+ {
+ desc = GPUBufferDescription::Raw(ib->GetSize(), GPUBufferFlags::ShaderResource);
+ // TODO: use transient buffer (single frame)
+ if (!ibTemp)
+ ibTemp = GPUBuffer::New();
+ ibTemp->Init(desc); // NOTE(review): Init result is not checked here
+ context->CopyBuffer(ibTemp, ib, desc.Size);
+ ib = ibTemp;
+ }
+ context->BindSR(0, vb->View());
+ context->BindSR(1, ib->View());
+ context->Dispatch(shader->GetCS("CS_RasterizeTriangle"), groups, 1, 1);
+ }
+ SAFE_DELETE_GPU_RESOURCE(vbTemp);
+ SAFE_DELETE_GPU_RESOURCE(ibTemp);
+ }
+ else if (_modelData)
+ {
+ PROFILE_GPU_CPU_NAMED("Rasterize");
+ const ModelLodData& lod = _modelData->LODs[Math::Clamp(_lodIndex, 0, _modelData->LODs.Count() - 1)];
+ auto vb = GPUBuffer::New(); // Re-initialized for every mesh from the mesh positions
+ auto ib = GPUBuffer::New(); // Re-initialized for every mesh from the mesh indices
+ for (int32 i = 0; i < lod.Meshes.Count(); i++)
+ {
+ const MeshData* mesh = lod.Meshes[i];
+ const MaterialSlotEntry& materialSlot = _modelData->Materials[mesh->MaterialSlotIndex];
+ auto material = Content::LoadAsync(materialSlot.AssetID);
+ if (material && !material->WaitForLoaded()) // Blend mode is inspected only when the material exists and WaitForLoaded returns false
+ {
+ // Skip transparent materials
+ if (material->GetInfo().BlendMode != MaterialBlendMode::Opaque)
+ continue;
+ }
+
+ data.Index16bit = 0; // Indices are uploaded below with a sizeof(uint32) element size
+ data.VertexStride = sizeof(Float3); // Positions are uploaded below as packed Float3 values
+ data.TriangleCount = mesh->Indices.Count() / 3;
+ const uint32 groups = Math::CeilToInt((float)data.TriangleCount / ThreadGroupSize); // One thread per triangle
+ if (groups > GPU_MAX_CS_DISPATCH_THREAD_GROUPS)
+ {
+ // TODO: support larger meshes via 2D dispatch
+ LOG(Error, "Not supported mesh with {} triangles.", data.TriangleCount);
+ continue;
+ }
+ context->UpdateCB(cb, &data);
+ desc = GPUBufferDescription::Raw(mesh->Positions.Count() * sizeof(Float3), GPUBufferFlags::ShaderResource);
+ desc.InitData = mesh->Positions.Get();
+ // TODO: use transient buffer (single frame)
+ vb->Init(desc); // NOTE(review): Init result is not checked here
+ desc = GPUBufferDescription::Raw(mesh->Indices.Count() * sizeof(uint32), GPUBufferFlags::ShaderResource);
+ desc.InitData = mesh->Indices.Get();
+ // TODO: use transient buffer (single frame)
+ ib->Init(desc); // NOTE(review): Init result is not checked here
+ context->BindSR(0, vb->View());
+ context->BindSR(1, ib->View());
+ context->Dispatch(shader->GetCS("CS_RasterizeTriangle"), groups, 1, 1);
+ }
+ SAFE_DELETE_GPU_RESOURCE(vb);
+ SAFE_DELETE_GPU_RESOURCE(ib);
+ }
+
+ // Convert SDF volume data back to floats
+ context->Dispatch(shader->GetCS("CS_Resolve"), threadGroups.X, threadGroups.Y, threadGroups.Z); // UA slot 0 is still _sdfSrc as bound before CS_Init
+
+ // Run linear flood-fill loop to populate all voxels with valid distances (spreads the initial values from triangles rasterization)
+ {
+ PROFILE_GPU_CPU_NAMED("FloodFill");
+ auto csFloodFill = shader->GetCS("CS_FloodFill");
+ const int32 floodFillIterations = Math::Max(_resolution.MaxValue() / 2 + 1, 8); // Half of the largest dimension plus one, but never fewer than 8 passes
+ for (int32 floodFill = 0; floodFill < floodFillIterations; floodFill++)
+ {
+ context->ResetUA();
+ context->BindUA(0, _sdfDst->View());
+ context->BindSR(0, _sdfSrc->View());
+ context->Dispatch(csFloodFill, threadGroups.X, threadGroups.Y, threadGroups.Z);
+ Swap(_sdfSrc, _sdfDst); // The output of this pass becomes the input of the next one
+ }
+ }
+
+ // Encode SDF values into output storage
+ context->ResetUA();
+ context->BindSR(0, _sdfSrc->View()); // After the final swap _sdfSrc refers to the last flood-fill output
+ // TODO: update GPU SDF texture within this task to skip additional CPU->GPU copy
+ auto sdfTextureDesc = GPUTextureDescription::New3D(_resolution.X, _resolution.Y, _resolution.Z, PixelFormat::R16_UNorm, GPUTextureFlags::UnorderedAccess | GPUTextureFlags::RenderTarget);
+ // TODO: use transient texture (single frame)
+ auto sdfTexture = GPUTexture::New();
+ sdfTexture->Init(sdfTextureDesc); // NOTE(review): Init result is not checked here
+ context->BindUA(1, sdfTexture->ViewVolume());
+ context->Dispatch(shader->GetCS("CS_Encode"), threadGroups.X, threadGroups.Y, threadGroups.Z);
+
+ // Copy result data into readback buffer
+ if (_sdfResult)
+ {
+ sdfTextureDesc = sdfTextureDesc.ToStagingReadback();
+ _sdfResult->Init(sdfTextureDesc); // NOTE(review): Init result is not checked here
+ context->CopyTexture(_sdfResult, 0, 0, 0, 0, sdfTexture, 0);
+ }
+
+ SAFE_DELETE_GPU_RESOURCE(sdfTexture);
+
+ return Result::Ok;
+ }
+
+ void OnSync() override // Runs the base handler, then notifies one waiter on the signal
+ {
+ GPUTask::OnSync();
+ _signal->NotifyOne();
+ }
+
+ void OnFail() override // Runs the base handler, then notifies one waiter on the signal
+ {
+ GPUTask::OnFail();
+ _signal->NotifyOne();
+ }
+
+ void OnCancel() override // Runs the base handler, then notifies one waiter on the signal
+ {
+ GPUTask::OnCancel();
+ _signal->NotifyOne();
+ }
+};
+
+bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float resolutionScale, int32 lodIndex, ModelBase::SDFData* outputSDF, MemoryWriteStream* outputStream, const StringView& assetName, float backfacesThreshold, bool useGPU)
{
PROFILE_CPU();
auto startTime = Platform::GetTimeSeconds();
@@ -127,7 +386,7 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float
*(uint8*)ptr = (uint8)v;
};
}
- GPUTextureDescription textureDesc = GPUTextureDescription::New3D(resolution.X, resolution.Y, resolution.Z, format, GPUTextureFlags::ShaderResource, mipCount);
+ auto textureDesc = GPUTextureDescription::New3D(resolution.X, resolution.Y, resolution.Z, format, GPUTextureFlags::ShaderResource, mipCount);
if (outputSDF)
{
*outputSDF = sdf;
@@ -143,19 +402,10 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float
#endif
}
- // TODO: support GPU to generate model SDF on-the-fly (if called during rendering)
-
- // Setup acceleration structure for fast ray tracing the mesh triangles
- MeshAccelerationStructure scene;
- if (inputModel)
- scene.Add(inputModel, lodIndex);
- else if (modelData)
- scene.Add(modelData, lodIndex);
- scene.BuildBVH();
-
// Allocate memory for the distant field
const int32 voxelsSize = resolution.X * resolution.Y * resolution.Z * formatStride;
- void* voxels = Allocator::Allocate(voxelsSize);
+ BytesContainer voxels;
+ voxels.Allocate(voxelsSize);
Float3 xyzToLocalMul = uvwToLocalMul / Float3(resolution - 1);
Float3 xyzToLocalAdd = uvwToLocalAdd;
const Float2 encodeMAD(0.5f / sdf.MaxDistance * formatMaxValue, 0.5f * formatMaxValue);
@@ -163,74 +413,125 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float
int32 voxelSizeSum = voxelsSize;
// TODO: use optimized sparse storage for SDF data as hierarchical bricks as in papers below:
+ // https://gpuopen.com/gdc-presentations/2023/GDC-2023-Sparse-Distance-Fields-For-Games.pdf + https://www.youtube.com/watch?v=iY15xhuuHPQ&ab_channel=AMD
// https://graphics.pixar.com/library/IrradianceAtlas/paper.pdf
// http://maverick.inria.fr/Membres/Cyril.Crassin/thesis/CCrassinThesis_EN_Web.pdf
// http://ramakarl.com/pdfs/2016_Hoetzlein_GVDB.pdf
// https://www.cse.chalmers.se/~uffe/HighResolutionSparseVoxelDAGs.pdf
- // Brute-force for each voxel to calculate distance to the closest triangle with point query and distance sign by raycasting around the voxel
- constexpr int32 sampleCount = 12;
- Float3 sampleDirections[sampleCount];
+ // Check if run SDF generation on a GPU via Compute Shader or on a Job System
+ useGPU &= GPUDevice::Instance
+ && GPUDevice::Instance->GetState() == GPUDevice::DeviceState::Ready
+ && GPUDevice::Instance->Limits.HasCompute
+ && format == PixelFormat::R16_UNorm
+ && !IsInMainThread() // TODO: support GPU to generate model SDF on-the-fly directly into virtual model (if called during rendering)
+ && resolution.MaxValue() > 8;
+ if (useGPU)
{
- RandomStream rand;
- sampleDirections[0] = Float3::Up;
- sampleDirections[1] = Float3::Down;
- sampleDirections[2] = Float3::Left;
- sampleDirections[3] = Float3::Right;
- sampleDirections[4] = Float3::Forward;
- sampleDirections[5] = Float3::Backward;
- for (int32 i = 6; i < sampleCount; i++)
- sampleDirections[i] = rand.GetUnitVector();
- }
- Function sdfJob = [&sdf, &resolution, &backfacesThreshold, sampleDirections, &sampleCount, &scene, &voxels, &xyzToLocalMul, &xyzToLocalAdd, &encodeMAD, &formatStride, &formatWrite](int32 z)
- {
- PROFILE_CPU_NAMED("Model SDF Job");
- Real hitDistance;
- Vector3 hitNormal, hitPoint;
- Triangle hitTriangle;
- const int32 zAddress = resolution.Y * resolution.X * z;
- for (int32 y = 0; y < resolution.Y; y++)
+ PROFILE_CPU_NAMED("GPU");
+
+ // TODO: skip using sdfResult and downloading SDF from GPU when updating virtual model
+ auto sdfResult = GPUTexture::New();
+
+ // Run SDF generation via GPU async task (this thread blocks until the task notifies the signal from OnSync/OnFail/OnCancel)
+ ConditionVariable signal;
+ CriticalSection mutex;
+ Task* task = New(signal, inputModel, modelData, lodIndex, resolution, &sdf, sdfResult, xyzToLocalMul, xyzToLocalAdd);
+ task->Start();
+ mutex.Lock();
+ signal.Wait(mutex); // NOTE(review): no predicate guards this wait - if the task notifies before Wait is entered the wake-up may be lost and this blocks forever; confirm ConditionVariable semantics or re-check the task state in a loop
+ mutex.Unlock();
+ bool failed = task->IsFailed(); // NOTE(review): the task pointer is read after completion - confirm the task object is still alive at this point
+
+ // Gather result data from GPU to CPU
+ if (!failed && sdfResult)
{
- const int32 yAddress = resolution.X * y + zAddress;
- for (int32 x = 0; x < resolution.X; x++)
- {
- Real minDistance = sdf.MaxDistance;
- Vector3 voxelPos = Float3((float)x, (float)y, (float)z) * xyzToLocalMul + xyzToLocalAdd;
-
- // Point query to find the distance to the closest surface
- scene.PointQuery(voxelPos, minDistance, hitPoint, hitTriangle);
-
- // Raycast samples around voxel to count triangle backfaces hit
- int32 hitBackCount = 0, hitCount = 0;
- for (int32 sample = 0; sample < sampleCount; sample++)
- {
- Ray sampleRay(voxelPos, sampleDirections[sample]);
- sampleRay.Position -= sampleRay.Direction * 0.0001f; // Apply small margin
- if (scene.RayCast(sampleRay, hitDistance, hitNormal, hitTriangle))
- {
- if (hitDistance < minDistance)
- minDistance = hitDistance;
- hitCount++;
- const bool backHit = Float3::Dot(sampleRay.Direction, hitTriangle.GetNormal()) > 0;
- if (backHit)
- hitBackCount++;
- }
- }
-
- float distance = (float)minDistance;
- // TODO: surface thickness threshold? shift reduce distance for all voxels by something like 0.01 to enlarge thin geometry
- // if ((float)hitBackCount > (float)hitCount * 0.3f && hitCount != 0)
- if ((float)hitBackCount > (float)sampleCount * backfacesThreshold && hitCount != 0)
- {
- // Voxel is inside the geometry so turn it into negative distance to the surface
- distance *= -1;
- }
- const int32 xAddress = x + yAddress;
- formatWrite((byte*)voxels + xAddress * formatStride, distance * encodeMAD.X + encodeMAD.Y);
- }
+ TextureMipData mipData;
+ const uint32 rowPitch = resolution.X * formatStride;
+ failed = sdfResult->GetData(0, 0, mipData, rowPitch);
+ failed |= voxels.Length() != mipData.Data.Length();
+ if (!failed)
+ voxels = mipData.Data;
}
- };
- JobSystem::Execute(sdfJob, resolution.Z);
+
+ SAFE_DELETE_GPU_RESOURCE(sdfResult);
+ if (failed)
+ return true;
+ }
+ else
+ {
+ // Setup acceleration structure for fast ray tracing the mesh triangles
+ MeshAccelerationStructure scene;
+ if (inputModel)
+ scene.Add(inputModel, lodIndex);
+ else if (modelData)
+ scene.Add(modelData, lodIndex);
+ scene.BuildBVH();
+
+ // Brute-force for each voxel to calculate distance to the closest triangle with point query and distance sign by raycasting around the voxel
+ constexpr int32 sampleCount = 12;
+ Float3 sampleDirections[sampleCount];
+ {
+ RandomStream rand;
+ sampleDirections[0] = Float3::Up;
+ sampleDirections[1] = Float3::Down;
+ sampleDirections[2] = Float3::Left;
+ sampleDirections[3] = Float3::Right;
+ sampleDirections[4] = Float3::Forward;
+ sampleDirections[5] = Float3::Backward;
+ for (int32 i = 6; i < sampleCount; i++)
+ sampleDirections[i] = rand.GetUnitVector();
+ }
+ Function sdfJob = [&sdf, &resolution, &backfacesThreshold, sampleDirections, &sampleCount, &scene, &voxels, &xyzToLocalMul, &xyzToLocalAdd, &encodeMAD, &formatStride, &formatWrite](int32 z)
+ {
+ PROFILE_CPU_NAMED("Model SDF Job");
+ Real hitDistance;
+ Vector3 hitNormal, hitPoint;
+ Triangle hitTriangle;
+ const int32 zAddress = resolution.Y * resolution.X * z;
+ for (int32 y = 0; y < resolution.Y; y++)
+ {
+ const int32 yAddress = resolution.X * y + zAddress;
+ for (int32 x = 0; x < resolution.X; x++)
+ {
+ Real minDistance = sdf.MaxDistance;
+ Vector3 voxelPos = Float3((float)x, (float)y, (float)z) * xyzToLocalMul + xyzToLocalAdd;
+
+ // Point query to find the distance to the closest surface
+ scene.PointQuery(voxelPos, minDistance, hitPoint, hitTriangle);
+
+ // Raycast samples around voxel to count triangle backfaces hit
+ int32 hitBackCount = 0, hitCount = 0;
+ for (int32 sample = 0; sample < sampleCount; sample++)
+ {
+ Ray sampleRay(voxelPos, sampleDirections[sample]);
+ sampleRay.Position -= sampleRay.Direction * 0.0001f; // Apply small margin
+ if (scene.RayCast(sampleRay, hitDistance, hitNormal, hitTriangle))
+ {
+ if (hitDistance < minDistance)
+ minDistance = hitDistance;
+ hitCount++;
+ const bool backHit = Float3::Dot(sampleRay.Direction, hitTriangle.GetNormal()) > 0;
+ if (backHit)
+ hitBackCount++;
+ }
+ }
+
+ float distance = (float)minDistance;
+ // TODO: surface thickness threshold? shift reduce distance for all voxels by something like 0.01 to enlarge thin geometry
+ // if ((float)hitBackCount > (float)hitCount * 0.3f && hitCount != 0)
+ if ((float)hitBackCount > (float)sampleCount * backfacesThreshold && hitCount != 0)
+ {
+ // Voxel is inside the geometry so turn it into negative distance to the surface
+ distance *= -1;
+ }
+ const int32 xAddress = x + yAddress;
+ formatWrite(voxels.Get() + xAddress * formatStride, distance * encodeMAD.X + encodeMAD.Y);
+ }
+ }
+ };
+ JobSystem::Execute(sdfJob, resolution.Z);
+ }
// Cache SDF data on a CPU
if (outputStream)
@@ -240,20 +541,19 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float
outputStream->WriteBytes(&data, sizeof(data));
ModelSDFMip mipData(0, resolution.X * formatStride, voxelsSize);
outputStream->WriteBytes(&mipData, sizeof(mipData));
- outputStream->WriteBytes(voxels, voxelsSize);
+ outputStream->WriteBytes(voxels.Get(), voxelsSize);
}
// Upload data to the GPU
if (outputSDF)
{
- BytesContainer data;
- data.Link((byte*)voxels, voxelsSize);
- auto task = outputSDF->Texture->UploadMipMapAsync(data, 0, resolution.X * formatStride, voxelsSize, true);
+ auto task = outputSDF->Texture->UploadMipMapAsync(voxels, 0, resolution.X * formatStride, voxelsSize, true);
if (task)
task->Start();
}
// Generate mip maps
+ void* voxelsMipSrc = voxels.Get();
void* voxelsMip = nullptr;
for (int32 mipLevel = 1; mipLevel < mipCount; mipLevel++)
{
@@ -263,7 +563,7 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float
voxelsMip = Allocator::Allocate(voxelsMipSize);
// Downscale mip
- Function mipJob = [&voxelsMip, &voxels, &resolution, &resolutionMip, &encodeMAD, &decodeMAD, &formatStride, &formatRead, &formatWrite](int32 z)
+ Function mipJob = [&voxelsMip, &voxelsMipSrc, &resolution, &resolutionMip, &encodeMAD, &decodeMAD, &formatStride, &formatRead, &formatWrite](int32 z)
{
PROFILE_CPU_NAMED("Model SDF Mip Job");
const int32 zAddress = resolutionMip.Y * resolutionMip.X * z;
@@ -284,7 +584,7 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float
for (int32 dx = 0; dx < 2; dx++)
{
const int32 dxAddress = (x * 2 + dx) + dyAddress;
- const float d = formatRead((byte*)voxels + dxAddress * formatStride) * decodeMAD.X + decodeMAD.Y;
+ const float d = formatRead((byte*)voxelsMipSrc + dxAddress * formatStride) * decodeMAD.X + decodeMAD.Y;
distance += d;
}
}
@@ -318,12 +618,11 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float
// Go down
voxelSizeSum += voxelsSize;
- Swap(voxelsMip, voxels);
+ Swap(voxelsMip, voxelsMipSrc);
resolution = resolutionMip;
}
Allocator::Free(voxelsMip);
- Allocator::Free(voxels);
#if !BUILD_RELEASE
auto endTime = Platform::GetTimeSeconds();
diff --git a/Source/Engine/Tools/ModelTool/ModelTool.h b/Source/Engine/Tools/ModelTool/ModelTool.h
index ed1736214..708b94342 100644
--- a/Source/Engine/Tools/ModelTool/ModelTool.h
+++ b/Source/Engine/Tools/ModelTool/ModelTool.h
@@ -98,7 +98,7 @@ API_CLASS(Namespace="FlaxEngine.Tools", Static) class FLAXENGINE_API ModelTool
// Optional: inputModel or modelData
// Optional: outputSDF or null, outputStream or null
- static bool GenerateModelSDF(class Model* inputModel, class ModelData* modelData, float resolutionScale, int32 lodIndex, ModelBase::SDFData* outputSDF, class MemoryWriteStream* outputStream, const StringView& assetName, float backfacesThreshold = 0.6f);
+ static bool GenerateModelSDF(class Model* inputModel, class ModelData* modelData, float resolutionScale, int32 lodIndex, ModelBase::SDFData* outputSDF, class MemoryWriteStream* outputStream, const StringView& assetName, float backfacesThreshold = 0.6f, bool useGPU = true);
#if USE_EDITOR
diff --git a/Source/Shaders/SDF.shader b/Source/Shaders/SDF.shader
new file mode 100644
index 000000000..a19e6d346
--- /dev/null
+++ b/Source/Shaders/SDF.shader
@@ -0,0 +1,272 @@
+// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved.
+
+// Mesh SDF generation based on https://github.com/GPUOpen-Effects/TressFX
+
+#include "./Flax/Common.hlsl"
+#include "./Flax/ThirdParty/TressFX/TressFXSDF.hlsl"
+
+#define THREAD_GROUP_SIZE 64
+
+META_CB_BEGIN(0, Data)
+int3 Resolution;
+uint ResolutionSize;
+float MaxDistance;
+uint VertexStride;
+bool Index16bit;
+uint TriangleCount;
+float3 VoxelToPosMul;
+float WorldUnitsPerVoxel;
+float3 VoxelToPosAdd;
+uint ThreadGroupsX;
+META_CB_END
+
+RWBuffer SDF : register(u0);
+
+// Flattens the dispatch group id (2D grid that is ThreadGroupsX groups wide) and the thread index within the group into a linear voxel index.
+uint GetVoxelIndex(uint3 groupId, uint groupIndex)
+{
+ uint linearGroup = groupId.y * ThreadGroupsX + groupId.x;
+ return linearGroup * THREAD_GROUP_SIZE + groupIndex;
+}
+
+// Limits the voxel coordinate to the range [0, Resolution - 1] on every axis.
+int3 ClampVoxelCoord(int3 coord)
+{
+ int3 lastVoxel = Resolution - 1;
+ return clamp(coord, 0, lastVoxel);
+}
+
+// Converts a 3D voxel coordinate into a linear index (X varies fastest, then Y, then Z).
+int GetVoxelIndex(int3 coord)
+{
+ int sliceStart = Resolution.x * Resolution.y * coord.z;
+ int rowStart = Resolution.x * coord.y;
+ return sliceStart + rowStart + coord.x;
+}
+
+// Transforms a voxel coordinate into a position using the VoxelToPosMul/VoxelToPosAdd constants.
+float3 GetVoxelPos(int3 coord)
+{
+ float3 voxel = (float3)coord;
+ return voxel * VoxelToPosMul + VoxelToPosAdd;
+}
+
+// Inverse of GetVoxelPos: maps a position back to a voxel coordinate (each component is truncated by the float-to-int cast).
+int3 GetVoxelCoord(float3 pos)
+{
+ float3 voxel = (pos - VoxelToPosAdd) / VoxelToPosMul;
+ return (int3)voxel;
+}
+
+// Splits a linear voxel index into its 3D coordinate (X varies fastest, then Y, then Z).
+int3 GetVoxelCoord(uint index)
+{
+ uint rowLength = (uint)Resolution.x;
+ uint sliceLength = (uint)(Resolution.x * Resolution.y);
+ uint inSlice = index % sliceLength;
+ uint3 coord = uint3(inSlice % rowLength, inSlice / rowLength, index / sliceLength);
+ return (int3)coord;
+}
+
+// Fills every voxel of the SDF buffer with the initial distance (ten times MaxDistance, stored in the FloatFlip3 encoding).
+META_CS(true, FEATURE_LEVEL_SM5)
+[numthreads(THREAD_GROUP_SIZE, 1, 1)]
+void CS_Init(uint3 GroupId : SV_GroupID, uint GroupIndex : SV_GroupIndex)
+{
+ uint voxelIndex = GetVoxelIndex(GroupId, GroupIndex);
+ if (voxelIndex < ResolutionSize)
+ {
+ // Deliberately far above MaxDistance so later passes can tell the voxel has not been written yet
+ float initialDistance = MaxDistance * 10.0f;
+ SDF[voxelIndex] = FloatFlip3(initialDistance);
+ }
+}
+
+// Unpacks the SDF buffer in place: each voxel stored in the FloatFlip3 encoding (used for interlocked operations on uint) is converted back with IFloatFlip3.
+META_CS(true, FEATURE_LEVEL_SM5)
+[numthreads(THREAD_GROUP_SIZE, 1, 1)]
+void CS_Resolve(uint3 GroupId : SV_GroupID, uint GroupIndex : SV_GroupIndex)
+{
+ uint voxelIndex = GetVoxelIndex(GroupId, GroupIndex);
+ if (voxelIndex < ResolutionSize)
+ {
+ SDF[voxelIndex] = IFloatFlip3(SDF[voxelIndex]);
+ }
+}
+
+#ifdef _CS_RasterizeTriangle
+
+ByteAddressBuffer VertexBuffer : register(t0);
+ByteAddressBuffer IndexBuffer : register(t1);
+
+// Reads the i-th index from the raw index buffer, supporting both 16-bit and 32-bit index data (selected by Index16bit).
+uint LoadIndex(uint i)
+{
+ if (!Index16bit)
+ return IndexBuffer.Load(i << 2u);
+
+ // Two 16-bit indices share one 32-bit word: fetch the word, then keep the upper half for odd i and the lower half for even i
+ uint packed = IndexBuffer.Load((i >> 1u) << 2u);
+ if ((i & 1u) == 1u)
+ packed = packed >> 16;
+ return packed & 0xffff;
+}
+
+// Reads three consecutive 32-bit values at byte offset i * VertexStride from the raw vertex buffer and reinterprets them as floats.
+float3 LoadVertex(uint i)
+{
+ uint byteOffset = i * VertexStride;
+ return asfloat(VertexBuffer.Load3(byteOffset));
+}
+
+// Rasterizes one triangle per thread: every voxel inside the triangle's padded bounds is updated with InterlockedMin using the FloatFlip3-encoded distance to that triangle.
+META_CS(true, FEATURE_LEVEL_SM5)
+[numthreads(THREAD_GROUP_SIZE, 1, 1)]
+void CS_RasterizeTriangle(uint3 DispatchThreadId : SV_DispatchThreadID)
+{
+ if (DispatchThreadId.x >= TriangleCount)
+ return;
+
+ // Fetch the three corners through the index buffer
+ uint firstIndex = DispatchThreadId.x * 3;
+ float3 v0 = LoadVertex(LoadIndex(firstIndex + 0));
+ float3 v1 = LoadVertex(LoadIndex(firstIndex + 1));
+ float3 v2 = LoadVertex(LoadIndex(firstIndex + 2));
+
+ // Triangle bounds grown by one voxel size, converted to voxel coordinates, grown by one more voxel and clamped to the volume
+ float3 margin = float3(WorldUnitsPerVoxel, WorldUnitsPerVoxel, WorldUnitsPerVoxel);
+ float3 boundsMin = min(min(v0, v1), v2) - margin;
+ float3 boundsMax = max(max(v0, v1), v2) + margin;
+ int3 firstVoxel = ClampVoxelCoord(GetVoxelCoord(boundsMin) - int3(1, 1, 1));
+ int3 lastVoxel = ClampVoxelCoord(GetVoxelCoord(boundsMax) + int3(1, 1, 1));
+
+ // Visit every voxel of the clamped range
+ for (int z = firstVoxel.z; z <= lastVoxel.z; z++)
+ {
+ for (int y = firstVoxel.y; y <= lastVoxel.y; y++)
+ {
+ for (int x = firstVoxel.x; x <= lastVoxel.x; x++)
+ {
+ int3 coord = int3(x, y, z);
+ float distance = SignedDistancePointToTriangle(GetVoxelPos(coord), v0, v1, v2);
+ InterlockedMin(SDF[GetVoxelIndex(coord)], FloatFlip3(distance));
+ }
+ }
+ }
+}
+
+#endif
+
+#if defined(_CS_FloodFill) || defined(_CS_Encode)
+
+Buffer InSDF : register(t0);
+
+float GetVoxel(int voxelIndex) // Reads the voxel at the given linear index from the InSDF input buffer
+{
+ return asfloat(InSDF[voxelIndex]); // Stored value is passed through asfloat; no bounds check is done here
+}
+
+// Reads the voxel at the given 3D coordinate; coordinates outside the volume are clamped to the nearest valid voxel.
+float GetVoxel(int3 coord)
+{
+ int3 clamped = ClampVoxelCoord(coord);
+ return GetVoxel(GetVoxelIndex(clamped));
+}
+
+// Merges the distance propagated from a neighbouring voxel into the distance of the current voxel.
+// - sdf: distance currently held by the voxel being processed
+// - nearbyCoord: coordinate of the neighbouring voxel to sample
+// - nearbyDistance: distance between the two voxels, added to the magnitude of the neighbour's value
+// Returns the distance the current voxel should hold afterwards.
+float CombineSDF(float sdf, int3 nearbyCoord, float nearbyDistance)
+{
+    // Take the neighbour's distance and push it away from zero by the spacing between the voxels,
+    // keeping its sign
+    float candidate = GetVoxel(nearbyCoord);
+    float offset = candidate < 0.0f ? -nearbyDistance : nearbyDistance;
+    candidate += offset;
+
+    // Nearby sample is invalid (see CS_Init): keep the current value
+    if (candidate > MaxDistance)
+        return sdf;
+
+    // Current value is invalid (see CS_Init): adopt the nearby sample
+    if (sdf > MaxDistance)
+        return candidate;
+
+    // Both valid: a non-negative current value takes the smaller of the two, a negative one the larger
+    if (sdf >= 0)
+        return min(sdf, candidate);
+    else
+        return max(sdf, candidate);
+}
+
+// Fills the voxels with minimum distances to the triangles: each thread takes one voxel, reads distances from InSDF, merges in its axis neighbours and writes the result to SDF.
+META_CS(true, FEATURE_LEVEL_SM5)
+[numthreads(THREAD_GROUP_SIZE, 1, 1)]
+void CS_FloodFill(uint3 GroupId : SV_GroupID, uint GroupIndex : SV_GroupIndex)
+{
+ uint voxelIndex = GetVoxelIndex(GroupId, GroupIndex); // linear index of the voxel handled by this thread
+ if (voxelIndex >= ResolutionSize) // out-of-range threads exit without writing
+ return;
+ float sdf = GetVoxel(voxelIndex);
+
+ // Skip the neighbour lookups if the distance is already so small that we know that triangle is nearby (within 1.2 voxels); such a value is written back unchanged
+ if (abs(sdf) > WorldUnitsPerVoxel * 1.2f)
+ {
+ int3 voxelCoord = GetVoxelCoord(voxelIndex);
+ int3 offset = int3(-1, 0, 1); // swizzle source for neighbour offsets: x = -1, y = 0, z = +1
+
+ // Sample nearby voxels: the six axis neighbours one voxel away, in the order +X, +Y, +Z, -X, -Y, -Z (each call feeds its result into the next)
+ float nearbyDistance = WorldUnitsPerVoxel;
+ sdf = CombineSDF(sdf, voxelCoord + offset.zyy, nearbyDistance);
+ sdf = CombineSDF(sdf, voxelCoord + offset.yzy, nearbyDistance);
+ sdf = CombineSDF(sdf, voxelCoord + offset.yyz, nearbyDistance);
+ sdf = CombineSDF(sdf, voxelCoord + offset.xyy, nearbyDistance);
+ sdf = CombineSDF(sdf, voxelCoord + offset.yxy, nearbyDistance);
+ sdf = CombineSDF(sdf, voxelCoord + offset.yyx, nearbyDistance);
+#if 0 // Disabled: the twelve neighbours that differ along two axes, sqrt(2) voxels away
+ nearbyDistance = WorldUnitsPerVoxel * 1.41421f;
+ sdf = CombineSDF(sdf, voxelCoord + offset.xxy, nearbyDistance);
+ sdf = CombineSDF(sdf, voxelCoord + offset.xzy, nearbyDistance);
+ sdf = CombineSDF(sdf, voxelCoord + offset.zzy, nearbyDistance);
+ sdf = CombineSDF(sdf, voxelCoord + offset.zxy, nearbyDistance);
+ sdf = CombineSDF(sdf, voxelCoord + offset.xyx, nearbyDistance);
+ sdf = CombineSDF(sdf, voxelCoord + offset.xyz, nearbyDistance);
+ sdf = CombineSDF(sdf, voxelCoord + offset.zyz, nearbyDistance);
+ sdf = CombineSDF(sdf, voxelCoord + offset.zyx, nearbyDistance);
+ sdf = CombineSDF(sdf, voxelCoord + offset.yxx, nearbyDistance);
+ sdf = CombineSDF(sdf, voxelCoord + offset.yxz, nearbyDistance);
+ sdf = CombineSDF(sdf, voxelCoord + offset.yzz, nearbyDistance);
+ sdf = CombineSDF(sdf, voxelCoord + offset.yzx, nearbyDistance);
+#endif
+#if 0 // Disabled: the eight neighbours that differ along all three axes, sqrt(3) voxels away
+ nearbyDistance = WorldUnitsPerVoxel * 1.73205f;
+ sdf = CombineSDF(sdf, voxelCoord + offset.xxx, nearbyDistance);
+ sdf = CombineSDF(sdf, voxelCoord + offset.xxz, nearbyDistance);
+ sdf = CombineSDF(sdf, voxelCoord + offset.xzx, nearbyDistance);
+ sdf = CombineSDF(sdf, voxelCoord + offset.xzz, nearbyDistance);
+ sdf = CombineSDF(sdf, voxelCoord + offset.zxx, nearbyDistance);
+ sdf = CombineSDF(sdf, voxelCoord + offset.zxz, nearbyDistance);
+ sdf = CombineSDF(sdf, voxelCoord + offset.zzx, nearbyDistance);
+ sdf = CombineSDF(sdf, voxelCoord + offset.zzz, nearbyDistance);
+#endif
+ }
+
+ SDF[voxelIndex] = asuint(sdf); // stored as the raw bit pattern of the float
+}
+
+RWTexture3D SDFtex : register(u1);
+
+// Encodes SDF values into the packed format with normalized distances: each thread takes one voxel, reads it from InSDF and writes the packed value to SDFtex.
+META_CS(true, FEATURE_LEVEL_SM5)
+[numthreads(THREAD_GROUP_SIZE, 1, 1)]
+void CS_Encode(uint3 GroupId : SV_GroupID, uint GroupIndex : SV_GroupIndex)
+{
+    uint voxelIndex = GetVoxelIndex(GroupId, GroupIndex);
+    if (voxelIndex >= ResolutionSize)
+        return;
+    float sdf = GetVoxel(voxelIndex);
+    sdf = max(min(sdf, MaxDistance), -MaxDistance); // limit both ends; without the lower bound a value below -MaxDistance packs to less than 0
+
+    // Pack from range [-MaxDistance; +MaxDistance] to [0; 1]
+    sdf = (sdf / MaxDistance) * 0.5f + 0.5f;
+
+    int3 voxelCoord = GetVoxelCoord(voxelIndex);
+    SDFtex[voxelCoord] = sdf;
+}
+
+#endif
diff --git a/Source/Shaders/ThirdParty/TressFX/TressFXSDF.hlsl b/Source/Shaders/ThirdParty/TressFX/TressFXSDF.hlsl
new file mode 100644
index 000000000..13c28f16d
--- /dev/null
+++ b/Source/Shaders/ThirdParty/TressFX/TressFXSDF.hlsl
@@ -0,0 +1,121 @@
+// Source: https://github.com/GPUOpen-Effects/TressFX
+// License: MIT
+
+//
+// Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+//
+
+//When building the SDF we want to find the lowest distance at each SDF cell. In order to allow multiple threads to write to the same
+//cells, it is necessary to use atomics. However, there is no support for atomics with 32-bit floats so we convert the float into unsigned int
+//and use atomic_min() / InterlockedMin() as a workaround.
+//
+//When used with atomic_min, both FloatFlip2() and FloatFlip3() will store the float with the lowest magnitude.
+//The difference is that FloatFlip2() will prefer negative values ( InterlockedMin( FloatFlip2(1.0), FloatFlip2(-1.0) ) == -1.0 ),
+//while FloatFlip3() prefers positive values ( InterlockedMin( FloatFlip3(1.0), FloatFlip3(-1.0) ) == 1.0 ).
+//Using FloatFlip3() seems to result in a SDF with higher quality compared to FloatFlip2().
+uint FloatFlip2(float fl) // Result orders by magnitude when compared as uint; of two equal magnitudes the negative value is the smaller one
+{
+    uint bits = asuint(fl);
+    return ((~bits) >> 31) | (bits << 1); // magnitude bits move up by one, the inverted sign bit lands in bit 0 (0 == negative)
+}
+uint IFloatFlip2(uint f2) // Undoes FloatFlip2: yields the bit pattern of the float that was encoded
+{
+    return ((~f2) << 31) | (f2 >> 1); // bit 0 is inverted back into the sign position, the remaining bits move down by one
+}
+uint FloatFlip3(float fl) // Result orders by magnitude when compared as uint; of two equal magnitudes the positive value is the smaller one
+{
+    uint bits = asuint(fl);
+    return (bits >> 31) | (bits << 1); // rotate left by one: the sign bit lands in bit 0
+}
+uint IFloatFlip3(uint f2) // Undoes FloatFlip3: yields the bit pattern of the float that was encoded
+{
+    return (f2 << 31) | (f2 >> 1); // rotate right by one: bit 0 returns to the sign position
+}
+
+// Returns the distance from p to the segment x0-x1; n receives the direction from the closest segment point towards p.
+float DistancePointToEdge(float3 p, float3 x0, float3 x1, out float3 n)
+{
+    // Parameter of the closest point, measured from x1 (t = 0) towards x0 (t = 1) and limited to the segment
+    float3 edge = x1 - x0;
+    float t = dot(x1 - p, edge) / dot(edge, edge);
+    t = max(0.0f, min(t, 1.0f));
+    // Offset from that closest point to p; the tiny bias avoids a division by zero when p lies on the segment
+    float3 toPoint = p - (t*x0 + (1.0f - t)*x1);
+    float dist = length(toPoint);
+    n = toPoint / (dist + 1e-30f);
+    return dist;
+}
+
+// Returns the distance from point p to the triangle (x0, x1, x2); the result is negated when p is on the negative side of the triangle.
+// Positive side is where the normal vector of triangle ( (x1-x0) x (x2-x0) ) is pointing to.
+float SignedDistancePointToTriangle(float3 p, float3 x0, float3 x1, float3 x2)
+{
+ float d = 0;
+ float3 x02 = x0 - x2;
+ float l0 = length(x02) + 1e-30f; // tiny bias keeps the divisions below finite for a zero-length edge
+ x02 = x02 / l0; // unit direction from x2 towards x0
+ float3 x12 = x1 - x2;
+ float l1 = dot(x12, x02); // extent of (x1 - x2) along x02
+ x12 = x12 - l1*x02; // drop that part, leaving the component perpendicular to x02
+ float l2 = length(x12) + 1e-30f;
+ x12 = x12 / l2; // second in-plane unit axis
+ float3 px2 = p - x2;
+
+ float b = dot(x12, px2) / l2; // weight of x1
+ float a = (dot(x02, px2) - l1*b) / l0; // weight of x0
+ float c = 1 - a - b; // weight of x2; a*x0 + b*x1 + c*x2 is the projection of p onto the triangle plane
+
+ // normal vector of triangle. Don't need to normalize this yet.
+ float3 nTri = cross((x1 - x0), (x2 - x0));
+ float3 n; // direction from the closest point towards p; assigned below but never read for the returned value
+
+ float tol = 1e-8f; // tolerance of the inside test below
+
+ if (a >= -tol && b >= -tol && c >= -tol) // projection falls inside the triangle: measure to the projected point
+ {
+ n = p - (a*x0 + b*x1 + c*x2);
+ d = length(n);
+
+ float3 n1 = n / d;
+ float3 n2 = nTri / (length(nTri) + 1e-30f); // if d == 0
+
+ n = (d > 0) ? n1 : n2;
+ }
+ else // projection falls outside: the closest point lies on one of the three edges
+ {
+ float3 n_12;
+ float3 n_02;
+ d = DistancePointToEdge(p, x0, x1, n);
+
+ float d12 = DistancePointToEdge(p, x1, x2, n_12);
+ float d02 = DistancePointToEdge(p, x0, x2, n_02);
+
+ d = min(d, d12);
+ d = min(d, d02);
+
+ n = (d == d12) ? n_12 : n; // keep the direction that belongs to the nearest edge
+ n = (d == d02) ? n_02 : n;
+ }
+
+ d = (dot(p - x0, nTri) < 0.f) ? -d : d; // sign follows the side of the triangle plane p lies on
+
+ return d;
+}