Refactor Mesh SDF generation on GPU to use raytracing for more precise results

This commit is contained in:
Wojtek Figat
2025-11-13 22:05:23 +01:00
parent c7997e0c2f
commit 91ee9f5e05
12 changed files with 615 additions and 547 deletions

View File

@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8e8d210a74ae373793eaee1ddab1372a6a50a000c489f97b2258a09cd93cc2d0
oid sha256:6a56dc14746606f0065d136ad0a69ae1aa41e8732ea380c657d75c187aa09f54
size 5031

Binary file not shown.

BIN
Content/Shaders/SDF.flax (Stored with Git LFS)

Binary file not shown.

View File

@@ -90,25 +90,15 @@ namespace FlaxEditor.Windows.Assets
var gpu = group.Checkbox("Bake on GPU", "If checked, SDF generation will be calculated using GPU on Compute Shader, otherwise CPU will use Job System. GPU generation is fast but result in artifacts in various meshes (eg. foliage).");
gpu.CheckBox.Checked = sdfOptions.GPU;
gpu.CheckBox.StateChanged += c => { Window._sdfOptions.GPU = c.Checked; };
var backfacesThresholdProp = group.AddPropertyItem("Backfaces Threshold", "Custom threshold (in range 0-1) for adjusting mesh internals detection based on the percentage of test rays hit triangle backfaces. Use lower value for more dense mesh.");
var backfacesThreshold = backfacesThresholdProp.FloatValue();
var backfacesThresholdLabel = backfacesThresholdProp.Labels.Last();
backfacesThreshold.ValueBox.MinValue = 0.001f;
backfacesThreshold.ValueBox.MaxValue = 1.0f;
backfacesThreshold.ValueBox.Value = sdfOptions.BackfacesThreshold;
backfacesThreshold.ValueBox.BoxValueChanged += b => { Window._sdfOptions.BackfacesThreshold = b.Value; };
// Toggle Backfaces Threshold visibility (CPU-only option)
gpu.CheckBox.StateChanged += c =>
{
Window._sdfOptions.GPU = c.Checked;
backfacesThresholdLabel.Visible = !c.Checked;
backfacesThreshold.ValueBox.Visible = !c.Checked;
};
backfacesThresholdLabel.Visible = !gpu.CheckBox.Checked;
backfacesThreshold.ValueBox.Visible = !gpu.CheckBox.Checked;
var lodIndex = group.IntegerValue("LOD Index", "Index of the model Level of Detail to use for SDF data building. By default uses the lowest quality LOD for fast building.");
lodIndex.IntValue.MinValue = 0;
lodIndex.IntValue.MaxValue = Asset.LODsCount - 1;

View File

@@ -92,9 +92,8 @@ float GPUTimerQueryDX11::GetResult()
{
if (!_finalized)
{
#if BUILD_DEBUG
ASSERT(HasResult());
#endif
if (!HasResult())
return 0;
UINT64 timeStart, timeEnd;
auto context = _device->GetIM();

View File

@@ -3,17 +3,29 @@
#if COMPILE_WITH_MODEL_TOOL
#include "MeshAccelerationStructure.h"
#include "Engine/Core/Log.h"
#include "Engine/Core/Math/Math.h"
#include "Engine/Content/Content.h"
#include "Engine/Content/Assets/Model.h"
#include "Engine/Graphics/GPUBuffer.h"
#include "Engine/Graphics/Models/ModelData.h"
#include "Engine/Profiler/ProfilerCPU.h"
void MeshAccelerationStructure::BuildBVH(int32 node, int32 maxLeafSize, Array<byte>& scratch)
// Single BVH node in the layout uploaded to the GPU (filled by MeshAccelerationStructure::ToGPU).
// Packed to exactly two Float4 values (see the static_assert below).
// Leaf nodes: Index is the first index-buffer element (triangle index * 3) and Count is the number of indices (triangle count * 3).
// Inner nodes: Index is the index of the first child node and Count is the negated children count.
PACK_STRUCT(struct GPUBVH {
Float3 BoundsMin;
uint32 Index;
Float3 BoundsMax;
int32 Count; // Negative for non-leaf nodes
});
// Guards the packed size so the CPU-side structure keeps the two-Float4 footprint.
static_assert(sizeof(GPUBVH) == sizeof(Float4) * 2, "Invalid BVH structure size for GPU.");
void MeshAccelerationStructure::BuildBVH(int32 node, BVHBuild& build)
{
auto& root = _bvh[node];
ASSERT_LOW_LAYER(root.Leaf.IsLeaf);
if (root.Leaf.TriangleCount <= maxLeafSize)
if (build.MaxLeafSize > 0 && root.Leaf.TriangleCount <= build.MaxLeafSize)
return;
if (build.MaxDepth > 0 && build.NodeDepth >= build.MaxDepth)
return;
// Spawn two leaves
@@ -64,8 +76,8 @@ RETRY:
{
uint16 I0, I1, I2;
};
scratch.Resize(root.Leaf.TriangleCount * sizeof(Tri));
auto dst = (Tri*)scratch.Get();
build.Scratch.Resize(root.Leaf.TriangleCount * sizeof(Tri));
auto dst = (Tri*)build.Scratch.Get();
auto ib16 = meshData.IndexBuffer.Get<uint16>();
for (int32 i = indexStart; i < indexEnd;)
{
@@ -90,13 +102,13 @@ RETRY:
indexStart = 0;
indexEnd = left.Leaf.TriangleCount * 3;
for (int32 i = indexStart; i < indexEnd; i++)
left.Bounds.Merge(vb[((uint16*)scratch.Get())[i]]);
left.Bounds.Merge(vb[((uint16*)build.Scratch.Get())[i]]);
right.Bounds = BoundingBox(vb[dst[root.Leaf.TriangleCount - 1].I0]);
indexStart = left.Leaf.TriangleCount;
indexEnd = root.Leaf.TriangleCount * 3;
for (int32 i = indexStart; i < indexEnd; i++)
right.Bounds.Merge(vb[((uint16*)scratch.Get())[i]]);
right.Bounds.Merge(vb[((uint16*)build.Scratch.Get())[i]]);
}
else
{
@@ -104,8 +116,8 @@ RETRY:
{
uint32 I0, I1, I2;
};
scratch.Resize(root.Leaf.TriangleCount * sizeof(Tri));
auto dst = (Tri*)scratch.Get();
build.Scratch.Resize(root.Leaf.TriangleCount * sizeof(Tri));
auto dst = (Tri*)build.Scratch.Get();
auto ib32 = meshData.IndexBuffer.Get<uint32>();
for (int32 i = indexStart; i < indexEnd;)
{
@@ -130,17 +142,19 @@ RETRY:
indexStart = 0;
indexEnd = left.Leaf.TriangleCount * 3;
for (int32 i = indexStart; i < indexEnd; i++)
left.Bounds.Merge(vb[((uint32*)scratch.Get())[i]]);
left.Bounds.Merge(vb[((uint32*)build.Scratch.Get())[i]]);
right.Bounds = BoundingBox(vb[dst[root.Leaf.TriangleCount - 1].I0]);
indexStart = left.Leaf.TriangleCount;
indexEnd = root.Leaf.TriangleCount * 3;
for (int32 i = indexStart; i < indexEnd; i++)
right.Bounds.Merge(vb[((uint32*)scratch.Get())[i]]);
right.Bounds.Merge(vb[((uint32*)build.Scratch.Get())[i]]);
}
ASSERT_LOW_LAYER(left.Leaf.TriangleCount + right.Leaf.TriangleCount == root.Leaf.TriangleCount);
left.Leaf.TriangleIndex = root.Leaf.TriangleIndex;
right.Leaf.TriangleIndex = left.Leaf.TriangleIndex + left.Leaf.TriangleCount;
// Track the largest leaf produced by this split (both children), used for the build statistics
build.MaxNodeTriangles = Math::Max(build.MaxNodeTriangles, (int32)left.Leaf.TriangleCount);
build.MaxNodeTriangles = Math::Max(build.MaxNodeTriangles, (int32)right.Leaf.TriangleCount);
// Convert into a node
root.Node.IsLeaf = 0;
@@ -148,8 +162,11 @@ RETRY:
root.Node.ChildrenCount = 2;
// Split children
BuildBVH(childIndex, maxLeafSize, scratch);
BuildBVH(childIndex + 1, maxLeafSize, scratch);
build.NodeDepth++;
build.MaxNodeDepth = Math::Max(build.NodeDepth, build.MaxNodeDepth);
BuildBVH(childIndex, build);
BuildBVH(childIndex + 1, build);
build.NodeDepth--;
}
bool MeshAccelerationStructure::PointQueryBVH(int32 node, const Vector3& point, Real& hitDistance, Vector3& hitPoint, Triangle& hitTriangle) const
@@ -160,7 +177,7 @@ bool MeshAccelerationStructure::PointQueryBVH(int32 node, const Vector3& point,
{
// Find closest triangle
Vector3 p;
const Mesh& meshData = _meshes[root.Leaf.MeshIndex];
const Mesh& meshData = _meshes.Get()[root.Leaf.MeshIndex];
const Float3* vb = meshData.VertexBuffer.Get<Float3>();
const int32 indexStart = root.Leaf.TriangleIndex * 3;
const int32 indexEnd = indexStart + root.Leaf.TriangleCount * 3;
@@ -229,7 +246,7 @@ bool MeshAccelerationStructure::RayCastBVH(int32 node, const Ray& ray, Real& hit
if (root.Leaf.IsLeaf)
{
// Ray cast along triangles in the leaf
const Mesh& meshData = _meshes[root.Leaf.MeshIndex];
const Mesh& meshData = _meshes.Get()[root.Leaf.MeshIndex];
const Float3* vb = meshData.VertexBuffer.Get<Float3>();
const int32 indexStart = root.Leaf.TriangleIndex * 3;
const int32 indexEnd = indexStart + root.Leaf.TriangleCount * 3;
@@ -381,6 +398,7 @@ void MeshAccelerationStructure::Add(const ModelData* modelData, int32 lodIndex,
void MeshAccelerationStructure::Add(Float3* vb, int32 vertices, void* ib, int32 indices, bool use16BitIndex, bool copy)
{
ASSERT(vertices % 3 == 0);
auto& meshData = _meshes.AddOne();
meshData.Asset = nullptr;
if (copy)
@@ -395,43 +413,122 @@ void MeshAccelerationStructure::Add(Float3* vb, int32 vertices, void* ib, int32
meshData.Vertices = vertices;
meshData.Indices = indices;
meshData.Use16BitIndexBuffer = use16BitIndex;
BoundingBox::FromPoints(meshData.VertexBuffer.Get<Float3>(), vertices, meshData.Bounds);
}
void MeshAccelerationStructure::BuildBVH(int32 maxLeafSize)
void MeshAccelerationStructure::MergeMeshes(bool force16BitIndexBuffer)
{
if (_meshes.Count() == 0)
return;
if (_meshes.Count() == 1 && (!force16BitIndexBuffer || !_meshes[0].Use16BitIndexBuffer))
return;
PROFILE_CPU();
auto meshes = _meshes;
_meshes.Clear();
_meshes.Resize(1);
auto& mesh = _meshes[0];
mesh.Asset = nullptr;
mesh.Use16BitIndexBuffer = true;
mesh.Indices = 0;
mesh.Vertices = 0;
mesh.Bounds = meshes[0].Bounds;
for (auto& e : meshes)
{
if (!e.Use16BitIndexBuffer)
mesh.Use16BitIndexBuffer = false;
mesh.Vertices += e.Vertices;
mesh.Indices += e.Indices;
BoundingBox::Merge(mesh.Bounds, e.Bounds, mesh.Bounds);
}
mesh.Use16BitIndexBuffer &= mesh.Indices <= MAX_uint16 && !force16BitIndexBuffer;
mesh.VertexBuffer.Allocate(mesh.Vertices * sizeof(Float3));
mesh.IndexBuffer.Allocate(mesh.Indices * sizeof(uint32));
int32 vertexCounter = 0, indexCounter = 0;
for (auto& e : meshes)
{
Platform::MemoryCopy(mesh.VertexBuffer.Get() + vertexCounter * sizeof(Float3), e.VertexBuffer.Get(), e.Vertices * sizeof(Float3));
if (e.Use16BitIndexBuffer)
{
for (int32 i = 0; i < e.Indices; i++)
{
uint16 index = ((uint16*)e.IndexBuffer.Get())[i];
((uint32*)mesh.IndexBuffer.Get())[indexCounter + i] = vertexCounter + index;
}
}
else
{
for (int32 i = 0; i < e.Indices; i++)
{
uint16 index = ((uint32*)e.IndexBuffer.Get())[i];
((uint32*)mesh.IndexBuffer.Get())[indexCounter + i] = vertexCounter + index;
}
}
vertexCounter += e.Vertices;
indexCounter += e.Indices;
if (e.Asset)
e.Asset->RemoveReference();
}
}
void MeshAccelerationStructure::BuildBVH(int32 maxLeafSize, int32 maxDepth)
{
if (_meshes.Count() == 0)
return;
PROFILE_CPU();
BVHBuild build;
build.MaxLeafSize = maxLeafSize;
build.MaxDepth = maxDepth;
// Estimate memory usage
int32 trianglesCount = 0;
for (const Mesh& meshData : _meshes)
trianglesCount += meshData.Indices / 3;
_bvh.Clear();
_bvh.EnsureCapacity(trianglesCount / maxLeafSize);
_bvh.EnsureCapacity(trianglesCount / Math::Max(maxLeafSize, 16));
// Init with the root node and all meshes as leaves
auto& root = _bvh.AddOne();
root.Node.IsLeaf = 0;
root.Node.ChildIndex = 1;
root.Node.ChildrenCount = _meshes.Count();
root.Bounds = _meshes[0].Bounds;
for (int32 i = 0; i < _meshes.Count(); i++)
// Skip using root node if BVH contains only one mesh
if (_meshes.Count() == 1)
{
const Mesh& meshData = _meshes[i];
const Mesh& meshData = _meshes.First();
auto& child = _bvh.AddOne();
child.Leaf.IsLeaf = 1;
child.Leaf.MeshIndex = i;
child.Leaf.MeshIndex = 0;
child.Leaf.TriangleIndex = 0;
child.Leaf.TriangleCount = meshData.Indices / 3;
child.Bounds = meshData.Bounds;
BoundingBox::Merge(root.Bounds, meshData.Bounds, root.Bounds);
Array<byte> scratch;
BuildBVH(0, build);
}
else
{
// Init with the root node and all meshes as leaves
auto& root = _bvh.AddOne();
root.Node.IsLeaf = 0;
root.Node.ChildIndex = 1;
root.Node.ChildrenCount = _meshes.Count();
root.Bounds = _meshes[0].Bounds;
for (int32 i = 0; i < _meshes.Count(); i++)
{
const Mesh& meshData = _meshes[i];
auto& child = _bvh.AddOne();
child.Leaf.IsLeaf = 1;
child.Leaf.MeshIndex = i;
child.Leaf.TriangleIndex = 0;
child.Leaf.TriangleCount = meshData.Indices / 3;
child.Bounds = meshData.Bounds;
BoundingBox::Merge(root.Bounds, meshData.Bounds, root.Bounds);
}
// Sub-divide mesh nodes into smaller leaves
build.MaxNodeDepth = build.MaxDepth = 2;
Array<byte> scratch;
for (int32 i = 0; i < _meshes.Count(); i++)
BuildBVH(i + 1, build);
build.NodeDepth = 0;
}
// Sub-divide mesh nodes into smaller leaves
Array<byte> scratch;
for (int32 i = 0; i < _meshes.Count(); i++)
BuildBVH(i + 1, maxLeafSize, scratch);
LOG(Info, "BVH nodes: {}, max depth: {}, max triangles: {}", _bvh.Count(), build.MaxNodeDepth, build.MaxNodeTriangles);
}
bool MeshAccelerationStructure::PointQuery(const Vector3& point, Real& hitDistance, Vector3& hitPoint, Triangle& hitTriangle, Real maxDistance) const
@@ -579,4 +676,80 @@ bool MeshAccelerationStructure::RayCast(const Ray& ray, Real& hitDistance, Vecto
}
}
// Releases the three GPU buffers held by this container.
MeshAccelerationStructure::GPU::~GPU()
{
SAFE_DELETE_GPU_RESOURCE(BVHBuffer);
SAFE_DELETE_GPU_RESOURCE(VertexBuffer);
SAFE_DELETE_GPU_RESOURCE(IndexBuffer);
}
// Returns true when the GPU data is ready to use.
MeshAccelerationStructure::GPU::operator bool() const
{
    // The index buffer is created last (after the BVH and vertex buffers), so checking it alone covers the whole set
    if (IndexBuffer == nullptr)
        return false;
    return IndexBuffer->GetSize() != 0;
}
// Converts the acceleration structure into GPU buffers (BVH nodes, vertices, indices) for ray tracing in a shader.
// Merges all meshes into one and rebuilds the BVH as a side effect.
// On any failure the returned object is incomplete and evaluates to false.
MeshAccelerationStructure::GPU MeshAccelerationStructure::ToGPU()
{
    PROFILE_CPU();
    GPU gpu;

    // Nothing to upload without any geometry (avoids touching _meshes[0] and creating empty buffers)
    if (_meshes.Count() == 0)
        return gpu;

    // GPU BVH operates on a single mesh with 32-bit indices
    MergeMeshes(true);

    // Construct BVH (no leaf size limit, depth limited by the traversal stack of the shader)
    const int32 BVH_STACK_SIZE = 32; // This must match HLSL shader
    BuildBVH(0, BVH_STACK_SIZE);
    if (_bvh.Count() == 0)
        return gpu;

    // Upload BVH
    {
        Array<GPUBVH> bvhData;
        bvhData.Resize(_bvh.Count());
        for (int32 i = 0; i < _bvh.Count(); i++)
        {
            const auto& src = _bvh.Get()[i];
            auto& dst = bvhData.Get()[i];
            dst.BoundsMin = src.Bounds.Minimum;
            dst.BoundsMax = src.Bounds.Maximum;
            if (src.Leaf.IsLeaf)
            {
                // Leaves address the index buffer directly (3 indices per triangle)
                dst.Index = src.Leaf.TriangleIndex * 3;
                dst.Count = src.Leaf.TriangleCount * 3;
            }
            else
            {
                dst.Index = src.Node.ChildIndex;
                dst.Count = -(int32)src.Node.ChildrenCount; // Mark as non-leaf
                ASSERT(src.Node.ChildrenCount == 2); // GPU shader is hardcoded for 2 children per node
            }
        }
        gpu.BVHBuffer = GPUBuffer::New();
        auto desc = GPUBufferDescription::Structured(_bvh.Count(), sizeof(GPUBVH));
        desc.InitData = bvhData.Get();
        if (gpu.BVHBuffer->Init(desc))
            return gpu;
    }

    const Mesh& mesh = _meshes[0];

    // Upload vertex buffer
    {
        gpu.VertexBuffer = GPUBuffer::New();
        auto desc = GPUBufferDescription::Raw(mesh.Vertices * sizeof(Float3), GPUBufferFlags::ShaderResource);
        desc.InitData = mesh.VertexBuffer.Get();
        if (gpu.VertexBuffer->Init(desc))
            return gpu;
    }

    // Upload index buffer (created last so that GPU::operator bool can validate the whole set by checking it)
    {
        gpu.IndexBuffer = GPUBuffer::New();
        auto desc = GPUBufferDescription::Raw(mesh.Indices * sizeof(uint32), GPUBufferFlags::ShaderResource);
        desc.InitData = mesh.IndexBuffer.Get();
        gpu.IndexBuffer->Init(desc);
    }

    return gpu;
}
#endif

View File

@@ -11,6 +11,7 @@
class Model;
class ModelData;
class GPUBuffer;
/// <summary>
/// Acceleration Structure utility for robust ray tracing mesh geometry with optimized data structure.
@@ -50,10 +51,19 @@ private:
};
};
// State shared by the recursive BVH build: limits, current recursion depth, statistics and temporary memory.
struct BVHBuild
{
    // Maximum triangles per leaf; splitting stops at this size (values <= 0 disable the limit).
    int32 MaxLeafSize = 0;
    // Maximum depth of the hierarchy (values <= 0 disable the limit).
    int32 MaxDepth = 0;
    // Depth of the node that is currently being split.
    int32 NodeDepth = 0;
    // Statistics: the deepest level reached during the build.
    int32 MaxNodeDepth = 0;
    // Statistics: the largest triangle count observed in a split child.
    int32 MaxNodeTriangles = 0;
    // Temporary buffer reused between splits for sorting triangles.
    Array<byte> Scratch;
};
Array<Mesh, InlinedAllocation<16>> _meshes;
Array<BVH> _bvh;
void BuildBVH(int32 node, int32 maxLeafSize, Array<byte>& scratch);
void BuildBVH(int32 node, BVHBuild& build);
bool PointQueryBVH(int32 node, const Vector3& point, Real& hitDistance, Vector3& hitPoint, Triangle& hitTriangle) const;
bool RayCastBVH(int32 node, const Ray& ray, Real& hitDistance, Vector3& hitNormal, Triangle& hitTriangle) const;
@@ -69,14 +79,56 @@ public:
// Adds the triangles geometry for the build to the structure.
void Add(Float3* vb, int32 vertices, void* ib, int32 indices, bool use16BitIndex, bool copy = false);
// Merges all added meshes into a single mesh (to reduce number of BVH nodes). Required for GPU BVH build.
void MergeMeshes(bool force16BitIndexBuffer = false);
// Builds Bounding Volume Hierarchy (BVH) structure for accelerated geometry queries.
void BuildBVH(int32 maxLeafSize = 16);
void BuildBVH(int32 maxLeafSize = 16, int32 maxDepth = 0);
// Queries the closest triangle.
bool PointQuery(const Vector3& point, Real& hitDistance, Vector3& hitPoint, Triangle& hitTriangle, Real maxDistance = MAX_Real) const;
// Ray traces the triangles.
bool RayCast(const Ray& ray, Real& hitDistance, Vector3& hitNormal, Triangle& hitTriangle, Real maxDistance = MAX_Real) const;
public:
struct GPU
{
GPUBuffer* BVHBuffer;
GPUBuffer* VertexBuffer;
GPUBuffer* IndexBuffer;
bool Valid;
GPU()
: BVHBuffer(nullptr)
, VertexBuffer(nullptr)
, IndexBuffer(nullptr)
{
}
GPU(GPU&& other) noexcept
: BVHBuffer(other.BVHBuffer)
, VertexBuffer(other.VertexBuffer)
, IndexBuffer(other.IndexBuffer)
{
other.BVHBuffer = nullptr;
other.VertexBuffer = nullptr;
other.IndexBuffer = nullptr;
}
GPU& operator=(GPU other)
{
Swap(*this, other);
return *this;
}
~GPU();
operator bool() const;
};
// Converts the acceleration structure data to GPU format for raytracing inside a shader.
GPU ToGPU();
};
#endif

View File

@@ -14,6 +14,7 @@
#include "Engine/Threading/Threading.h"
#include "Engine/Graphics/GPUDevice.h"
#include "Engine/Graphics/GPUBuffer.h"
#include "Engine/Graphics/GPUTimerQuery.h"
#include "Engine/Graphics/RenderTools.h"
#include "Engine/Graphics/Async/GPUTask.h"
#include "Engine/Graphics/Shaders/GPUShader.h"
@@ -81,14 +82,18 @@ class GPUModelSDFTask : public GPUTask
{
ConditionVariable* _signal;
AssetReference<Shader> _shader;
MeshAccelerationStructure* _scene;
Model* _inputModel;
const ModelData* _modelData;
int32 _lodIndex;
float _backfacesThreshold;
Int3 _resolution;
ModelBase::SDFData* _sdf;
GPUBuffer *_sdfSrc, *_sdfDst;
GPUTexture* _sdfResult;
Float3 _xyzToLocalMul, _xyzToLocalAdd;
#if GPU_ALLOW_PROFILE_EVENTS
GPUTimerQuery* _timerQuery;
#endif
const uint32 ThreadGroupSize = 64;
GPU_CB_STRUCT(Data {
@@ -96,7 +101,7 @@ class GPUModelSDFTask : public GPUTask
uint32 ResolutionSize;
float MaxDistance;
uint32 VertexStride;
int32 Index16bit;
float BackfacesThreshold;
uint32 TriangleCount;
Float3 VoxelToPosMul;
float WorldUnitsPerVoxel;
@@ -105,47 +110,46 @@ class GPUModelSDFTask : public GPUTask
});
public:
GPUModelSDFTask(ConditionVariable& signal, Model* inputModel, const ModelData* modelData, int32 lodIndex, const Int3& resolution, ModelBase::SDFData* sdf, GPUTexture* sdfResult, const Float3& xyzToLocalMul, const Float3& xyzToLocalAdd)
: GPUTask(Type::Custom)
GPUModelSDFTask(ConditionVariable& signal, MeshAccelerationStructure* scene, Model* inputModel, const ModelData* modelData, int32 lodIndex, const Int3& resolution, ModelBase::SDFData* sdf, GPUTexture* sdfResult, const Float3& xyzToLocalMul, const Float3& xyzToLocalAdd, float backfacesThreshold)
: GPUTask(Type::Custom, GPU_ALLOW_PROFILE_EVENTS ? 4 : GPU_ASYNC_LATENCY) // Fix timer query result reading with some more latency
, _signal(&signal)
, _shader(Content::LoadAsyncInternal<Shader>(TEXT("Shaders/SDF")))
, _scene(scene)
, _inputModel(inputModel)
, _modelData(modelData)
, _lodIndex(lodIndex)
, _backfacesThreshold(backfacesThreshold)
, _resolution(resolution)
, _sdf(sdf)
, _sdfSrc(GPUBuffer::New())
, _sdfDst(GPUBuffer::New())
, _sdfResult(sdfResult)
, _xyzToLocalMul(xyzToLocalMul)
, _xyzToLocalAdd(xyzToLocalAdd)
{
#if GPU_ENABLE_RESOURCE_NAMING
_sdfSrc->SetName(TEXT("SDFSrc"));
_sdfDst->SetName(TEXT("SDFDst"));
#if GPU_ALLOW_PROFILE_EVENTS
, _timerQuery(GPUDevice::Instance->CreateTimerQuery())
#endif
{
}
~GPUModelSDFTask()
{
SAFE_DELETE_GPU_RESOURCE(_sdfSrc);
SAFE_DELETE_GPU_RESOURCE(_sdfDst);
#if GPU_ALLOW_PROFILE_EVENTS
SAFE_DELETE_GPU_RESOURCE(_timerQuery);
#endif
}
Result run(GPUTasksContext* tasksContext) override
{
PROFILE_GPU_CPU("GPUModelSDFTask");
GPUContext* context = tasksContext->GPU;
#if GPU_ALLOW_PROFILE_EVENTS
_timerQuery->Begin();
#endif
// Allocate resources
if (_shader == nullptr || _shader->WaitForLoaded())
return Result::Failed;
GPUShader* shader = _shader->GetShader();
const uint32 resolutionSize = _resolution.X * _resolution.Y * _resolution.Z;
auto desc = GPUBufferDescription::Typed(resolutionSize, PixelFormat::R32_UInt, true);
// TODO: use transient texture (single frame)
if (_sdfSrc->Init(desc) || _sdfDst->Init(desc))
return Result::Failed;
auto cb = shader->GetCB(0);
Data data;
data.Resolution = _resolution;
@@ -154,6 +158,13 @@ public:
data.WorldUnitsPerVoxel = _sdf->WorldUnitsPerVoxel;
data.VoxelToPosMul = _xyzToLocalMul;
data.VoxelToPosAdd = _xyzToLocalAdd;
data.BackfacesThreshold = _backfacesThreshold - 0.05f; // Bias a bit
// Send BVH to the GPU
auto bvh = _scene->ToGPU();
CHECK_RETURN(bvh.BVHBuffer && bvh.VertexBuffer && bvh.IndexBuffer, Result::Failed);
data.VertexStride = sizeof(Float3);
data.TriangleCount = bvh.IndexBuffer->GetElementsCount() / 3;
// Dispatch in 1D and fallback to 2D when using large resolution
Int3 threadGroups(Math::CeilToInt((float)resolutionSize / ThreadGroupSize), 1, 1);
@@ -165,159 +176,34 @@ public:
}
data.ThreadGroupsX = threadGroups.X;
// Init SDF volume
// Init constants
context->BindCB(0, cb);
context->UpdateCB(cb, &data);
context->BindUA(0, _sdfSrc->View());
context->Dispatch(shader->GetCS("CS_Init"), threadGroups.X, threadGroups.Y, threadGroups.Z);
// Rendering input triangles into the SDF volume
if (_inputModel)
{
PROFILE_GPU_CPU_NAMED("Rasterize");
const ModelLOD& lod = _inputModel->LODs[Math::Clamp(_lodIndex, _inputModel->HighestResidentLODIndex(), _inputModel->LODs.Count() - 1)];
GPUBuffer *vbTemp = nullptr, *ibTemp = nullptr;
for (int32 i = 0; i < lod.Meshes.Count(); i++)
{
const Mesh& mesh = lod.Meshes[i];
const MaterialSlot& materialSlot = _inputModel->MaterialSlots[mesh.GetMaterialSlotIndex()];
if (materialSlot.Material && !materialSlot.Material->WaitForLoaded())
{
// Skip transparent materials
if (materialSlot.Material->GetInfo().BlendMode != MaterialBlendMode::Opaque)
continue;
}
GPUBuffer* vb = mesh.GetVertexBuffer(0);
GPUBuffer* ib = mesh.GetIndexBuffer();
data.Index16bit = mesh.Use16BitIndexBuffer() ? 1 : 0;
data.VertexStride = vb->GetStride();
data.TriangleCount = mesh.GetTriangleCount();
const uint32 groups = Math::CeilToInt((float)data.TriangleCount / ThreadGroupSize);
if (groups > GPU_MAX_CS_DISPATCH_THREAD_GROUPS)
{
// TODO: support larger meshes via 2D dispatch
LOG(Error, "Not supported mesh with {} triangles.", data.TriangleCount);
continue;
}
context->UpdateCB(cb, &data);
if (!EnumHasAllFlags(vb->GetDescription().Flags, GPUBufferFlags::RawBuffer | GPUBufferFlags::ShaderResource))
{
desc = GPUBufferDescription::Raw(vb->GetSize(), GPUBufferFlags::ShaderResource);
// TODO: use transient buffer (single frame)
if (!vbTemp)
{
vbTemp = GPUBuffer::New();
#if GPU_ENABLE_RESOURCE_NAMING
vbTemp->SetName(TEXT("SDFvb"));
#endif
}
vbTemp->Init(desc);
context->CopyBuffer(vbTemp, vb, desc.Size);
vb = vbTemp;
}
if (!EnumHasAllFlags(ib->GetDescription().Flags, GPUBufferFlags::RawBuffer | GPUBufferFlags::ShaderResource))
{
desc = GPUBufferDescription::Raw(ib->GetSize(), GPUBufferFlags::ShaderResource);
// TODO: use transient buffer (single frame)
if (!ibTemp)
{
ibTemp = GPUBuffer::New();
#if GPU_ENABLE_RESOURCE_NAMING
ibTemp->SetName(TEXT("SDFib"));
#endif
}
ibTemp->Init(desc);
context->CopyBuffer(ibTemp, ib, desc.Size);
ib = ibTemp;
}
context->BindSR(0, vb->View());
context->BindSR(1, ib->View());
context->Dispatch(shader->GetCS("CS_RasterizeTriangle"), groups, 1, 1);
}
SAFE_DELETE_GPU_RESOURCE(vbTemp);
SAFE_DELETE_GPU_RESOURCE(ibTemp);
}
else if (_modelData)
{
PROFILE_GPU_CPU_NAMED("Rasterize");
const ModelLodData& lod = _modelData->LODs[Math::Clamp(_lodIndex, 0, _modelData->LODs.Count() - 1)];
auto vb = GPUBuffer::New();
auto ib = GPUBuffer::New();
#if GPU_ENABLE_RESOURCE_NAMING
vb->SetName(TEXT("SDFvb"));
ib->SetName(TEXT("SDFib"));
#endif
for (int32 i = 0; i < lod.Meshes.Count(); i++)
{
const MeshData* mesh = lod.Meshes[i];
const MaterialSlotEntry& materialSlot = _modelData->Materials[mesh->MaterialSlotIndex];
auto material = Content::LoadAsync<MaterialBase>(materialSlot.AssetID);
if (material && !material->WaitForLoaded())
{
// Skip transparent materials
if (material->GetInfo().BlendMode != MaterialBlendMode::Opaque)
continue;
}
data.Index16bit = 0;
data.VertexStride = sizeof(Float3);
data.TriangleCount = mesh->Indices.Count() / 3;
const uint32 groups = Math::CeilToInt((float)data.TriangleCount / ThreadGroupSize);
if (groups > GPU_MAX_CS_DISPATCH_THREAD_GROUPS)
{
// TODO: support larger meshes via 2D dispatch
LOG(Error, "Not supported mesh with {} triangles.", data.TriangleCount);
continue;
}
context->UpdateCB(cb, &data);
desc = GPUBufferDescription::Raw(mesh->Positions.Count() * sizeof(Float3), GPUBufferFlags::ShaderResource);
desc.InitData = mesh->Positions.Get();
// TODO: use transient buffer (single frame)
vb->Init(desc);
desc = GPUBufferDescription::Raw(mesh->Indices.Count() * sizeof(uint32), GPUBufferFlags::ShaderResource);
desc.InitData = mesh->Indices.Get();
// TODO: use transient buffer (single frame)
ib->Init(desc);
context->BindSR(0, vb->View());
context->BindSR(1, ib->View());
context->Dispatch(shader->GetCS("CS_RasterizeTriangle"), groups, 1, 1);
}
SAFE_DELETE_GPU_RESOURCE(vb);
SAFE_DELETE_GPU_RESOURCE(ib);
}
// Convert SDF volume data back to floats
context->Dispatch(shader->GetCS("CS_Resolve"), threadGroups.X, threadGroups.Y, threadGroups.Z);
// Run linear flood-fill loop to populate all voxels with valid distances (spreads the initial values from triangles rasterization)
{
PROFILE_GPU_CPU_NAMED("FloodFill");
auto csFloodFill = shader->GetCS("CS_FloodFill");
const int32 floodFillIterations = Math::Max(_resolution.MaxValue() / 2 + 1, 8);
for (int32 floodFill = 0; floodFill < floodFillIterations; floodFill++)
{
context->ResetUA();
context->BindUA(0, _sdfDst->View());
context->BindSR(0, _sdfSrc->View());
context->Dispatch(csFloodFill, threadGroups.X, threadGroups.Y, threadGroups.Z);
Swap(_sdfSrc, _sdfDst);
}
}
// Encode SDF values into output storage
context->ResetUA();
context->BindSR(0, _sdfSrc->View());
// TODO: update GPU SDF texture within this task to skip additional CPU->GPU copy
auto sdfTextureDesc = GPUTextureDescription::New3D(_resolution.X, _resolution.Y, _resolution.Z, PixelFormat::R16_UNorm, GPUTextureFlags::UnorderedAccess | GPUTextureFlags::RenderTarget);
// Allocate output texture
auto sdfTextureDesc = GPUTextureDescription::New3D(_resolution.X, _resolution.Y, _resolution.Z, PixelFormat::R16_UNorm, GPUTextureFlags::UnorderedAccess);
// TODO: use transient texture (single frame)
auto sdfTexture = GPUTexture::New();
#if GPU_ENABLE_RESOURCE_NAMING
sdfTexture->SetName(TEXT("SDFTexture"));
#endif
sdfTexture->Init(sdfTextureDesc);
context->BindUA(1, sdfTexture->ViewVolume());
context->Dispatch(shader->GetCS("CS_Encode"), threadGroups.X, threadGroups.Y, threadGroups.Z);
// Renders directly to the output texture
context->BindUA(0, sdfTexture->ViewVolume());
// Init the volume (rasterization mixes with existing contents)
context->Dispatch(shader->GetCS("CS_Init"), threadGroups.X, threadGroups.Y, threadGroups.Z);
// Render input triangles into the SDF volume
{
PROFILE_GPU("Rasterize");
context->BindSR(0, bvh.VertexBuffer->View());
context->BindSR(1, bvh.IndexBuffer->View());
context->BindSR(2, bvh.BVHBuffer->View());
auto* rasterizeCS = shader->GetCS("CS_RasterizeTriangles");
context->Dispatch(rasterizeCS, threadGroups.X, threadGroups.Y, threadGroups.Z);
}
// Copy result data into readback buffer
if (_sdfResult)
@@ -329,6 +215,9 @@ public:
SAFE_DELETE_GPU_RESOURCE(sdfTexture);
#if GPU_ALLOW_PROFILE_EVENTS
_timerQuery->End();
#endif
return Result::Ok;
}
@@ -336,6 +225,10 @@ public:
{
GPUTask::OnSync();
_signal->NotifyOne();
#if GPU_ALLOW_PROFILE_EVENTS
if (_timerQuery->HasResult())
LOG(Info, "GPU SDF generation took {} ms", Utilities::RoundTo1DecimalPlace(_timerQuery->GetResult()));
#endif
}
void OnFail() override
@@ -445,6 +338,13 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, const ModelData* modelData,
// http://ramakarl.com/pdfs/2016_Hoetzlein_GVDB.pdf
// https://www.cse.chalmers.se/~uffe/HighResolutionSparseVoxelDAGs.pdf
// Setup acceleration structure for fast ray tracing the mesh triangles
MeshAccelerationStructure scene;
if (inputModel)
scene.Add(inputModel, lodIndex);
else if (modelData)
scene.Add(modelData, lodIndex);
// Check if run SDF generation on a GPU via Compute Shader or on a Job System
useGPU &= GPUDevice::Instance
&& GPUDevice::Instance->GetState() == GPUDevice::DeviceState::Ready
@@ -465,7 +365,7 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, const ModelData* modelData,
// Run SDF generation via GPU async task
ConditionVariable signal;
CriticalSection mutex;
Task* task = New<GPUModelSDFTask>(signal, inputModel, modelData, lodIndex, resolution, &sdf, sdfResult, xyzToLocalMul, xyzToLocalAdd);
Task* task = New<GPUModelSDFTask>(signal, &scene, inputModel, modelData, lodIndex, resolution, &sdf, sdfResult, xyzToLocalMul, xyzToLocalAdd, backfacesThreshold);
task->Start();
mutex.Lock();
signal.Wait(mutex);
@@ -489,16 +389,10 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, const ModelData* modelData,
}
else
{
// Setup acceleration structure for fast ray tracing the mesh triangles
MeshAccelerationStructure scene;
if (inputModel)
scene.Add(inputModel, lodIndex);
else if (modelData)
scene.Add(modelData, lodIndex);
scene.BuildBVH();
// Brute-force for each voxel to calculate distance to the closest triangle with point query and distance sign by raycasting around the voxel
constexpr int32 sampleCount = 12;
constexpr int32 sampleCount = BUILD_DEBUG ? 6 : 12;
Float3 sampleDirections[sampleCount];
{
RandomStream rand;
@@ -526,36 +420,30 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, const ModelData* modelData,
Real minDistance = sdf.MaxDistance;
Vector3 voxelPos = Float3((float)x, (float)y, (float)z) * xyzToLocalMul + xyzToLocalAdd;
// Point query to find the distance to the closest surface
scene.PointQuery(voxelPos, minDistance, hitPoint, hitTriangle);
// Raycast samples around voxel to count triangle backfaces hit
int32 hitBackCount = 0, hitCount = 0;
int32 hitBackCount = 0, minBackfaceHitCount = (int32)(sampleCount * backfacesThreshold);
for (int32 sample = 0; sample < sampleCount; sample++)
{
Ray sampleRay(voxelPos, sampleDirections[sample]);
sampleRay.Position -= sampleRay.Direction * 0.0001f; // Apply small margin
if (scene.RayCast(sampleRay, hitDistance, hitNormal, hitTriangle))
{
if (hitDistance < minDistance)
minDistance = hitDistance;
hitCount++;
const bool backHit = Float3::Dot(sampleRay.Direction, hitTriangle.GetNormal()) > 0;
if (backHit)
hitBackCount++;
minDistance = Math::Min(hitDistance, minDistance);
if (Float3::Dot(sampleRay.Direction, hitTriangle.GetNormal()) > 0)
{
if (++hitBackCount >= minBackfaceHitCount)
break;
}
}
}
float distance = (float)minDistance;
// TODO: surface thickness threshold? shift reduce distance for all voxels by something like 0.01 to enlarge thin geometry
// if ((float)hitBackCount > (float)hitCount * 0.3f && hitCount != 0)
if ((float)hitBackCount > (float)sampleCount * backfacesThreshold && hitCount != 0)
{
// Voxel is inside the geometry so turn it into negative distance to the surface
distance *= -1;
}
// Point query to find the distance to the closest surface
scene.PointQuery(voxelPos, minDistance, hitPoint, hitTriangle, minDistance);
if (hitBackCount >= minBackfaceHitCount)
minDistance *= -1; // Voxel is inside the geometry so turn it into negative distance to the surface
const int32 xAddress = x + yAddress;
formatWrite(voxels.Get() + xAddress * formatStride, distance * encodeMAD.X + encodeMAD.Y);
formatWrite(voxels.Get() + xAddress * formatStride, minDistance * encodeMAD.X + encodeMAD.Y);
}
}
};

View File

@@ -18,6 +18,26 @@ bool RayHitRect(float3 r, float3 rectCenter, float3 rectX, float3 rectY, float3
return inExtentX && inExtentY;
}
// Tests a ray (origin rPos, direction rDir) against a triangle (v0, v1, v2).
// Outputs the distance along the ray to the hit point and returns true if the ray hits the triangle.
// Both triangle sides are tested (front and back faces).
bool RayIntersectsTriangle(float3 rPos, float3 rDir, float3 v0, float3 v1, float3 v2, out float distance)
{
    // [https://stackoverflow.com/a/42752998]
    float3 e01 = v1 - v0;
    float3 e02 = v2 - v0;
    float3 faceNormal = cross(e01, e02);
    float3 toOrigin = rPos - v0;
    float3 perp = cross(toOrigin, rDir);

    float determinant = -dot(rDir, faceNormal);
    float invDet = 1.0f / determinant;

    // Ray distance and barycentric coordinates of the hit point
    distance = dot(toOrigin, faceNormal) * invDet;
    float u = dot(e02, perp) * invDet;
    float v = -dot(e01, perp) * invDet;
    float w = 1.0f - u - v;

    // Reject rays (nearly) parallel to the triangle plane, hits behind the origin and points outside the triangle
    bool notParallel = abs(determinant) >= 1E-8;
    bool inFront = distance > 0;
    bool insideTriangle = u >= 0 && v >= 0 && w >= 0;
    return notParallel && inFront && insideTriangle;
}
// Hits axis-aligned box (boxMin, boxMax) with a line (lineStart, lineEnd).
// Returns the intersections on the line (x - closest, y - furthest).
// Line hits the box if: intersections.x < intersections.y.
@@ -42,4 +62,39 @@ bool BoxIntersectsSphere(float3 boxMin, float3 boxMax, float3 sphereCenter, floa
return distance(sphereCenter, clampedCenter) <= sphereRadius;
}
// Returns the unsigned distance between the point and the axis-aligned box (0 when the point lies inside the box).
float PointDistanceBox(float3 boxMin, float3 boxMax, float3 pos)
{
    // Closest point on (or in) the box is the position clamped to the box extents
    float3 closest = clamp(pos, boxMin, boxMax);
    float3 offset = closest - pos;
    return length(offset);
}
// Returns the dot product of the vector with itself (its squared length).
float dot2(float3 v)
{
return dot(v, v);
}
// Computes the squared distance between a point (p) and a triangle (v1, v2, v3).
float DistancePointToTriangle2(float3 p, float3 v1, float3 v2, float3 v3)
{
    // [Inigo Quilez, https://iquilezles.org/articles/triangledistance/]
    // Triangle edges and the point offsets relative to the start of each edge
    float3 v21 = v2 - v1;
    float3 v32 = v3 - v2;
    float3 v13 = v1 - v3;
    float3 p1 = p - v1;
    float3 p2 = p - v2;
    float3 p3 = p - v3;
    float3 nor = cross(v21, v13);

    // Inside/outside test: sum of the per-edge side signs tells if the point projects inside the triangle
    float sideSum = sign(dot(cross(v21, nor), p1)) +
                    sign(dot(cross(v32, nor), p2)) +
                    sign(dot(cross(v13, nor), p3));

    float result;
    if (sideSum < 2.0)
    {
        // Outside: use the closest of the 3 edges
        float edge21 = dot2(v21 * saturate(dot(v21, p1) / dot2(v21)) - p1);
        float edge32 = dot2(v32 * saturate(dot(v32, p2) / dot2(v32)) - p2);
        float edge13 = dot2(v13 * saturate(dot(v13, p3) / dot2(v13)) - p3);
        result = min(min(edge21, edge32), edge13);
    }
    else
    {
        // Inside: use the distance to the triangle face (plane)
        result = dot(nor, p1) * dot(nor, p1) / dot2(nor);
    }
    return result;
}
#endif

View File

@@ -0,0 +1,163 @@
// Copyright (c) Wojciech Figat. All rights reserved.
#ifndef __MESH_ACCELERATION_STRUCTURE__
#define __MESH_ACCELERATION_STRUCTURE__
#include "./Flax/Collisions.hlsl"
// This must match MeshAccelerationStructure::ToGPU
#define BVH_STACK_SIZE 32
// Single node of the mesh BVH. Traversal code treats nodes with Count > 0 as leaves.
struct BVHNode
{
// Minimum corner of the node bounding box
float3 BoundsMin;
// Leaf: first entry of the index buffer to read triangle vertices from. Non-leaf: index of the first child node (the second child is at Index + 1)
uint Index;
// Maximum corner of the node bounding box
float3 BoundsMax;
int Count; // Leaf: amount of index buffer entries to read (3 per triangle). Negative for non-leaf nodes
};
// Set of GPU resources used by the BVH queries.
struct BVHBuffers
{
// BVH nodes (traversal starts from the node at index 0)
StructuredBuffer<BVHNode> BVHBuffer;
// Raw vertex data (3 x 32-bit position is loaded from the start of each vertex)
ByteAddressBuffer VertexBuffer;
// Raw index data (loaded as 32-bit values)
ByteAddressBuffer IndexBuffer;
// Offset between consecutive vertices in the vertex buffer (used to compute the load address of a vertex)
uint VertexStride;
};
// Result of a BVH query.
struct BVHHit
{
// Distance to the closest triangle found (queries initialize it with their maxDistance argument)
float Distance;
// True if the ray direction points along the hit triangle normal (written by RayCastBVH, PointQueryBVH leaves it cleared)
bool IsBackface;
};
// Fetches the position of the vertex referenced by the given entry of the BVH index buffer.
float3 LoadVertexBVH(BVHBuffers bvh, uint index)
{
    // Index entries are 32-bit so the entry index is shifted into a byte address
    uint vertexIndex = bvh.IndexBuffer.Load(index << 2u);
    uint3 rawPosition = bvh.VertexBuffer.Load3(vertexIndex * bvh.VertexStride);
    return asfloat(rawPosition);
}
// Slab-based test of a ray against an axis-aligned box (boxMin, boxMax).
// Returns the distance along the ray to the box entry (0 if the ray starts inside the box) or -1 if the box is not hit.
// [https://tavianator.com/2011/ray_box.html]
float RayTestBoxBVH(float3 rayPos, float3 rayDir, float3 boxMin, float3 boxMax)
{
    float3 invDir = rcp(rayDir);
    float3 slabMin = (boxMin - rayPos) * invDir;
    float3 slabMax = (boxMax - rayPos) * invDir;

    // Latest entry and earliest leave over all 3 axes
    float3 tEnter = min(slabMin, slabMax);
    float tNear = max(max(tEnter.x, tEnter.y), tEnter.z);
    float3 tLeave = max(slabMin, slabMax);
    float tFar = min(min(tLeave.x, tLeave.y), tLeave.z);

    // Box is hit when the ray leaves it after entering and the box is not fully behind the ray
    if (tFar >= tNear && tFar > 0)
        return max(tNear, 0);
    return -1;
}
// Performs raytracing against the BVH acceleration structure to find the closest intersection with a triangle.
// bvh - buffers with the BVH nodes and the mesh geometry.
// rayPos, rayDir - ray origin and direction.
// hit - closest hit found. hit.Distance stays at maxDistance if nothing was hit.
// maxDistance - intersections at this distance or further are ignored.
// Returns true if any triangle was hit.
// NOTE: child nodes are pushed without a capacity check, so the traversal relies on the BVH fitting BVH_STACK_SIZE.
bool RayCastBVH(BVHBuffers bvh, float3 rayPos, float3 rayDir, out BVHHit hit, float maxDistance = 1000000.0f)
{
    hit = (BVHHit)0;
    hit.Distance = maxDistance;

    // Stack-based recursion, starts from root node
    uint stack[BVH_STACK_SIZE];
    uint stackCount = 1;
    stack[0] = 0;
    bool result = false;
    LOOP
    while (stackCount > 0)
    {
        BVHNode node = bvh.BVHBuffer[stack[--stackCount]];

        // Raytrace bounds (skip nodes that are missed or are further than the closest hit found so far)
        float boundsHit = RayTestBoxBVH(rayPos, rayDir, node.BoundsMin, node.BoundsMax);
        BRANCH
        if (boundsHit >= 0 && boundsHit < hit.Distance)
        {
            BRANCH
            if (node.Count > 0) // Is leaf?
            {
                // Ray cast along all triangles in the leaf
                uint indexStart = node.Index;
                uint indexEnd = indexStart + node.Count;
                for (uint i = indexStart; i < indexEnd;)
                {
                    // Load triangle
                    float3 v0 = LoadVertexBVH(bvh, i++);
                    float3 v1 = LoadVertexBVH(bvh, i++);
                    float3 v2 = LoadVertexBVH(bvh, i++);

                    // Raytrace triangle
                    float distance;
                    if (RayIntersectsTriangle(rayPos, rayDir, v0, v1, v2, distance) && distance < hit.Distance)
                    {
                        // Only the sign of the dot product is needed so the face normal doesn't have to be normalized
                        float3 n = cross(v1 - v0, v2 - v0);
                        hit.Distance = distance;
                        hit.IsBackface = dot(rayDir, n) > 0;
                        result = true;
                    }
                }
            }
            else
            {
                // Push children onto the stack to be tested
                stack[stackCount++] = node.Index + 0;
                stack[stackCount++] = node.Index + 1;
            }
        }
    }
    return result;
}
// Queries the BVH acceleration structure for the distance from a given point to the closest triangle.
// hit.Distance receives the closest distance found (stays at maxDistance if no triangle is closer than that).
// Returns true if any triangle closer than maxDistance was found.
bool PointQueryBVH(BVHBuffers bvh, float3 pos, out BVHHit hit, float maxDistance = 1000000.0f)
{
    hit = (BVHHit)0;
    hit.Distance = maxDistance;
    bool found = false;

    // Iterative traversal with an explicit stack of node indices (root is the node 0)
    uint pending[BVH_STACK_SIZE];
    pending[0] = 0;
    uint pendingCount = 1;
    LOOP
    while (pendingCount > 0)
    {
        pendingCount--;
        BVHNode node = bvh.BVHBuffer[pending[pendingCount]];

        // Node cannot contain anything closer if its bounds are not closer than the best result so far
        float boundsDistance = PointDistanceBox(node.BoundsMin, node.BoundsMax, pos);
        if (boundsDistance >= hit.Distance)
            continue;

        BRANCH
        if (node.Count <= 0)
        {
            // Inner node: schedule both children for testing
            pending[pendingCount] = node.Index + 0;
            pending[pendingCount + 1] = node.Index + 1;
            pendingCount += 2;
        }
        else
        {
            // Leaf node: check all of its triangles (3 index entries each)
            uint firstIndex = node.Index;
            uint lastIndex = firstIndex + node.Count;
            for (uint i = firstIndex; i < lastIndex; i += 3)
            {
                float3 a = LoadVertexBVH(bvh, i);
                float3 b = LoadVertexBVH(bvh, i + 1);
                float3 c = LoadVertexBVH(bvh, i + 2);
                float triangleDistance = sqrt(DistancePointToTriangle2(pos, a, b, c));
                if (triangleDistance < hit.Distance)
                {
                    hit.Distance = triangleDistance;
                    found = true;
                }
            }
        }
    }
    return found;
}
#endif

View File

@@ -1,18 +1,14 @@
// Copyright (c) Wojciech Figat. All rights reserved.
// Mesh SDF generation based on https://github.com/GPUOpen-Effects/TressFX
#include "./Flax/Common.hlsl"
#include "./Flax/ThirdParty/TressFX/TressFXSDF.hlsl"
#define THREAD_GROUP_SIZE 64
#include "./Flax/MeshAccelerationStructure.hlsl"
META_CB_BEGIN(0, Data)
int3 Resolution;
uint ResolutionSize;
float MaxDistance;
uint VertexStride;
bool Index16bit;
float BackfacesThreshold;
uint TriangleCount;
float3 VoxelToPosMul;
float WorldUnitsPerVoxel;
@@ -20,21 +16,9 @@ float3 VoxelToPosAdd;
uint ThreadGroupsX;
META_CB_END
RWBuffer<uint> SDF : register(u0);
uint GetVoxelIndex(uint3 groupId, uint groupIndex)
uint GetVoxelIndex(uint3 groupId, uint groupIndex, uint groupSize)
{
return groupIndex + (groupId.x + groupId.y * ThreadGroupsX) * THREAD_GROUP_SIZE;
}
// Clamps voxel coordinates into the range [0; Resolution - 1] on each axis.
int3 ClampVoxelCoord(int3 coord)
{
return clamp(coord, 0, Resolution - 1);
}
int GetVoxelIndex(int3 coord)
{
return Resolution.x * Resolution.y * coord.z + Resolution.x * coord.y + coord.x;
return groupIndex + (groupId.x + groupId.y * ThreadGroupsX) * groupSize;
}
float3 GetVoxelPos(int3 coord)
@@ -42,12 +26,6 @@ float3 GetVoxelPos(int3 coord)
return float3((float)coord.x, (float)coord.y, (float)coord.z) * VoxelToPosMul + VoxelToPosAdd;
}
// Converts a position into voxel coordinates (reverses the VoxelToPosMul/VoxelToPosAdd transform, fractions are dropped by the int casts).
int3 GetVoxelCoord(float3 pos)
{
    float3 voxel = (pos - VoxelToPosAdd) / VoxelToPosMul;
    return int3((int)voxel.x, (int)voxel.y, (int)voxel.z);
}
int3 GetVoxelCoord(uint index)
{
uint sizeX = (uint)Resolution.x;
@@ -59,191 +37,90 @@ int3 GetVoxelCoord(uint index)
return int3((int)coordX, (int)coordY, (int)coordZ);
}
// Clears SDF texture with the initial distance.
#ifdef _CS_Init
#define THREAD_GROUP_SIZE 64
RWTexture3D<unorm half> SDFtex : register(u0);
// Clears SDF texture with the maximum distance.
META_CS(true, FEATURE_LEVEL_SM5)
[numthreads(THREAD_GROUP_SIZE, 1, 1)]
void CS_Init(uint3 GroupId : SV_GroupID, uint GroupIndex : SV_GroupIndex)
{
uint voxelIndex = GetVoxelIndex(GroupId, GroupIndex);
uint voxelIndex = GetVoxelIndex(GroupId, GroupIndex, THREAD_GROUP_SIZE);
if (voxelIndex >= ResolutionSize)
return;
float distance = MaxDistance * 10.0f; // Start with a very large value
SDF[voxelIndex] = FloatFlip3(distance);
int3 voxelCoord = GetVoxelCoord(voxelIndex);
SDFtex[voxelCoord] = 1.0f;
}
// Unpacks SDF texture into distances stored as normal float values (FloatFlip3 is used for interlocked operations on uint).
META_CS(true, FEATURE_LEVEL_SM5)
[numthreads(THREAD_GROUP_SIZE, 1, 1)]
void CS_Resolve(uint3 GroupId : SV_GroupID, uint GroupIndex : SV_GroupIndex)
{
uint voxelIndex = GetVoxelIndex(GroupId, GroupIndex);
// Skip threads that map outside of the voxels range
if (voxelIndex >= ResolutionSize)
return;
// Undo the FloatFlip3 encoding in-place (the result is kept as raw uint bits)
SDF[voxelIndex] = IFloatFlip3(SDF[voxelIndex]);
}
#endif
#ifdef _CS_RasterizeTriangle
#ifdef _CS_RasterizeTriangles
#define THREAD_GROUP_SIZE 64
RWTexture3D<unorm half> SDFtex : register(u0);
ByteAddressBuffer VertexBuffer : register(t0);
ByteAddressBuffer IndexBuffer : register(t1);
// Reads the i-th index from the index buffer (16-bit or 32-bit layout selected by the Index16bit constant).
uint LoadIndex(uint i)
{
    if (!Index16bit)
        return IndexBuffer.Load(i << 2u);

    // Two 16-bit indices share a single 32-bit word: even ones use the low half, odd ones use the high half
    uint word = IndexBuffer.Load((i >> 1u) << 2u);
    uint shift = (i & 1u) * 16u;
    return (word >> shift) & 0xffff;
}
// Reads the position of the i-th vertex from the vertex buffer (3 floats at the start of the vertex).
float3 LoadVertex(uint i)
{
    uint vertexAddress = i * VertexStride;
    return asfloat(VertexBuffer.Load3(vertexAddress));
}
StructuredBuffer<BVHNode> BVHBuffer : register(t2);
// Renders triangle mesh into the SDF texture by writing minimum distance to the triangle into all intersecting voxels.
META_CS(true, FEATURE_LEVEL_SM5)
[numthreads(THREAD_GROUP_SIZE, 1, 1)]
void CS_RasterizeTriangle(uint3 DispatchThreadId : SV_DispatchThreadID)
void CS_RasterizeTriangles(uint3 GroupId : SV_GroupID, uint3 GroupThreadID : SV_GroupThreadID, uint GroupIndex : SV_GroupIndex)
{
uint triangleIndex = DispatchThreadId.x;
if (triangleIndex >= TriangleCount)
uint voxelIndex = GetVoxelIndex(GroupId, GroupIndex, THREAD_GROUP_SIZE);
if (voxelIndex >= ResolutionSize)
return;
int3 voxelCoord = GetVoxelCoord(voxelIndex);
float3 voxelPos = GetVoxelPos(voxelCoord);
// Load triangle
triangleIndex *= 3;
uint i0 = LoadIndex(triangleIndex + 0);
uint i1 = LoadIndex(triangleIndex + 1);
uint i2 = LoadIndex(triangleIndex + 2);
float3 v0 = LoadVertex(i0);
float3 v1 = LoadVertex(i1);
float3 v2 = LoadVertex(i2);
BVHBuffers bvh;
bvh.BVHBuffer = BVHBuffer;
bvh.VertexBuffer = VertexBuffer;
bvh.IndexBuffer = IndexBuffer;
bvh.VertexStride = VertexStride;
// Project triangle into SDF voxels
float3 vMargin = float3(WorldUnitsPerVoxel, WorldUnitsPerVoxel, WorldUnitsPerVoxel);
float3 vMin = min(min(v0, v1), v2) - vMargin;
float3 vMax = max(max(v0, v1), v2) + vMargin;
int3 voxelMargin = int3(1, 1, 1);
int3 voxelMin = GetVoxelCoord(vMin) - voxelMargin;
int3 voxelMax = GetVoxelCoord(vMax) + voxelMargin;
voxelMin = ClampVoxelCoord(voxelMin);
voxelMax = ClampVoxelCoord(voxelMax);
// Point query to find the distance to the closest surface
BVHHit hit;
PointQueryBVH(bvh, voxelPos, hit, MaxDistance);
float sdf = hit.Distance;
// Rasterize into SDF voxels
for (int z = voxelMin.z; z <= voxelMax.z; z++)
// Raycast triangles around voxel to count triangle backfaces hit
#define CLOSEST_CACHE_SIZE 6
float3 closestDirections[CLOSEST_CACHE_SIZE] =
{
for (int y = voxelMin.y; y <= voxelMax.y; y++)
float3(+1, 0, 0),
float3(-1, 0, 0),
float3(0, +1, 0),
float3(0, -1, 0),
float3(0, 0, +1),
float3(0, 0, -1),
};
uint hitBackCount = 0;
uint minBackfaceHitCount = (uint)(CLOSEST_CACHE_SIZE * BackfacesThreshold);
for (uint i = 0; i < CLOSEST_CACHE_SIZE; i++)
{
float3 rayDir = closestDirections[i];
if (RayCastBVH(bvh, voxelPos, rayDir, hit, MaxDistance))
{
for (int x = voxelMin.x; x <= voxelMax.x; x++)
{
int3 voxelCoord = int3(x, y, z);
int voxelIndex = GetVoxelIndex(voxelCoord);
float3 voxelPos = GetVoxelPos(voxelCoord);
float distance = SignedDistancePointToTriangle(voxelPos, v0, v1, v2);
#if 0
if (distance < -10.0f) // TODO: find a better way to reject negative distance from degenerate triangles that break SDF shape
distance = abs(distance);
#endif
InterlockedMin(SDF[voxelIndex], FloatFlip3(distance));
}
sdf = min(sdf, hit.Distance);
if (hit.IsBackface)
hitBackCount++;
}
}
}
#endif
#if defined(_CS_FloodFill) || defined(_CS_Encode)
Buffer<uint> InSDF : register(t0);
// Reads the distance stored for the voxel at the given linear index (InSDF holds float values as raw uint bits).
float GetVoxel(int voxelIndex)
{
return asfloat(InSDF[voxelIndex]);
}
// Reads the distance stored for the voxel at the given coordinates (clamped to the volume bounds first).
float GetVoxel(int3 coord)
{
    int3 clampedCoord = ClampVoxelCoord(coord);
    return GetVoxel(GetVoxelIndex(clampedCoord));
}
float CombineSDF(float sdf, int3 nearbyCoord, float nearbyDistance)
{
// Sample nearby voxel
float sdfNearby = GetVoxel(nearbyCoord);
// Include distance to that nearby voxel
if (sdfNearby < 0.0f)
nearbyDistance *= -1;
sdfNearby += nearbyDistance;
if (sdfNearby > MaxDistance)
if (hitBackCount >= minBackfaceHitCount)
{
// Ignore if nearby sample is invalid (see CS_Init)
// Voxel is inside the geometry so turn it into negative distance to the surface
sdf *= -1;
}
else if (sdf > MaxDistance)
{
// Use nearby sample if current one is invalid (see CS_Init)
sdf = sdfNearby;
}
else
{
// Use distance closer to 0
sdf = sdf >= 0 ? min(sdf, sdfNearby) : max(sdf, sdfNearby);
}
return sdf;
}
// Flood-fill step: combines each voxel with its 6 axis neighbors to spread the minimum distances to the triangles.
META_CS(true, FEATURE_LEVEL_SM5)
[numthreads(THREAD_GROUP_SIZE, 1, 1)]
void CS_FloodFill(uint3 GroupId : SV_GroupID, uint GroupIndex : SV_GroupIndex)
{
    uint voxelIndex = GetVoxelIndex(GroupId, GroupIndex);
    if (voxelIndex >= ResolutionSize)
        return;
    float sdf = GetVoxel(voxelIndex);

    // Voxels with a small enough distance are known to have a triangle nearby so they are kept as they are
    bool sampleNeighbors = abs(sdf) > WorldUnitsPerVoxel * 1.2f;
    if (sampleNeighbors)
    {
        int3 center = GetVoxelCoord(voxelIndex);
        float neighborDistance = WorldUnitsPerVoxel;

        // Positive axis neighbors first, then the negative ones (the order matters as the result is accumulated)
        sdf = CombineSDF(sdf, center + int3(1, 0, 0), neighborDistance);
        sdf = CombineSDF(sdf, center + int3(0, 1, 0), neighborDistance);
        sdf = CombineSDF(sdf, center + int3(0, 0, 1), neighborDistance);
        sdf = CombineSDF(sdf, center + int3(-1, 0, 0), neighborDistance);
        sdf = CombineSDF(sdf, center + int3(0, -1, 0), neighborDistance);
        sdf = CombineSDF(sdf, center + int3(0, 0, -1), neighborDistance);
    }

    SDF[voxelIndex] = asuint(sdf);
}
RWTexture3D<half> SDFtex : register(u1);
// Writes SDF values into the output texture as distances normalized by MaxDistance and packed into the [0; 1] range.
META_CS(true, FEATURE_LEVEL_SM5)
[numthreads(THREAD_GROUP_SIZE, 1, 1)]
void CS_Encode(uint3 GroupId : SV_GroupID, uint GroupIndex : SV_GroupIndex)
{
    uint voxelIndex = GetVoxelIndex(GroupId, GroupIndex);
    if (voxelIndex < ResolutionSize)
    {
        // Limit the distance to the supported range
        float limited = min(GetVoxel(voxelIndex), MaxDistance);
        limited = clamp(limited, -MaxDistance, MaxDistance);

        // Pack from range [-MaxDistance; +MaxDistance] to [0; 1]
        float encoded = (limited / MaxDistance) * 0.5f + 0.5f;
        SDFtex[GetVoxelCoord(voxelIndex)] = encoded;
    }
}

View File

@@ -1,129 +0,0 @@
// Source: https://github.com/GPUOpen-Effects/TressFX
// License: MIT
//
// Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
//When building the SDF we want to find the lowest distance at each SDF cell. In order to allow multiple threads to write to the same
//cells, it is necessary to use atomics. However, there is no support for atomics with 32-bit floats so we convert the float into unsigned int
//and use atomic_min() / InterlockedMin() as a workaround.
//
//When used with atomic_min, both FloatFlip2() and FloatFlip3() will store the float with the lowest magnitude.
//The difference is that FloatFlip2() prefers negative values ( InterlockedMin( FloatFlip2(1.0), FloatFlip2(-1.0) ) == -1.0 ),
//while FloatFlip3() prefers positive values ( InterlockedMin( FloatFlip3(1.0), FloatFlip3(-1.0) ) == 1.0 ).
//Using FloatFlip3() seems to result in a SDF with higher quality compared to FloatFlip2().
// Encodes a float into uint: value bits are shifted left by one and the inverted sign bit is placed in the least significant bit.
uint FloatFlip2(float fl)
{
    uint bits = asuint(fl);
    uint invertedSign = (bits >> 31) ^ 0x00000001; // Flip sign bit so that (0 == negative)
    return (bits << 1) | invertedSign; // Rotate sign bit to least significant
}
// Decodes a value encoded with FloatFlip2 back into the raw bits of the float (returned as uint).
uint IFloatFlip2(uint f2)
{
    uint signBit = (f2 << 31) ^ 0x80000000; // Move the lowest bit back to the sign position and undo the inversion
    uint valueBits = f2 >> 1;
    return valueBits | signBit;
}
// Encodes a float into uint by rotating its bits left by one (sign bit ends up in the least significant bit).
uint FloatFlip3(float fl)
{
    uint bits = asuint(fl);
    uint signBit = bits >> 31;
    return (bits << 1) | signBit; // Rotate sign bit to least significant
}
// Decodes a value encoded with FloatFlip3 back into the raw bits of the float (returned as uint).
uint IFloatFlip3(uint f2)
{
    uint signBit = f2 << 31; // Lowest bit goes back to the sign position
    uint valueBits = f2 >> 1;
    return valueBits | signBit;
}
// Computes the distance from point p to the segment (x0, x1).
// Outputs n as the direction from the closest point on the segment towards p (scaled by 1 / (distance + epsilon)).
float DistancePointToEdge(float3 p, float3 x0, float3 x1, out float3 n)
{
    // Hack to swap to ensure the order is correct (.x only for simplicity)
    float3 first = x0;
    float3 second = x1;
    if (x0.x > x1.x)
    {
        first = x1;
        second = x0;
    }

    // Project the point onto the edge and limit the projection to the segment
    float3 edge = second - first;
    float t = dot(second - p, edge) / dot(edge, edge);
    t = max(0.0f, min(t, 1.0f));

    float3 offset = p - (t*first + (1.0f - t)*second);
    float d = length(offset);
    n = offset / (d + 1e-30f); // Epsilon avoids division by zero when the point lies on the segment
    return d;
}
// Computes the signed distance from point p to the triangle (x0, x1, x2).
// The sign tells if p is in the positive or negative side of the triangle.
// Positive side is where the normal vector of triangle ( (x1-x0) x (x2-x0) ) is pointing to.
float SignedDistancePointToTriangle(float3 p, float3 x0, float3 x1, float3 x2)
{
    float d = 0;

    // Build an orthonormal in-plane basis (x02, x12) at x2 to get the weights (a, b, c) of (x0, x1, x2) for the projected point
    float3 x02 = x0 - x2;
    float l0 = length(x02) + 1e-30f;
    x02 = x02 / l0;
    float3 x12 = x1 - x2;
    float l1 = dot(x12, x02);
    x12 = x12 - l1*x02;
    float l2 = length(x12) + 1e-30f;
    x12 = x12 / l2;
    float3 px2 = p - x2;
    float b = dot(x12, px2) / l2;
    float a = (dot(x02, px2) - l1*b) / l0;
    float c = 1 - a - b;

    // normal vector of triangle. Doesn't need to be normalized as only the sign of the dot product is used.
    float3 nTri = cross((x1 - x0), (x2 - x0));
    float tol = 1e-8f;
    if (a >= -tol && b >= -tol && c >= -tol)
    {
        // Projection falls inside the triangle so use the distance to the projected point
        d = length(p - (a*x0 + b*x1 + c*x2));
    }
    else
    {
        // Projection falls outside the triangle so use the closest of the 3 edges (edge normals are not needed here)
        float3 unusedNormal;
        d = DistancePointToEdge(p, x0, x1, unusedNormal);
        d = min(d, DistancePointToEdge(p, x1, x2, unusedNormal));
        d = min(d, DistancePointToEdge(p, x0, x2, unusedNormal));
    }

    // Negative distance on the back side of the triangle
    d = (dot(p - x0, nTri) < 0.f) ? -d : d;
    return d;
}