Refactor Mesh SDF generation on GPU to use raytracing for more precise results
This commit is contained in:
@@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8e8d210a74ae373793eaee1ddab1372a6a50a000c489f97b2258a09cd93cc2d0
|
||||
oid sha256:6a56dc14746606f0065d136ad0a69ae1aa41e8732ea380c657d75c187aa09f54
|
||||
size 5031
|
||||
|
||||
BIN
Content/Shaders/GlobalSignDistanceField.flax
(Stored with Git LFS)
BIN
Content/Shaders/GlobalSignDistanceField.flax
(Stored with Git LFS)
Binary file not shown.
BIN
Content/Shaders/SDF.flax
(Stored with Git LFS)
BIN
Content/Shaders/SDF.flax
(Stored with Git LFS)
Binary file not shown.
@@ -90,25 +90,15 @@ namespace FlaxEditor.Windows.Assets
|
||||
|
||||
var gpu = group.Checkbox("Bake on GPU", "If checked, SDF generation will be calculated using GPU on Compute Shader, otherwise CPU will use Job System. GPU generation is fast but result in artifacts in various meshes (eg. foliage).");
|
||||
gpu.CheckBox.Checked = sdfOptions.GPU;
|
||||
gpu.CheckBox.StateChanged += c => { Window._sdfOptions.GPU = c.Checked; };
|
||||
|
||||
var backfacesThresholdProp = group.AddPropertyItem("Backfaces Threshold", "Custom threshold (in range 0-1) for adjusting mesh internals detection based on the percentage of test rays hit triangle backfaces. Use lower value for more dense mesh.");
|
||||
var backfacesThreshold = backfacesThresholdProp.FloatValue();
|
||||
var backfacesThresholdLabel = backfacesThresholdProp.Labels.Last();
|
||||
backfacesThreshold.ValueBox.MinValue = 0.001f;
|
||||
backfacesThreshold.ValueBox.MaxValue = 1.0f;
|
||||
backfacesThreshold.ValueBox.Value = sdfOptions.BackfacesThreshold;
|
||||
backfacesThreshold.ValueBox.BoxValueChanged += b => { Window._sdfOptions.BackfacesThreshold = b.Value; };
|
||||
|
||||
// Toggle Backfaces Threshold visibility (CPU-only option)
|
||||
gpu.CheckBox.StateChanged += c =>
|
||||
{
|
||||
Window._sdfOptions.GPU = c.Checked;
|
||||
backfacesThresholdLabel.Visible = !c.Checked;
|
||||
backfacesThreshold.ValueBox.Visible = !c.Checked;
|
||||
};
|
||||
backfacesThresholdLabel.Visible = !gpu.CheckBox.Checked;
|
||||
backfacesThreshold.ValueBox.Visible = !gpu.CheckBox.Checked;
|
||||
|
||||
var lodIndex = group.IntegerValue("LOD Index", "Index of the model Level of Detail to use for SDF data building. By default uses the lowest quality LOD for fast building.");
|
||||
lodIndex.IntValue.MinValue = 0;
|
||||
lodIndex.IntValue.MaxValue = Asset.LODsCount - 1;
|
||||
|
||||
@@ -92,9 +92,8 @@ float GPUTimerQueryDX11::GetResult()
|
||||
{
|
||||
if (!_finalized)
|
||||
{
|
||||
#if BUILD_DEBUG
|
||||
ASSERT(HasResult());
|
||||
#endif
|
||||
if (!HasResult())
|
||||
return 0;
|
||||
|
||||
UINT64 timeStart, timeEnd;
|
||||
auto context = _device->GetIM();
|
||||
|
||||
@@ -3,17 +3,29 @@
|
||||
#if COMPILE_WITH_MODEL_TOOL
|
||||
|
||||
#include "MeshAccelerationStructure.h"
|
||||
#include "Engine/Core/Log.h"
|
||||
#include "Engine/Core/Math/Math.h"
|
||||
#include "Engine/Content/Content.h"
|
||||
#include "Engine/Content/Assets/Model.h"
|
||||
#include "Engine/Graphics/GPUBuffer.h"
|
||||
#include "Engine/Graphics/Models/ModelData.h"
|
||||
#include "Engine/Profiler/ProfilerCPU.h"
|
||||
|
||||
void MeshAccelerationStructure::BuildBVH(int32 node, int32 maxLeafSize, Array<byte>& scratch)
|
||||
PACK_STRUCT(struct GPUBVH {
|
||||
Float3 BoundsMin;
|
||||
uint32 Index;
|
||||
Float3 BoundsMax;
|
||||
int32 Count; // Negative for non-leaf nodes
|
||||
});
|
||||
static_assert(sizeof(GPUBVH) == sizeof(Float4) * 2, "Invalid BVH structure size for GPU.");
|
||||
|
||||
void MeshAccelerationStructure::BuildBVH(int32 node, BVHBuild& build)
|
||||
{
|
||||
auto& root = _bvh[node];
|
||||
ASSERT_LOW_LAYER(root.Leaf.IsLeaf);
|
||||
if (root.Leaf.TriangleCount <= maxLeafSize)
|
||||
if (build.MaxLeafSize > 0 && root.Leaf.TriangleCount <= build.MaxLeafSize)
|
||||
return;
|
||||
if (build.MaxDepth > 0 && build.NodeDepth >= build.MaxDepth)
|
||||
return;
|
||||
|
||||
// Spawn two leaves
|
||||
@@ -64,8 +76,8 @@ RETRY:
|
||||
{
|
||||
uint16 I0, I1, I2;
|
||||
};
|
||||
scratch.Resize(root.Leaf.TriangleCount * sizeof(Tri));
|
||||
auto dst = (Tri*)scratch.Get();
|
||||
build.Scratch.Resize(root.Leaf.TriangleCount * sizeof(Tri));
|
||||
auto dst = (Tri*)build.Scratch.Get();
|
||||
auto ib16 = meshData.IndexBuffer.Get<uint16>();
|
||||
for (int32 i = indexStart; i < indexEnd;)
|
||||
{
|
||||
@@ -90,13 +102,13 @@ RETRY:
|
||||
indexStart = 0;
|
||||
indexEnd = left.Leaf.TriangleCount * 3;
|
||||
for (int32 i = indexStart; i < indexEnd; i++)
|
||||
left.Bounds.Merge(vb[((uint16*)scratch.Get())[i]]);
|
||||
left.Bounds.Merge(vb[((uint16*)build.Scratch.Get())[i]]);
|
||||
|
||||
right.Bounds = BoundingBox(vb[dst[root.Leaf.TriangleCount - 1].I0]);
|
||||
indexStart = left.Leaf.TriangleCount;
|
||||
indexEnd = root.Leaf.TriangleCount * 3;
|
||||
for (int32 i = indexStart; i < indexEnd; i++)
|
||||
right.Bounds.Merge(vb[((uint16*)scratch.Get())[i]]);
|
||||
right.Bounds.Merge(vb[((uint16*)build.Scratch.Get())[i]]);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -104,8 +116,8 @@ RETRY:
|
||||
{
|
||||
uint32 I0, I1, I2;
|
||||
};
|
||||
scratch.Resize(root.Leaf.TriangleCount * sizeof(Tri));
|
||||
auto dst = (Tri*)scratch.Get();
|
||||
build.Scratch.Resize(root.Leaf.TriangleCount * sizeof(Tri));
|
||||
auto dst = (Tri*)build.Scratch.Get();
|
||||
auto ib32 = meshData.IndexBuffer.Get<uint32>();
|
||||
for (int32 i = indexStart; i < indexEnd;)
|
||||
{
|
||||
@@ -130,17 +142,19 @@ RETRY:
|
||||
indexStart = 0;
|
||||
indexEnd = left.Leaf.TriangleCount * 3;
|
||||
for (int32 i = indexStart; i < indexEnd; i++)
|
||||
left.Bounds.Merge(vb[((uint32*)scratch.Get())[i]]);
|
||||
left.Bounds.Merge(vb[((uint32*)build.Scratch.Get())[i]]);
|
||||
|
||||
right.Bounds = BoundingBox(vb[dst[root.Leaf.TriangleCount - 1].I0]);
|
||||
indexStart = left.Leaf.TriangleCount;
|
||||
indexEnd = root.Leaf.TriangleCount * 3;
|
||||
for (int32 i = indexStart; i < indexEnd; i++)
|
||||
right.Bounds.Merge(vb[((uint32*)scratch.Get())[i]]);
|
||||
right.Bounds.Merge(vb[((uint32*)build.Scratch.Get())[i]]);
|
||||
}
|
||||
ASSERT_LOW_LAYER(left.Leaf.TriangleCount + right.Leaf.TriangleCount == root.Leaf.TriangleCount);
|
||||
left.Leaf.TriangleIndex = root.Leaf.TriangleIndex;
|
||||
right.Leaf.TriangleIndex = left.Leaf.TriangleIndex + left.Leaf.TriangleCount;
|
||||
build.MaxNodeTriangles = Math::Max(build.MaxNodeTriangles, (int32)right.Leaf.TriangleCount);
|
||||
build.MaxNodeTriangles = Math::Max(build.MaxNodeTriangles, (int32)right.Leaf.TriangleCount);
|
||||
|
||||
// Convert into a node
|
||||
root.Node.IsLeaf = 0;
|
||||
@@ -148,8 +162,11 @@ RETRY:
|
||||
root.Node.ChildrenCount = 2;
|
||||
|
||||
// Split children
|
||||
BuildBVH(childIndex, maxLeafSize, scratch);
|
||||
BuildBVH(childIndex + 1, maxLeafSize, scratch);
|
||||
build.NodeDepth++;
|
||||
build.MaxNodeDepth = Math::Max(build.NodeDepth, build.MaxNodeDepth);
|
||||
BuildBVH(childIndex, build);
|
||||
BuildBVH(childIndex + 1, build);
|
||||
build.NodeDepth--;
|
||||
}
|
||||
|
||||
bool MeshAccelerationStructure::PointQueryBVH(int32 node, const Vector3& point, Real& hitDistance, Vector3& hitPoint, Triangle& hitTriangle) const
|
||||
@@ -160,7 +177,7 @@ bool MeshAccelerationStructure::PointQueryBVH(int32 node, const Vector3& point,
|
||||
{
|
||||
// Find closest triangle
|
||||
Vector3 p;
|
||||
const Mesh& meshData = _meshes[root.Leaf.MeshIndex];
|
||||
const Mesh& meshData = _meshes.Get()[root.Leaf.MeshIndex];
|
||||
const Float3* vb = meshData.VertexBuffer.Get<Float3>();
|
||||
const int32 indexStart = root.Leaf.TriangleIndex * 3;
|
||||
const int32 indexEnd = indexStart + root.Leaf.TriangleCount * 3;
|
||||
@@ -229,7 +246,7 @@ bool MeshAccelerationStructure::RayCastBVH(int32 node, const Ray& ray, Real& hit
|
||||
if (root.Leaf.IsLeaf)
|
||||
{
|
||||
// Ray cast along triangles in the leaf
|
||||
const Mesh& meshData = _meshes[root.Leaf.MeshIndex];
|
||||
const Mesh& meshData = _meshes.Get()[root.Leaf.MeshIndex];
|
||||
const Float3* vb = meshData.VertexBuffer.Get<Float3>();
|
||||
const int32 indexStart = root.Leaf.TriangleIndex * 3;
|
||||
const int32 indexEnd = indexStart + root.Leaf.TriangleCount * 3;
|
||||
@@ -381,6 +398,7 @@ void MeshAccelerationStructure::Add(const ModelData* modelData, int32 lodIndex,
|
||||
|
||||
void MeshAccelerationStructure::Add(Float3* vb, int32 vertices, void* ib, int32 indices, bool use16BitIndex, bool copy)
|
||||
{
|
||||
ASSERT(vertices % 3 == 0);
|
||||
auto& meshData = _meshes.AddOne();
|
||||
meshData.Asset = nullptr;
|
||||
if (copy)
|
||||
@@ -395,43 +413,122 @@ void MeshAccelerationStructure::Add(Float3* vb, int32 vertices, void* ib, int32
|
||||
meshData.Vertices = vertices;
|
||||
meshData.Indices = indices;
|
||||
meshData.Use16BitIndexBuffer = use16BitIndex;
|
||||
BoundingBox::FromPoints(meshData.VertexBuffer.Get<Float3>(), vertices, meshData.Bounds);
|
||||
}
|
||||
|
||||
void MeshAccelerationStructure::BuildBVH(int32 maxLeafSize)
|
||||
void MeshAccelerationStructure::MergeMeshes(bool force16BitIndexBuffer)
|
||||
{
|
||||
if (_meshes.Count() == 0)
|
||||
return;
|
||||
if (_meshes.Count() == 1 && (!force16BitIndexBuffer || !_meshes[0].Use16BitIndexBuffer))
|
||||
return;
|
||||
PROFILE_CPU();
|
||||
auto meshes = _meshes;
|
||||
_meshes.Clear();
|
||||
_meshes.Resize(1);
|
||||
auto& mesh = _meshes[0];
|
||||
mesh.Asset = nullptr;
|
||||
mesh.Use16BitIndexBuffer = true;
|
||||
mesh.Indices = 0;
|
||||
mesh.Vertices = 0;
|
||||
mesh.Bounds = meshes[0].Bounds;
|
||||
for (auto& e : meshes)
|
||||
{
|
||||
if (!e.Use16BitIndexBuffer)
|
||||
mesh.Use16BitIndexBuffer = false;
|
||||
mesh.Vertices += e.Vertices;
|
||||
mesh.Indices += e.Indices;
|
||||
BoundingBox::Merge(mesh.Bounds, e.Bounds, mesh.Bounds);
|
||||
}
|
||||
mesh.Use16BitIndexBuffer &= mesh.Indices <= MAX_uint16 && !force16BitIndexBuffer;
|
||||
mesh.VertexBuffer.Allocate(mesh.Vertices * sizeof(Float3));
|
||||
mesh.IndexBuffer.Allocate(mesh.Indices * sizeof(uint32));
|
||||
int32 vertexCounter = 0, indexCounter = 0;
|
||||
for (auto& e : meshes)
|
||||
{
|
||||
Platform::MemoryCopy(mesh.VertexBuffer.Get() + vertexCounter * sizeof(Float3), e.VertexBuffer.Get(), e.Vertices * sizeof(Float3));
|
||||
if (e.Use16BitIndexBuffer)
|
||||
{
|
||||
for (int32 i = 0; i < e.Indices; i++)
|
||||
{
|
||||
uint16 index = ((uint16*)e.IndexBuffer.Get())[i];
|
||||
((uint32*)mesh.IndexBuffer.Get())[indexCounter + i] = vertexCounter + index;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int32 i = 0; i < e.Indices; i++)
|
||||
{
|
||||
uint16 index = ((uint32*)e.IndexBuffer.Get())[i];
|
||||
((uint32*)mesh.IndexBuffer.Get())[indexCounter + i] = vertexCounter + index;
|
||||
}
|
||||
}
|
||||
vertexCounter += e.Vertices;
|
||||
indexCounter += e.Indices;
|
||||
if (e.Asset)
|
||||
e.Asset->RemoveReference();
|
||||
}
|
||||
}
|
||||
|
||||
// Builds the Bounding Volume Hierarchy (BVH) over all added meshes and logs the build statistics.
// [maxLeafSize] Leaves with at most this many triangles are not split further (values <= 0 disable this limit).
// [maxDepth] Maximum depth of the nodes splitting (values <= 0 disable this limit).
void MeshAccelerationStructure::BuildBVH(int32 maxLeafSize, int32 maxDepth)
{
    if (_meshes.Count() == 0)
        return;
    PROFILE_CPU();

    BVHBuild build;
    build.MaxLeafSize = maxLeafSize;
    build.MaxDepth = maxDepth;

    // Estimate memory usage
    int32 trianglesCount = 0;
    for (const Mesh& meshData : _meshes)
        trianglesCount += meshData.Indices / 3;
    _bvh.Clear();
    _bvh.EnsureCapacity(trianglesCount / Math::Max(maxLeafSize, 16));

    if (_meshes.Count() == 1)
    {
        // Skip using root node if BVH contains only one mesh
        const Mesh& meshData = _meshes.First();
        auto& child = _bvh.AddOne();
        child.Leaf.IsLeaf = 1;
        child.Leaf.MeshIndex = 0;
        child.Leaf.TriangleIndex = 0;
        child.Leaf.TriangleCount = meshData.Indices / 3;
        child.Bounds = meshData.Bounds;
        BuildBVH(0, build);
    }
    else
    {
        // Init with the root node and all meshes as leaves
        {
            auto& root = _bvh.AddOne();
            root.Node.IsLeaf = 0;
            root.Node.ChildIndex = 1;
            root.Node.ChildrenCount = _meshes.Count();
        }

        // Accumulate the root bounds locally instead of through a reference into _bvh
        // (assumes AddOne may grow the array and move its items - TODO confirm)
        BoundingBox rootBounds = _meshes[0].Bounds;
        for (int32 i = 0; i < _meshes.Count(); i++)
        {
            const Mesh& meshData = _meshes[i];
            auto& child = _bvh.AddOne();
            child.Leaf.IsLeaf = 1;
            child.Leaf.MeshIndex = i;
            child.Leaf.TriangleIndex = 0;
            child.Leaf.TriangleCount = meshData.Indices / 3;
            child.Bounds = meshData.Bounds;
            BoundingBox::Merge(rootBounds, meshData.Bounds, rootBounds);
        }
        _bvh[0].Bounds = rootBounds;

        // Sub-divide mesh nodes into smaller leaves
        // NOTE(review): this overrides the requested maxDepth with 2 for multi-mesh builds - confirm this is intended
        build.MaxNodeDepth = build.MaxDepth = 2;
        for (int32 i = 0; i < _meshes.Count(); i++)
            BuildBVH(i + 1, build);
        build.NodeDepth = 0;
    }

    LOG(Info, "BVH nodes: {}, max depth: {}, max triangles: {}", _bvh.Count(), build.MaxNodeDepth, build.MaxNodeTriangles);
}
|
||||
|
||||
bool MeshAccelerationStructure::PointQuery(const Vector3& point, Real& hitDistance, Vector3& hitPoint, Triangle& hitTriangle, Real maxDistance) const
|
||||
@@ -579,4 +676,80 @@ bool MeshAccelerationStructure::RayCast(const Ray& ray, Real& hitDistance, Vecto
|
||||
}
|
||||
}
|
||||
|
||||
// Releases the GPU buffers held by this object (pointers that are null are skipped by the macro - presumably; verify against SAFE_DELETE_GPU_RESOURCE).
MeshAccelerationStructure::GPU::~GPU()
{
    SAFE_DELETE_GPU_RESOURCE(BVHBuffer);
    SAFE_DELETE_GPU_RESOURCE(VertexBuffer);
    SAFE_DELETE_GPU_RESOURCE(IndexBuffer);
}
|
||||
|
||||
// Returns true if the GPU data is ready to use.
MeshAccelerationStructure::GPU::operator bool() const
{
    // The index buffer gets created as the last one, so when it is valid the other buffers are valid too
    if (!IndexBuffer)
        return false;
    return IndexBuffer->GetSize() != 0;
}
|
||||
|
||||
// Converts the acceleration structure into GPU buffers: BVH nodes, vertex positions and 32-bit indices.
// As a side effect all meshes get merged into one and the BVH is rebuilt.
// When there is no geometry or a buffer fails to initialize, the returned object is incomplete
// and evaluates to false when converted to bool.
MeshAccelerationStructure::GPU MeshAccelerationStructure::ToGPU()
{
    PROFILE_CPU();
    GPU gpu;

    // Nothing to upload without any geometry (also protects the _meshes[0] access below)
    if (_meshes.Count() == 0)
        return gpu;

    // GPU BVH operates on a single mesh with 32-bit indices
    MergeMeshes(true);

    // Construct BVH (no leaf size limit, depth limited by the traversal stack)
    const int32 BVH_STACK_SIZE = 32; // This must match HLSL shader
    BuildBVH(0, BVH_STACK_SIZE);

    // Upload BVH
    {
        const int32 nodesCount = _bvh.Count();
        Array<GPUBVH> bvhData;
        bvhData.Resize(nodesCount);
        for (int32 i = 0; i < nodesCount; i++)
        {
            const auto& src = _bvh.Get()[i];
            auto& dst = bvhData.Get()[i];
            dst.BoundsMin = src.Bounds.Minimum;
            dst.BoundsMax = src.Bounds.Maximum;
            if (src.Leaf.IsLeaf)
            {
                // Leaves address the index buffer directly (3 indices per triangle)
                dst.Index = src.Leaf.TriangleIndex * 3;
                dst.Count = src.Leaf.TriangleCount * 3;
            }
            else
            {
                dst.Index = src.Node.ChildIndex;
                dst.Count = -(int32)src.Node.ChildrenCount; // Mark as non-leaf
                ASSERT(src.Node.ChildrenCount == 2); // GPU shader is hardcoded for 2 children per node
            }
        }
        gpu.BVHBuffer = GPUBuffer::New();
        auto desc = GPUBufferDescription::Structured(nodesCount, sizeof(GPUBVH));
        desc.InitData = bvhData.Get();
        if (gpu.BVHBuffer->Init(desc))
            return gpu; // Init reported a failure
    }

    const Mesh& mesh = _meshes[0];

    // Upload vertex buffer
    {
        gpu.VertexBuffer = GPUBuffer::New();
        auto desc = GPUBufferDescription::Raw(mesh.Vertices * sizeof(Float3), GPUBufferFlags::ShaderResource);
        desc.InitData = mesh.VertexBuffer.Get();
        if (gpu.VertexBuffer->Init(desc))
            return gpu; // Init reported a failure
    }

    // Upload index buffer (created last, so its validity marks the whole GPU data as valid)
    {
        gpu.IndexBuffer = GPUBuffer::New();
        auto desc = GPUBufferDescription::Raw(mesh.Indices * sizeof(uint32), GPUBufferFlags::ShaderResource);
        desc.InitData = mesh.IndexBuffer.Get();
        gpu.IndexBuffer->Init(desc);
    }

    return gpu;
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
|
||||
class Model;
|
||||
class ModelData;
|
||||
class GPUBuffer;
|
||||
|
||||
/// <summary>
|
||||
/// Acceleration Structure utility for robust ray tracing mesh geometry with optimized data structure.
|
||||
@@ -50,10 +51,19 @@ private:
|
||||
};
|
||||
};
|
||||
|
||||
struct BVHBuild
|
||||
{
|
||||
int32 MaxLeafSize, MaxDepth;
|
||||
int32 NodeDepth = 0;
|
||||
int32 MaxNodeDepth = 0;
|
||||
int32 MaxNodeTriangles = 0;
|
||||
Array<byte> Scratch;
|
||||
};
|
||||
|
||||
Array<Mesh, InlinedAllocation<16>> _meshes;
|
||||
Array<BVH> _bvh;
|
||||
|
||||
void BuildBVH(int32 node, int32 maxLeafSize, Array<byte>& scratch);
|
||||
void BuildBVH(int32 node, BVHBuild& build);
|
||||
bool PointQueryBVH(int32 node, const Vector3& point, Real& hitDistance, Vector3& hitPoint, Triangle& hitTriangle) const;
|
||||
bool RayCastBVH(int32 node, const Ray& ray, Real& hitDistance, Vector3& hitNormal, Triangle& hitTriangle) const;
|
||||
|
||||
@@ -69,14 +79,56 @@ public:
|
||||
// Adds the triangles geometry for the build to the structure.
|
||||
void Add(Float3* vb, int32 vertices, void* ib, int32 indices, bool use16BitIndex, bool copy = false);
|
||||
|
||||
// Merges all added meshes into a single mesh (to reduce number of BVH nodes). Required for GPU BVH build.
|
||||
void MergeMeshes(bool force16BitIndexBuffer = false);
|
||||
|
||||
// Builds Bounding Volume Hierarchy (BVH) structure for accelerated geometry queries.
|
||||
void BuildBVH(int32 maxLeafSize = 16);
|
||||
void BuildBVH(int32 maxLeafSize = 16, int32 maxDepth = 0);
|
||||
|
||||
// Queries the closest triangle.
|
||||
bool PointQuery(const Vector3& point, Real& hitDistance, Vector3& hitPoint, Triangle& hitTriangle, Real maxDistance = MAX_Real) const;
|
||||
|
||||
// Ray traces the triangles.
|
||||
bool RayCast(const Ray& ray, Real& hitDistance, Vector3& hitNormal, Triangle& hitTriangle, Real maxDistance = MAX_Real) const;
|
||||
|
||||
public:
|
||||
struct GPU
|
||||
{
|
||||
GPUBuffer* BVHBuffer;
|
||||
GPUBuffer* VertexBuffer;
|
||||
GPUBuffer* IndexBuffer;
|
||||
bool Valid;
|
||||
|
||||
GPU()
|
||||
: BVHBuffer(nullptr)
|
||||
, VertexBuffer(nullptr)
|
||||
, IndexBuffer(nullptr)
|
||||
{
|
||||
}
|
||||
|
||||
GPU(GPU&& other) noexcept
|
||||
: BVHBuffer(other.BVHBuffer)
|
||||
, VertexBuffer(other.VertexBuffer)
|
||||
, IndexBuffer(other.IndexBuffer)
|
||||
{
|
||||
other.BVHBuffer = nullptr;
|
||||
other.VertexBuffer = nullptr;
|
||||
other.IndexBuffer = nullptr;
|
||||
}
|
||||
|
||||
GPU& operator=(GPU other)
|
||||
{
|
||||
Swap(*this, other);
|
||||
return *this;
|
||||
}
|
||||
|
||||
~GPU();
|
||||
|
||||
operator bool() const;
|
||||
};
|
||||
|
||||
// Converts the acceleration structure data to GPU format for raytracing inside a shader.
|
||||
GPU ToGPU();
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
#include "Engine/Threading/Threading.h"
|
||||
#include "Engine/Graphics/GPUDevice.h"
|
||||
#include "Engine/Graphics/GPUBuffer.h"
|
||||
#include "Engine/Graphics/GPUTimerQuery.h"
|
||||
#include "Engine/Graphics/RenderTools.h"
|
||||
#include "Engine/Graphics/Async/GPUTask.h"
|
||||
#include "Engine/Graphics/Shaders/GPUShader.h"
|
||||
@@ -81,14 +82,18 @@ class GPUModelSDFTask : public GPUTask
|
||||
{
|
||||
ConditionVariable* _signal;
|
||||
AssetReference<Shader> _shader;
|
||||
MeshAccelerationStructure* _scene;
|
||||
Model* _inputModel;
|
||||
const ModelData* _modelData;
|
||||
int32 _lodIndex;
|
||||
float _backfacesThreshold;
|
||||
Int3 _resolution;
|
||||
ModelBase::SDFData* _sdf;
|
||||
GPUBuffer *_sdfSrc, *_sdfDst;
|
||||
GPUTexture* _sdfResult;
|
||||
Float3 _xyzToLocalMul, _xyzToLocalAdd;
|
||||
#if GPU_ALLOW_PROFILE_EVENTS
|
||||
GPUTimerQuery* _timerQuery;
|
||||
#endif
|
||||
|
||||
const uint32 ThreadGroupSize = 64;
|
||||
GPU_CB_STRUCT(Data {
|
||||
@@ -96,7 +101,7 @@ class GPUModelSDFTask : public GPUTask
|
||||
uint32 ResolutionSize;
|
||||
float MaxDistance;
|
||||
uint32 VertexStride;
|
||||
int32 Index16bit;
|
||||
float BackfacesThreshold;
|
||||
uint32 TriangleCount;
|
||||
Float3 VoxelToPosMul;
|
||||
float WorldUnitsPerVoxel;
|
||||
@@ -105,47 +110,46 @@ class GPUModelSDFTask : public GPUTask
|
||||
});
|
||||
|
||||
public:
|
||||
GPUModelSDFTask(ConditionVariable& signal, Model* inputModel, const ModelData* modelData, int32 lodIndex, const Int3& resolution, ModelBase::SDFData* sdf, GPUTexture* sdfResult, const Float3& xyzToLocalMul, const Float3& xyzToLocalAdd)
|
||||
: GPUTask(Type::Custom)
|
||||
GPUModelSDFTask(ConditionVariable& signal, MeshAccelerationStructure* scene, Model* inputModel, const ModelData* modelData, int32 lodIndex, const Int3& resolution, ModelBase::SDFData* sdf, GPUTexture* sdfResult, const Float3& xyzToLocalMul, const Float3& xyzToLocalAdd, float backfacesThreshold)
|
||||
: GPUTask(Type::Custom, GPU_ALLOW_PROFILE_EVENTS ? 4 : GPU_ASYNC_LATENCY) // Fix timer query result reading with some more latency
|
||||
, _signal(&signal)
|
||||
, _shader(Content::LoadAsyncInternal<Shader>(TEXT("Shaders/SDF")))
|
||||
, _scene(scene)
|
||||
, _inputModel(inputModel)
|
||||
, _modelData(modelData)
|
||||
, _lodIndex(lodIndex)
|
||||
, _backfacesThreshold(backfacesThreshold)
|
||||
, _resolution(resolution)
|
||||
, _sdf(sdf)
|
||||
, _sdfSrc(GPUBuffer::New())
|
||||
, _sdfDst(GPUBuffer::New())
|
||||
, _sdfResult(sdfResult)
|
||||
, _xyzToLocalMul(xyzToLocalMul)
|
||||
, _xyzToLocalAdd(xyzToLocalAdd)
|
||||
{
|
||||
#if GPU_ENABLE_RESOURCE_NAMING
|
||||
_sdfSrc->SetName(TEXT("SDFSrc"));
|
||||
_sdfDst->SetName(TEXT("SDFDst"));
|
||||
#if GPU_ALLOW_PROFILE_EVENTS
|
||||
, _timerQuery(GPUDevice::Instance->CreateTimerQuery())
|
||||
#endif
|
||||
{
|
||||
}
|
||||
|
||||
~GPUModelSDFTask()
|
||||
{
|
||||
SAFE_DELETE_GPU_RESOURCE(_sdfSrc);
|
||||
SAFE_DELETE_GPU_RESOURCE(_sdfDst);
|
||||
#if GPU_ALLOW_PROFILE_EVENTS
|
||||
SAFE_DELETE_GPU_RESOURCE(_timerQuery);
|
||||
#endif
|
||||
}
|
||||
|
||||
Result run(GPUTasksContext* tasksContext) override
|
||||
{
|
||||
PROFILE_GPU_CPU("GPUModelSDFTask");
|
||||
GPUContext* context = tasksContext->GPU;
|
||||
#if GPU_ALLOW_PROFILE_EVENTS
|
||||
_timerQuery->Begin();
|
||||
#endif
|
||||
|
||||
// Allocate resources
|
||||
if (_shader == nullptr || _shader->WaitForLoaded())
|
||||
return Result::Failed;
|
||||
GPUShader* shader = _shader->GetShader();
|
||||
const uint32 resolutionSize = _resolution.X * _resolution.Y * _resolution.Z;
|
||||
auto desc = GPUBufferDescription::Typed(resolutionSize, PixelFormat::R32_UInt, true);
|
||||
// TODO: use transient texture (single frame)
|
||||
if (_sdfSrc->Init(desc) || _sdfDst->Init(desc))
|
||||
return Result::Failed;
|
||||
auto cb = shader->GetCB(0);
|
||||
Data data;
|
||||
data.Resolution = _resolution;
|
||||
@@ -154,6 +158,13 @@ public:
|
||||
data.WorldUnitsPerVoxel = _sdf->WorldUnitsPerVoxel;
|
||||
data.VoxelToPosMul = _xyzToLocalMul;
|
||||
data.VoxelToPosAdd = _xyzToLocalAdd;
|
||||
data.BackfacesThreshold = _backfacesThreshold - 0.05f; // Bias a bit
|
||||
|
||||
// Send BVH to the GPU
|
||||
auto bvh = _scene->ToGPU();
|
||||
CHECK_RETURN(bvh.BVHBuffer && bvh.VertexBuffer && bvh.IndexBuffer, Result::Failed);
|
||||
data.VertexStride = sizeof(Float3);
|
||||
data.TriangleCount = bvh.IndexBuffer->GetElementsCount() / 3;
|
||||
|
||||
// Dispatch in 1D and fallback to 2D when using large resolution
|
||||
Int3 threadGroups(Math::CeilToInt((float)resolutionSize / ThreadGroupSize), 1, 1);
|
||||
@@ -165,159 +176,34 @@ public:
|
||||
}
|
||||
data.ThreadGroupsX = threadGroups.X;
|
||||
|
||||
// Init SDF volume
|
||||
// Init constants
|
||||
context->BindCB(0, cb);
|
||||
context->UpdateCB(cb, &data);
|
||||
context->BindUA(0, _sdfSrc->View());
|
||||
context->Dispatch(shader->GetCS("CS_Init"), threadGroups.X, threadGroups.Y, threadGroups.Z);
|
||||
|
||||
// Rendering input triangles into the SDF volume
|
||||
if (_inputModel)
|
||||
{
|
||||
PROFILE_GPU_CPU_NAMED("Rasterize");
|
||||
const ModelLOD& lod = _inputModel->LODs[Math::Clamp(_lodIndex, _inputModel->HighestResidentLODIndex(), _inputModel->LODs.Count() - 1)];
|
||||
GPUBuffer *vbTemp = nullptr, *ibTemp = nullptr;
|
||||
for (int32 i = 0; i < lod.Meshes.Count(); i++)
|
||||
{
|
||||
const Mesh& mesh = lod.Meshes[i];
|
||||
const MaterialSlot& materialSlot = _inputModel->MaterialSlots[mesh.GetMaterialSlotIndex()];
|
||||
if (materialSlot.Material && !materialSlot.Material->WaitForLoaded())
|
||||
{
|
||||
// Skip transparent materials
|
||||
if (materialSlot.Material->GetInfo().BlendMode != MaterialBlendMode::Opaque)
|
||||
continue;
|
||||
}
|
||||
|
||||
GPUBuffer* vb = mesh.GetVertexBuffer(0);
|
||||
GPUBuffer* ib = mesh.GetIndexBuffer();
|
||||
data.Index16bit = mesh.Use16BitIndexBuffer() ? 1 : 0;
|
||||
data.VertexStride = vb->GetStride();
|
||||
data.TriangleCount = mesh.GetTriangleCount();
|
||||
const uint32 groups = Math::CeilToInt((float)data.TriangleCount / ThreadGroupSize);
|
||||
if (groups > GPU_MAX_CS_DISPATCH_THREAD_GROUPS)
|
||||
{
|
||||
// TODO: support larger meshes via 2D dispatch
|
||||
LOG(Error, "Not supported mesh with {} triangles.", data.TriangleCount);
|
||||
continue;
|
||||
}
|
||||
context->UpdateCB(cb, &data);
|
||||
if (!EnumHasAllFlags(vb->GetDescription().Flags, GPUBufferFlags::RawBuffer | GPUBufferFlags::ShaderResource))
|
||||
{
|
||||
desc = GPUBufferDescription::Raw(vb->GetSize(), GPUBufferFlags::ShaderResource);
|
||||
// TODO: use transient buffer (single frame)
|
||||
if (!vbTemp)
|
||||
{
|
||||
vbTemp = GPUBuffer::New();
|
||||
#if GPU_ENABLE_RESOURCE_NAMING
|
||||
vbTemp->SetName(TEXT("SDFvb"));
|
||||
#endif
|
||||
}
|
||||
vbTemp->Init(desc);
|
||||
context->CopyBuffer(vbTemp, vb, desc.Size);
|
||||
vb = vbTemp;
|
||||
}
|
||||
if (!EnumHasAllFlags(ib->GetDescription().Flags, GPUBufferFlags::RawBuffer | GPUBufferFlags::ShaderResource))
|
||||
{
|
||||
desc = GPUBufferDescription::Raw(ib->GetSize(), GPUBufferFlags::ShaderResource);
|
||||
// TODO: use transient buffer (single frame)
|
||||
if (!ibTemp)
|
||||
{
|
||||
ibTemp = GPUBuffer::New();
|
||||
#if GPU_ENABLE_RESOURCE_NAMING
|
||||
ibTemp->SetName(TEXT("SDFib"));
|
||||
#endif
|
||||
}
|
||||
ibTemp->Init(desc);
|
||||
context->CopyBuffer(ibTemp, ib, desc.Size);
|
||||
ib = ibTemp;
|
||||
}
|
||||
context->BindSR(0, vb->View());
|
||||
context->BindSR(1, ib->View());
|
||||
context->Dispatch(shader->GetCS("CS_RasterizeTriangle"), groups, 1, 1);
|
||||
}
|
||||
SAFE_DELETE_GPU_RESOURCE(vbTemp);
|
||||
SAFE_DELETE_GPU_RESOURCE(ibTemp);
|
||||
}
|
||||
else if (_modelData)
|
||||
{
|
||||
PROFILE_GPU_CPU_NAMED("Rasterize");
|
||||
const ModelLodData& lod = _modelData->LODs[Math::Clamp(_lodIndex, 0, _modelData->LODs.Count() - 1)];
|
||||
auto vb = GPUBuffer::New();
|
||||
auto ib = GPUBuffer::New();
|
||||
#if GPU_ENABLE_RESOURCE_NAMING
|
||||
vb->SetName(TEXT("SDFvb"));
|
||||
ib->SetName(TEXT("SDFib"));
|
||||
#endif
|
||||
for (int32 i = 0; i < lod.Meshes.Count(); i++)
|
||||
{
|
||||
const MeshData* mesh = lod.Meshes[i];
|
||||
const MaterialSlotEntry& materialSlot = _modelData->Materials[mesh->MaterialSlotIndex];
|
||||
auto material = Content::LoadAsync<MaterialBase>(materialSlot.AssetID);
|
||||
if (material && !material->WaitForLoaded())
|
||||
{
|
||||
// Skip transparent materials
|
||||
if (material->GetInfo().BlendMode != MaterialBlendMode::Opaque)
|
||||
continue;
|
||||
}
|
||||
|
||||
data.Index16bit = 0;
|
||||
data.VertexStride = sizeof(Float3);
|
||||
data.TriangleCount = mesh->Indices.Count() / 3;
|
||||
const uint32 groups = Math::CeilToInt((float)data.TriangleCount / ThreadGroupSize);
|
||||
if (groups > GPU_MAX_CS_DISPATCH_THREAD_GROUPS)
|
||||
{
|
||||
// TODO: support larger meshes via 2D dispatch
|
||||
LOG(Error, "Not supported mesh with {} triangles.", data.TriangleCount);
|
||||
continue;
|
||||
}
|
||||
context->UpdateCB(cb, &data);
|
||||
desc = GPUBufferDescription::Raw(mesh->Positions.Count() * sizeof(Float3), GPUBufferFlags::ShaderResource);
|
||||
desc.InitData = mesh->Positions.Get();
|
||||
// TODO: use transient buffer (single frame)
|
||||
vb->Init(desc);
|
||||
desc = GPUBufferDescription::Raw(mesh->Indices.Count() * sizeof(uint32), GPUBufferFlags::ShaderResource);
|
||||
desc.InitData = mesh->Indices.Get();
|
||||
// TODO: use transient buffer (single frame)
|
||||
ib->Init(desc);
|
||||
context->BindSR(0, vb->View());
|
||||
context->BindSR(1, ib->View());
|
||||
context->Dispatch(shader->GetCS("CS_RasterizeTriangle"), groups, 1, 1);
|
||||
}
|
||||
SAFE_DELETE_GPU_RESOURCE(vb);
|
||||
SAFE_DELETE_GPU_RESOURCE(ib);
|
||||
}
|
||||
|
||||
// Convert SDF volume data back to floats
|
||||
context->Dispatch(shader->GetCS("CS_Resolve"), threadGroups.X, threadGroups.Y, threadGroups.Z);
|
||||
|
||||
// Run linear flood-fill loop to populate all voxels with valid distances (spreads the initial values from triangles rasterization)
|
||||
{
|
||||
PROFILE_GPU_CPU_NAMED("FloodFill");
|
||||
auto csFloodFill = shader->GetCS("CS_FloodFill");
|
||||
const int32 floodFillIterations = Math::Max(_resolution.MaxValue() / 2 + 1, 8);
|
||||
for (int32 floodFill = 0; floodFill < floodFillIterations; floodFill++)
|
||||
{
|
||||
context->ResetUA();
|
||||
context->BindUA(0, _sdfDst->View());
|
||||
context->BindSR(0, _sdfSrc->View());
|
||||
context->Dispatch(csFloodFill, threadGroups.X, threadGroups.Y, threadGroups.Z);
|
||||
Swap(_sdfSrc, _sdfDst);
|
||||
}
|
||||
}
|
||||
|
||||
// Encode SDF values into output storage
|
||||
context->ResetUA();
|
||||
context->BindSR(0, _sdfSrc->View());
|
||||
// TODO: update GPU SDF texture within this task to skip additional CPU->GPU copy
|
||||
auto sdfTextureDesc = GPUTextureDescription::New3D(_resolution.X, _resolution.Y, _resolution.Z, PixelFormat::R16_UNorm, GPUTextureFlags::UnorderedAccess | GPUTextureFlags::RenderTarget);
|
||||
// Allocate output texture
|
||||
auto sdfTextureDesc = GPUTextureDescription::New3D(_resolution.X, _resolution.Y, _resolution.Z, PixelFormat::R16_UNorm, GPUTextureFlags::UnorderedAccess);
|
||||
// TODO: use transient texture (single frame)
|
||||
auto sdfTexture = GPUTexture::New();
|
||||
#if GPU_ENABLE_RESOURCE_NAMING
|
||||
sdfTexture->SetName(TEXT("SDFTexture"));
|
||||
#endif
|
||||
sdfTexture->Init(sdfTextureDesc);
|
||||
context->BindUA(1, sdfTexture->ViewVolume());
|
||||
context->Dispatch(shader->GetCS("CS_Encode"), threadGroups.X, threadGroups.Y, threadGroups.Z);
|
||||
|
||||
// Renders directly to the output texture
|
||||
context->BindUA(0, sdfTexture->ViewVolume());
|
||||
|
||||
// Init the volume (rasterization mixes with existing contents)
|
||||
context->Dispatch(shader->GetCS("CS_Init"), threadGroups.X, threadGroups.Y, threadGroups.Z);
|
||||
|
||||
// Render input triangles into the SDF volume
|
||||
{
|
||||
PROFILE_GPU("Rasterize");
|
||||
context->BindSR(0, bvh.VertexBuffer->View());
|
||||
context->BindSR(1, bvh.IndexBuffer->View());
|
||||
context->BindSR(2, bvh.BVHBuffer->View());
|
||||
auto* rasterizeCS = shader->GetCS("CS_RasterizeTriangles");
|
||||
context->Dispatch(rasterizeCS, threadGroups.X, threadGroups.Y, threadGroups.Z);
|
||||
}
|
||||
|
||||
// Copy result data into readback buffer
|
||||
if (_sdfResult)
|
||||
@@ -329,6 +215,9 @@ public:
|
||||
|
||||
SAFE_DELETE_GPU_RESOURCE(sdfTexture);
|
||||
|
||||
#if GPU_ALLOW_PROFILE_EVENTS
|
||||
_timerQuery->End();
|
||||
#endif
|
||||
return Result::Ok;
|
||||
}
|
||||
|
||||
@@ -336,6 +225,10 @@ public:
|
||||
{
|
||||
GPUTask::OnSync();
|
||||
_signal->NotifyOne();
|
||||
#if GPU_ALLOW_PROFILE_EVENTS
|
||||
if (_timerQuery->HasResult())
|
||||
LOG(Info, "GPU SDF generation took {} ms", Utilities::RoundTo1DecimalPlace(_timerQuery->GetResult()));
|
||||
#endif
|
||||
}
|
||||
|
||||
void OnFail() override
|
||||
@@ -445,6 +338,13 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, const ModelData* modelData,
|
||||
// http://ramakarl.com/pdfs/2016_Hoetzlein_GVDB.pdf
|
||||
// https://www.cse.chalmers.se/~uffe/HighResolutionSparseVoxelDAGs.pdf
|
||||
|
||||
// Setup acceleration structure for fast ray tracing the mesh triangles
|
||||
MeshAccelerationStructure scene;
|
||||
if (inputModel)
|
||||
scene.Add(inputModel, lodIndex);
|
||||
else if (modelData)
|
||||
scene.Add(modelData, lodIndex);
|
||||
|
||||
// Check whether to run SDF generation on the GPU via a Compute Shader or on the CPU via the Job System
|
||||
useGPU &= GPUDevice::Instance
|
||||
&& GPUDevice::Instance->GetState() == GPUDevice::DeviceState::Ready
|
||||
@@ -465,7 +365,7 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, const ModelData* modelData,
|
||||
// Run SDF generation via GPU async task
|
||||
ConditionVariable signal;
|
||||
CriticalSection mutex;
|
||||
Task* task = New<GPUModelSDFTask>(signal, inputModel, modelData, lodIndex, resolution, &sdf, sdfResult, xyzToLocalMul, xyzToLocalAdd);
|
||||
Task* task = New<GPUModelSDFTask>(signal, &scene, inputModel, modelData, lodIndex, resolution, &sdf, sdfResult, xyzToLocalMul, xyzToLocalAdd, backfacesThreshold);
|
||||
task->Start();
|
||||
mutex.Lock();
|
||||
signal.Wait(mutex);
|
||||
@@ -489,16 +389,10 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, const ModelData* modelData,
|
||||
}
|
||||
else
|
||||
{
|
||||
// Setup acceleration structure for fast ray tracing the mesh triangles
|
||||
MeshAccelerationStructure scene;
|
||||
if (inputModel)
|
||||
scene.Add(inputModel, lodIndex);
|
||||
else if (modelData)
|
||||
scene.Add(modelData, lodIndex);
|
||||
scene.BuildBVH();
|
||||
|
||||
// Brute-force for each voxel to calculate distance to the closest triangle with point query and distance sign by raycasting around the voxel
|
||||
constexpr int32 sampleCount = 12;
|
||||
constexpr int32 sampleCount = BUILD_DEBUG ? 6 : 12;
|
||||
Float3 sampleDirections[sampleCount];
|
||||
{
|
||||
RandomStream rand;
|
||||
@@ -526,36 +420,30 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, const ModelData* modelData,
|
||||
Real minDistance = sdf.MaxDistance;
|
||||
Vector3 voxelPos = Float3((float)x, (float)y, (float)z) * xyzToLocalMul + xyzToLocalAdd;
|
||||
|
||||
// Point query to find the distance to the closest surface
|
||||
scene.PointQuery(voxelPos, minDistance, hitPoint, hitTriangle);
|
||||
|
||||
// Raycast samples around voxel to count triangle backfaces hit
|
||||
int32 hitBackCount = 0, hitCount = 0;
|
||||
int32 hitBackCount = 0, minBackfaceHitCount = (int32)(sampleCount * backfacesThreshold);
|
||||
for (int32 sample = 0; sample < sampleCount; sample++)
|
||||
{
|
||||
Ray sampleRay(voxelPos, sampleDirections[sample]);
|
||||
sampleRay.Position -= sampleRay.Direction * 0.0001f; // Apply small margin
|
||||
if (scene.RayCast(sampleRay, hitDistance, hitNormal, hitTriangle))
|
||||
{
|
||||
if (hitDistance < minDistance)
|
||||
minDistance = hitDistance;
|
||||
hitCount++;
|
||||
const bool backHit = Float3::Dot(sampleRay.Direction, hitTriangle.GetNormal()) > 0;
|
||||
if (backHit)
|
||||
hitBackCount++;
|
||||
minDistance = Math::Min(hitDistance, minDistance);
|
||||
if (Float3::Dot(sampleRay.Direction, hitTriangle.GetNormal()) > 0)
|
||||
{
|
||||
if (++hitBackCount >= minBackfaceHitCount)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
float distance = (float)minDistance;
|
||||
// TODO: surface thickness threshold? shift reduce distance for all voxels by something like 0.01 to enlarge thin geometry
|
||||
// if ((float)hitBackCount > (float)hitCount * 0.3f && hitCount != 0)
|
||||
if ((float)hitBackCount > (float)sampleCount * backfacesThreshold && hitCount != 0)
|
||||
{
|
||||
// Voxel is inside the geometry so turn it into negative distance to the surface
|
||||
distance *= -1;
|
||||
}
|
||||
// Point query to find the distance to the closest surface
|
||||
scene.PointQuery(voxelPos, minDistance, hitPoint, hitTriangle, minDistance);
|
||||
if (hitBackCount >= minBackfaceHitCount)
|
||||
minDistance *= -1; // Voxel is inside the geometry so turn it into negative distance to the surface
|
||||
|
||||
const int32 xAddress = x + yAddress;
|
||||
formatWrite(voxels.Get() + xAddress * formatStride, distance * encodeMAD.X + encodeMAD.Y);
|
||||
formatWrite(voxels.Get() + xAddress * formatStride, minDistance * encodeMAD.X + encodeMAD.Y);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@@ -18,6 +18,26 @@ bool RayHitRect(float3 r, float3 rectCenter, float3 rectX, float3 rectY, float3
|
||||
return inExtentX && inExtentY;
|
||||
}
|
||||
|
||||
// Tests a ray (origin rPos, direction rDir) against the triangle (v0, v1, v2).
// Both triangle sides are tested (front and back faces are treated the same).
// Returns true when the ray hits the triangle in front of its origin; distance receives the ray parameter of the hit.
// On a miss the value written to distance is not meaningful and must be ignored by the caller.
bool RayIntersectsTriangle(float3 rPos, float3 rDir, float3 v0, float3 v1, float3 v2, out float distance)
{
    // [https://stackoverflow.com/a/42752998]
    float3 e1 = v1 - v0;
    float3 e2 = v2 - v0;
    float3 faceNormal = cross(e1, e2); // Not normalized
    float3 toOrigin = rPos - v0;
    float3 originCrossDir = cross(toOrigin, rDir);

    // Determinant gets close to zero when the ray runs parallel to the triangle plane
    float det = -dot(rDir, faceNormal);
    float rcpDet = 1.0f / det;

    // Ray parameter of the plane hit and the barycentric coordinates of the hit point
    distance = dot(toOrigin, faceNormal) * rcpDet;
    float baryU = dot(e2, originCrossDir) * rcpDet;
    float baryV = -dot(e1, originCrossDir) * rcpDet;
    float baryW = 1.0f - baryU - baryV;

    bool notParallel = abs(det) >= 1E-8;
    bool inFront = distance > 0;
    bool insideTriangle = baryU >= 0 && baryV >= 0 && baryW >= 0;
    return notParallel && inFront && insideTriangle;
}
|
||||
|
||||
// Hits axis-aligned box (boxMin, boxMax) with a line (lineStart, lineEnd).
|
||||
// Returns the intersections on the line (x - closest, y - furthest).
|
||||
// Line hits the box if: intersections.x < intersections.y.
|
||||
@@ -42,4 +62,39 @@ bool BoxIntersectsSphere(float3 boxMin, float3 boxMax, float3 sphereCenter, floa
|
||||
return distance(sphereCenter, clampedCenter) <= sphereRadius;
|
||||
}
|
||||
|
||||
// Returns the unsigned distance between a point and the axis-aligned box (boxMin, boxMax).
// The result is 0 when the point lies inside the box.
float PointDistanceBox(float3 boxMin, float3 boxMax, float3 pos)
{
    // Point of the box that is nearest to pos (pos itself when it is inside the box)
    float3 nearest = clamp(pos, boxMin, boxMax);
    float3 delta = nearest - pos;
    return length(delta);
}
|
||||
|
||||
// Returns the dot product of the vector with itself (its squared length).
float dot2(float3 v)
{
    float lengthSq = dot(v, v);
    return lengthSq;
}
|
||||
|
||||
// Returns the squared distance between point p and the triangle (v1, v2, v3).
float DistancePointToTriangle2(float3 p, float3 v1, float3 v2, float3 v3)
{
    // [Inigo Quilez, https://iquilezles.org/articles/triangledistance/]
    // Triangle edges (in cyclic order) and offsets from every vertex to the point
    float3 edge12 = v2 - v1;
    float3 edge23 = v3 - v2;
    float3 edge31 = v1 - v3;
    float3 fromV1 = p - v1;
    float3 fromV2 = p - v2;
    float3 fromV3 = p - v3;
    float3 nor = cross(edge12, edge31);

    // Inside/outside test: sum of the signs of the point against the three edge planes
    float sideSum = sign(dot(cross(edge12, nor), fromV1)) +
                    sign(dot(cross(edge23, nor), fromV2)) +
                    sign(dot(cross(edge31, nor), fromV3));
    if (sideSum < 2.0)
    {
        // 3 edges: take the smallest squared distance to any of the edge segments
        float d12 = dot2(edge12 * saturate(dot(edge12, fromV1) / dot2(edge12)) - fromV1);
        float d23 = dot2(edge23 * saturate(dot(edge23, fromV2) / dot2(edge23)) - fromV2);
        float d31 = dot2(edge31 * saturate(dot(edge31, fromV3) / dot2(edge31)) - fromV3);
        return min(min(d12, d23), d31);
    }

    // 1 face: squared distance to the triangle plane
    float planeOffset = dot(nor, fromV1);
    return planeOffset * planeOffset / dot2(nor);
}
|
||||
|
||||
#endif
|
||||
|
||||
163
Source/Shaders/MeshAccelerationStructure.hlsl
Normal file
163
Source/Shaders/MeshAccelerationStructure.hlsl
Normal file
@@ -0,0 +1,163 @@
|
||||
// Copyright (c) Wojciech Figat. All rights reserved.
|
||||
|
||||
#ifndef __MESH_ACCELERATION_STRUCTURE__
|
||||
#define __MESH_ACCELERATION_STRUCTURE__
|
||||
|
||||
#include "./Flax/Collisions.hlsl"
|
||||
|
||||
// This must match MeshAccelerationStructure::ToGPU
|
||||
#define BVH_STACK_SIZE 32
|
||||
|
||||
// Single node of the BVH as read from the GPU buffer. A node is a leaf when Count > 0, otherwise it is an inner node with two children.
// NOTE(review): the field order presumably mirrors the data written by MeshAccelerationStructure::ToGPU - confirm before changing it.
struct BVHNode
|
||||
{
|
||||
float3 BoundsMin; // Minimum corner of the node bounds (axis-aligned box)
|
||||
uint Index; // Leaf: first index buffer element of the node triangles. Inner node: index of the first child node (the second child is Index + 1)
|
||||
float3 BoundsMax; // Maximum corner of the node bounds (axis-aligned box)
|
||||
int Count; // Leaf: amount of index buffer elements to read (3 per triangle). Negative for non-leaf nodes
|
||||
};
|
||||
|
||||
// Bundle of GPU resources that is passed to the BVH query functions.
struct BVHBuffers
|
||||
{
|
||||
StructuredBuffer<BVHNode> BVHBuffer; // BVH nodes, the traversal starts from the element 0 (root)
|
||||
ByteAddressBuffer VertexBuffer; // Raw vertex data, the position is read as 3 floats from the start of each vertex
|
||||
ByteAddressBuffer IndexBuffer; // Raw index data read as 32-bit values (4 bytes per index)
|
||||
uint VertexStride; // Offset between the consecutive vertices in the VertexBuffer (in bytes)
|
||||
};
|
||||
|
||||
// Result of the BVH query (ray cast or point query).
struct BVHHit
|
||||
{
|
||||
float Distance; // Distance to the closest triangle found (initialized with the query max distance)
|
||||
bool IsBackface; // Set by the ray cast when the ray direction points along the hit triangle normal (point query leaves it as false)
|
||||
};
|
||||
|
||||
// Loads the position of the vertex referenced by the given element of the index buffer.
float3 LoadVertexBVH(BVHBuffers bvh, uint index)
{
    // Indices are 32-bit so each one takes 4 bytes of the raw buffer
    uint vertexIndex = bvh.IndexBuffer.Load(index * 4u);

    // Position is stored as 3 floats at the beginning of the vertex
    uint vertexAddress = vertexIndex * bvh.VertexStride;
    return asfloat(bvh.VertexBuffer.Load3(vertexAddress));
}
|
||||
|
||||
// Slab test of a ray against the axis-aligned box (boxMin, boxMax).
// Returns the distance along the ray to the box entry (0 when the ray starts inside the box) or -1 when the box is missed.
// [https://tavianator.com/2011/ray_box.html]
float RayTestBoxBVH(float3 rayPos, float3 rayDir, float3 boxMin, float3 boxMax)
{
    // Ray parameters at which the ray crosses both planes of every slab
    float3 invDir = rcp(rayDir);
    float3 tA = (boxMin - rayPos) * invDir;
    float3 tB = (boxMax - rayPos) * invDir;

    // Latest entry and earliest exit over all three slabs
    float3 tEnter = min(tA, tB);
    float3 tExit = max(tA, tB);
    float tNear = max(max(tEnter.x, tEnter.y), tEnter.z);
    float tFar = min(min(tExit.x, tExit.y), tExit.z);

    if (tFar >= tNear && tFar > 0)
        return max(tNear, 0);
    return -1;
}
|
||||
|
||||
// Traces a ray through the BVH acceleration structure to find the closest triangle intersection that is nearer than maxDistance.
// Returns true if any triangle was hit. In that case hit.Distance is the distance along the ray and hit.IsBackface tells
// whether the ray direction points along the triangle normal. Without a hit, hit.Distance stays at maxDistance.
bool RayCastBVH(BVHBuffers bvh, float3 rayPos, float3 rayDir, out BVHHit hit, float maxDistance = 1000000.0f)
{
    hit = (BVHHit)0;
    hit.Distance = maxDistance;

    // Iterative traversal with an explicit stack of node indices, seeded with the root node (index 0)
    uint pending[BVH_STACK_SIZE];
    pending[0] = 0;
    uint pendingCount = 1;

    bool anyHit = false;
    LOOP
    while (pendingCount > 0)
    {
        pendingCount--;
        BVHNode node = bvh.BVHBuffer[pending[pendingCount]];

        // Skip nodes whose bounds are missed by the ray or lie further than the closest hit found so far
        float boundsT = RayTestBoxBVH(rayPos, rayDir, node.BoundsMin, node.BoundsMax);
        BRANCH
        if (!(boundsT >= 0 && boundsT < hit.Distance))
            continue;

        BRANCH
        if (node.Count <= 0)
        {
            // Inner node: schedule both children for testing
            pending[pendingCount] = node.Index + 0;
            pending[pendingCount + 1] = node.Index + 1;
            pendingCount += 2;
            continue;
        }

        // Leaf node: test every triangle it references (3 index buffer elements per triangle)
        uint lastIndex = node.Index + node.Count;
        for (uint i = node.Index; i < lastIndex; i += 3)
        {
            float3 a = LoadVertexBVH(bvh, i + 0);
            float3 b = LoadVertexBVH(bvh, i + 1);
            float3 c = LoadVertexBVH(bvh, i + 2);

            float t;
            if (RayIntersectsTriangle(rayPos, rayDir, a, b, c, t) && t < hit.Distance)
            {
                // Closer hit found, remember it together with the facing of the triangle
                float3 normal = normalize(cross(b - a, c - a));
                hit.Distance = t;
                hit.IsBackface = dot(rayDir, normal) > 0;
                anyHit = true;
            }
        }
    }
    return anyHit;
}
|
||||
|
||||
// Searches the BVH acceleration structure for the triangle closest to the given point, within maxDistance.
// Returns true if any triangle closer than maxDistance was found, with hit.Distance set to the (unsigned) distance to it.
// Otherwise hit.Distance stays at maxDistance. hit.IsBackface is not evaluated by this query (always false).
bool PointQueryBVH(BVHBuffers bvh, float3 pos, out BVHHit hit, float maxDistance = 1000000.0f)
{
    hit = (BVHHit)0;
    hit.Distance = maxDistance;

    // Iterative traversal with an explicit stack of node indices, seeded with the root node (index 0)
    uint pending[BVH_STACK_SIZE];
    pending[0] = 0;
    uint pendingCount = 1;

    bool anyFound = false;
    LOOP
    while (pendingCount > 0)
    {
        pendingCount--;
        BVHNode node = bvh.BVHBuffer[pending[pendingCount]];

        // Nodes that are not closer than the best result so far cannot improve it
        if (PointDistanceBox(node.BoundsMin, node.BoundsMax, pos) >= hit.Distance)
            continue;

        BRANCH
        if (node.Count <= 0)
        {
            // Inner node: schedule both children for testing
            pending[pendingCount] = node.Index + 0;
            pending[pendingCount + 1] = node.Index + 1;
            pendingCount += 2;
            continue;
        }

        // Leaf node: measure the distance to every triangle it references (3 index buffer elements per triangle)
        uint lastIndex = node.Index + node.Count;
        for (uint i = node.Index; i < lastIndex; i += 3)
        {
            float3 a = LoadVertexBVH(bvh, i + 0);
            float3 b = LoadVertexBVH(bvh, i + 1);
            float3 c = LoadVertexBVH(bvh, i + 2);

            float triangleDistance = sqrt(DistancePointToTriangle2(pos, a, b, c));
            if (triangleDistance < hit.Distance)
            {
                hit.Distance = triangleDistance;
                anyFound = true;
            }
        }
    }
    return anyFound;
}
|
||||
|
||||
#endif
|
||||
@@ -1,18 +1,14 @@
|
||||
// Copyright (c) Wojciech Figat. All rights reserved.
|
||||
|
||||
// Mesh SDF generation based on https://github.com/GPUOpen-Effects/TressFX
|
||||
|
||||
#include "./Flax/Common.hlsl"
|
||||
#include "./Flax/ThirdParty/TressFX/TressFXSDF.hlsl"
|
||||
|
||||
#define THREAD_GROUP_SIZE 64
|
||||
#include "./Flax/MeshAccelerationStructure.hlsl"
|
||||
|
||||
META_CB_BEGIN(0, Data)
|
||||
int3 Resolution;
|
||||
uint ResolutionSize;
|
||||
float MaxDistance;
|
||||
uint VertexStride;
|
||||
bool Index16bit;
|
||||
float BackfacesThreshold;
|
||||
uint TriangleCount;
|
||||
float3 VoxelToPosMul;
|
||||
float WorldUnitsPerVoxel;
|
||||
@@ -20,21 +16,9 @@ float3 VoxelToPosAdd;
|
||||
uint ThreadGroupsX;
|
||||
META_CB_END
|
||||
|
||||
RWBuffer<uint> SDF : register(u0);
|
||||
|
||||
uint GetVoxelIndex(uint3 groupId, uint groupIndex)
|
||||
uint GetVoxelIndex(uint3 groupId, uint groupIndex, uint groupSize)
|
||||
{
|
||||
return groupIndex + (groupId.x + groupId.y * ThreadGroupsX) * THREAD_GROUP_SIZE;
|
||||
}
|
||||
|
||||
int3 ClampVoxelCoord(int3 coord)
|
||||
{
|
||||
return clamp(coord, 0, Resolution - 1);
|
||||
}
|
||||
|
||||
int GetVoxelIndex(int3 coord)
|
||||
{
|
||||
return Resolution.x * Resolution.y * coord.z + Resolution.x * coord.y + coord.x;
|
||||
return groupIndex + (groupId.x + groupId.y * ThreadGroupsX) * groupSize;
|
||||
}
|
||||
|
||||
float3 GetVoxelPos(int3 coord)
|
||||
@@ -42,12 +26,6 @@ float3 GetVoxelPos(int3 coord)
|
||||
return float3((float)coord.x, (float)coord.y, (float)coord.z) * VoxelToPosMul + VoxelToPosAdd;
|
||||
}
|
||||
|
||||
int3 GetVoxelCoord(float3 pos)
|
||||
{
|
||||
pos = (pos - VoxelToPosAdd) / VoxelToPosMul;
|
||||
return int3((int)pos.x, (int)pos.y, (int)pos.z);
|
||||
}
|
||||
|
||||
int3 GetVoxelCoord(uint index)
|
||||
{
|
||||
uint sizeX = (uint)Resolution.x;
|
||||
@@ -59,191 +37,90 @@ int3 GetVoxelCoord(uint index)
|
||||
return int3((int)coordX, (int)coordY, (int)coordZ);
|
||||
}
|
||||
|
||||
// Clears SDF texture with the initial distance.
|
||||
#ifdef _CS_Init
|
||||
|
||||
#define THREAD_GROUP_SIZE 64
|
||||
|
||||
RWTexture3D<unorm half> SDFtex : register(u0);
|
||||
|
||||
// Clears SDF texture with the maximum distance.
|
||||
META_CS(true, FEATURE_LEVEL_SM5)
|
||||
[numthreads(THREAD_GROUP_SIZE, 1, 1)]
|
||||
void CS_Init(uint3 GroupId : SV_GroupID, uint GroupIndex : SV_GroupIndex)
|
||||
{
|
||||
uint voxelIndex = GetVoxelIndex(GroupId, GroupIndex);
|
||||
uint voxelIndex = GetVoxelIndex(GroupId, GroupIndex, THREAD_GROUP_SIZE);
|
||||
if (voxelIndex >= ResolutionSize)
|
||||
return;
|
||||
float distance = MaxDistance * 10.0f; // Start with a very large value
|
||||
SDF[voxelIndex] = FloatFlip3(distance);
|
||||
int3 voxelCoord = GetVoxelCoord(voxelIndex);
|
||||
SDFtex[voxelCoord] = 1.0f;
|
||||
}
|
||||
|
||||
// Unpacks SDF texture into distances stored as normal float values (FloatFlip3 is used for interlocked operations on uint).
|
||||
META_CS(true, FEATURE_LEVEL_SM5)
|
||||
[numthreads(THREAD_GROUP_SIZE, 1, 1)]
|
||||
void CS_Resolve(uint3 GroupId : SV_GroupID, uint GroupIndex : SV_GroupIndex)
|
||||
{
|
||||
uint voxelIndex = GetVoxelIndex(GroupId, GroupIndex);
|
||||
if (voxelIndex >= ResolutionSize)
|
||||
return;
|
||||
SDF[voxelIndex] = IFloatFlip3(SDF[voxelIndex]);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef _CS_RasterizeTriangle
|
||||
#ifdef _CS_RasterizeTriangles
|
||||
|
||||
#define THREAD_GROUP_SIZE 64
|
||||
|
||||
RWTexture3D<unorm half> SDFtex : register(u0);
|
||||
ByteAddressBuffer VertexBuffer : register(t0);
|
||||
ByteAddressBuffer IndexBuffer : register(t1);
|
||||
|
||||
uint LoadIndex(uint i)
|
||||
{
|
||||
if (Index16bit)
|
||||
{
|
||||
uint index = IndexBuffer.Load((i >> 1u) << 2u);
|
||||
index = (i & 1u) == 1u ? (index >> 16) : index;
|
||||
return index & 0xffff;
|
||||
}
|
||||
return IndexBuffer.Load(i << 2u);
|
||||
}
|
||||
|
||||
float3 LoadVertex(uint i)
|
||||
{
|
||||
return asfloat(VertexBuffer.Load3(i * VertexStride));
|
||||
}
|
||||
StructuredBuffer<BVHNode> BVHBuffer : register(t2);
|
||||
|
||||
// Renders triangle mesh into the SDF texture by writing minimum distance to the triangle into all intersecting voxels.
|
||||
META_CS(true, FEATURE_LEVEL_SM5)
|
||||
[numthreads(THREAD_GROUP_SIZE, 1, 1)]
|
||||
void CS_RasterizeTriangle(uint3 DispatchThreadId : SV_DispatchThreadID)
|
||||
void CS_RasterizeTriangles(uint3 GroupId : SV_GroupID, uint3 GroupThreadID : SV_GroupThreadID, uint GroupIndex : SV_GroupIndex)
|
||||
{
|
||||
uint triangleIndex = DispatchThreadId.x;
|
||||
if (triangleIndex >= TriangleCount)
|
||||
uint voxelIndex = GetVoxelIndex(GroupId, GroupIndex, THREAD_GROUP_SIZE);
|
||||
if (voxelIndex >= ResolutionSize)
|
||||
return;
|
||||
int3 voxelCoord = GetVoxelCoord(voxelIndex);
|
||||
float3 voxelPos = GetVoxelPos(voxelCoord);
|
||||
|
||||
// Load triangle
|
||||
triangleIndex *= 3;
|
||||
uint i0 = LoadIndex(triangleIndex + 0);
|
||||
uint i1 = LoadIndex(triangleIndex + 1);
|
||||
uint i2 = LoadIndex(triangleIndex + 2);
|
||||
float3 v0 = LoadVertex(i0);
|
||||
float3 v1 = LoadVertex(i1);
|
||||
float3 v2 = LoadVertex(i2);
|
||||
BVHBuffers bvh;
|
||||
bvh.BVHBuffer = BVHBuffer;
|
||||
bvh.VertexBuffer = VertexBuffer;
|
||||
bvh.IndexBuffer = IndexBuffer;
|
||||
bvh.VertexStride = VertexStride;
|
||||
|
||||
// Project triangle into SDF voxels
|
||||
float3 vMargin = float3(WorldUnitsPerVoxel, WorldUnitsPerVoxel, WorldUnitsPerVoxel);
|
||||
float3 vMin = min(min(v0, v1), v2) - vMargin;
|
||||
float3 vMax = max(max(v0, v1), v2) + vMargin;
|
||||
int3 voxelMargin = int3(1, 1, 1);
|
||||
int3 voxelMin = GetVoxelCoord(vMin) - voxelMargin;
|
||||
int3 voxelMax = GetVoxelCoord(vMax) + voxelMargin;
|
||||
voxelMin = ClampVoxelCoord(voxelMin);
|
||||
voxelMax = ClampVoxelCoord(voxelMax);
|
||||
// Point query to find the distance to the closest surface
|
||||
BVHHit hit;
|
||||
PointQueryBVH(bvh, voxelPos, hit, MaxDistance);
|
||||
float sdf = hit.Distance;
|
||||
|
||||
// Rasterize into SDF voxels
|
||||
for (int z = voxelMin.z; z <= voxelMax.z; z++)
|
||||
// Raycast triangles around voxel to count triangle backfaces hit
|
||||
#define CLOSEST_CACHE_SIZE 6
|
||||
float3 closestDirections[CLOSEST_CACHE_SIZE] =
|
||||
{
|
||||
for (int y = voxelMin.y; y <= voxelMax.y; y++)
|
||||
float3(+1, 0, 0),
|
||||
float3(-1, 0, 0),
|
||||
float3(0, +1, 0),
|
||||
float3(0, -1, 0),
|
||||
float3(0, 0, +1),
|
||||
float3(0, 0, -1),
|
||||
};
|
||||
uint hitBackCount = 0;
|
||||
uint minBackfaceHitCount = (uint)(CLOSEST_CACHE_SIZE * BackfacesThreshold);
|
||||
for (uint i = 0; i < CLOSEST_CACHE_SIZE; i++)
|
||||
{
|
||||
float3 rayDir = closestDirections[i];
|
||||
if (RayCastBVH(bvh, voxelPos, rayDir, hit, MaxDistance))
|
||||
{
|
||||
for (int x = voxelMin.x; x <= voxelMax.x; x++)
|
||||
{
|
||||
int3 voxelCoord = int3(x, y, z);
|
||||
int voxelIndex = GetVoxelIndex(voxelCoord);
|
||||
float3 voxelPos = GetVoxelPos(voxelCoord);
|
||||
float distance = SignedDistancePointToTriangle(voxelPos, v0, v1, v2);
|
||||
#if 0
|
||||
if (distance < -10.0f) // TODO: find a better way to reject negative distance from degenerate triangles that break SDF shape
|
||||
distance = abs(distance);
|
||||
#endif
|
||||
InterlockedMin(SDF[voxelIndex], FloatFlip3(distance));
|
||||
}
|
||||
sdf = min(sdf, hit.Distance);
|
||||
if (hit.IsBackface)
|
||||
hitBackCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(_CS_FloodFill) || defined(_CS_Encode)
|
||||
|
||||
Buffer<uint> InSDF : register(t0);
|
||||
|
||||
float GetVoxel(int voxelIndex)
|
||||
{
|
||||
return asfloat(InSDF[voxelIndex]);
|
||||
}
|
||||
|
||||
float GetVoxel(int3 coord)
|
||||
{
|
||||
coord = ClampVoxelCoord(coord);
|
||||
int voxelIndex = GetVoxelIndex(coord);
|
||||
return GetVoxel(voxelIndex);
|
||||
}
|
||||
|
||||
float CombineSDF(float sdf, int3 nearbyCoord, float nearbyDistance)
|
||||
{
|
||||
// Sample nearby voxel
|
||||
float sdfNearby = GetVoxel(nearbyCoord);
|
||||
|
||||
// Include distance to that nearby voxel
|
||||
if (sdfNearby < 0.0f)
|
||||
nearbyDistance *= -1;
|
||||
sdfNearby += nearbyDistance;
|
||||
|
||||
if (sdfNearby > MaxDistance)
|
||||
if (hitBackCount >= minBackfaceHitCount)
|
||||
{
|
||||
// Ignore if nearby sample is invalid (see CS_Init)
|
||||
// Voxel is inside the geometry so turn it into negative distance to the surface
|
||||
sdf *= -1;
|
||||
}
|
||||
else if (sdf > MaxDistance)
|
||||
{
|
||||
// Use nearby sample if current one is invalid (see CS_Init)
|
||||
sdf = sdfNearby;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Use distance closer to 0
|
||||
sdf = sdf >= 0 ? min(sdf, sdfNearby) : max(sdf, sdfNearby);
|
||||
}
|
||||
|
||||
return sdf;
|
||||
}
|
||||
|
||||
// Fills the voxels with minimum distances to the triangles.
|
||||
META_CS(true, FEATURE_LEVEL_SM5)
|
||||
[numthreads(THREAD_GROUP_SIZE, 1, 1)]
|
||||
void CS_FloodFill(uint3 GroupId : SV_GroupID, uint GroupIndex : SV_GroupIndex)
|
||||
{
|
||||
uint voxelIndex = GetVoxelIndex(GroupId, GroupIndex);
|
||||
if (voxelIndex >= ResolutionSize)
|
||||
return;
|
||||
float sdf = GetVoxel(voxelIndex);
|
||||
|
||||
// Skip if the distance is already so small that we know that triangle is nearby
|
||||
if (abs(sdf) > WorldUnitsPerVoxel * 1.2f)
|
||||
{
|
||||
int3 voxelCoord = GetVoxelCoord(voxelIndex);
|
||||
int3 offset = int3(-1, 0, 1);
|
||||
|
||||
// Sample nearby voxels
|
||||
float nearbyDistance = WorldUnitsPerVoxel;
|
||||
sdf = CombineSDF(sdf, voxelCoord + offset.zyy, nearbyDistance);
|
||||
sdf = CombineSDF(sdf, voxelCoord + offset.yzy, nearbyDistance);
|
||||
sdf = CombineSDF(sdf, voxelCoord + offset.yyz, nearbyDistance);
|
||||
sdf = CombineSDF(sdf, voxelCoord + offset.xyy, nearbyDistance);
|
||||
sdf = CombineSDF(sdf, voxelCoord + offset.yxy, nearbyDistance);
|
||||
sdf = CombineSDF(sdf, voxelCoord + offset.yyx, nearbyDistance);
|
||||
}
|
||||
|
||||
SDF[voxelIndex] = asuint(sdf);
|
||||
}
|
||||
|
||||
RWTexture3D<half> SDFtex : register(u1);
|
||||
|
||||
// Encodes SDF values into the packed format with normalized distances.
|
||||
META_CS(true, FEATURE_LEVEL_SM5)
|
||||
[numthreads(THREAD_GROUP_SIZE, 1, 1)]
|
||||
void CS_Encode(uint3 GroupId : SV_GroupID, uint GroupIndex : SV_GroupIndex)
|
||||
{
|
||||
uint voxelIndex = GetVoxelIndex(GroupId, GroupIndex);
|
||||
if (voxelIndex >= ResolutionSize)
|
||||
return;
|
||||
float sdf = GetVoxel(voxelIndex);
|
||||
sdf = min(sdf, MaxDistance);
|
||||
|
||||
// Pack from range [-MaxDistance; +MaxDistance] to [0; 1]
|
||||
sdf = clamp(sdf, -MaxDistance, MaxDistance);
|
||||
sdf = (sdf / MaxDistance) * 0.5f + 0.5f;
|
||||
|
||||
int3 voxelCoord = GetVoxelCoord(voxelIndex);
|
||||
SDFtex[voxelCoord] = sdf;
|
||||
}
|
||||
|
||||
|
||||
129
Source/Shaders/ThirdParty/TressFX/TressFXSDF.hlsl
vendored
129
Source/Shaders/ThirdParty/TressFX/TressFXSDF.hlsl
vendored
@@ -1,129 +0,0 @@
|
||||
// Source: https://github.com/GPUOpen-Effects/TressFX
|
||||
// License: MIT
|
||||
|
||||
//
|
||||
// Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
//
|
||||
|
||||
//When building the SDF we want to find the lowest distance at each SDF cell. In order to allow multiple threads to write to the same
|
||||
//cells, it is necessary to use atomics. However, there is no support for atomics with 32-bit floats so we convert the float into unsigned int
|
||||
//and use atomic_min() / InterlockedMin() as a workaround.
|
||||
//
|
||||
//When used with atomic_min, both FloatFlip2() and FloatFlip3() will store the float with the lowest magnitude.
|
||||
//The difference is that FloatFlip2() will prefer negative values ( InterlockedMin( FloatFlip2(1.0), FloatFlip2(-1.0) ) == -1.0 ),
|
||||
//while FloatFlip3() prefers positive values ( InterlockedMin( FloatFlip3(1.0), FloatFlip3(-1.0) ) == 1.0 ).
|
||||
//Using FloatFlip3() seems to result in a SDF with higher quality compared to FloatFlip2().
|
||||
// Encodes a float as uint: the magnitude bits are shifted up by one and the inverted sign bit lands in the least significant bit.
uint FloatFlip2(float fl)
{
    uint bits = asuint(fl);
    uint magnitudeBits = bits << 1;
    uint invertedSign = (bits >> 31) ^ 0x00000001; // Flip sign bit so that (0 == negative)
    return magnitudeBits | invertedSign;
}
|
||||
// Decodes the value produced by FloatFlip2() back into the raw float bits.
uint IFloatFlip2(uint f2)
{
    // Least significant bit holds the inverted sign: move it to the top and flip it back
    uint signBit = (f2 << 31) ^ 0x80000000;
    uint magnitudeBits = f2 >> 1;
    return magnitudeBits | signBit;
}
|
||||
// Encodes a float as uint by rotating its bits left by one, so the sign bit becomes the least significant bit.
uint FloatFlip3(float fl)
{
    uint bits = asuint(fl);
    uint magnitudeBits = bits << 1;
    uint signBit = bits >> 31;
    return magnitudeBits | signBit;
}
|
||||
// Decodes the value produced by FloatFlip3() back into the raw float bits (rotates the bits right by one).
uint IFloatFlip3(uint f2)
{
    uint signBit = f2 << 31;
    uint magnitudeBits = f2 >> 1;
    return magnitudeBits | signBit;
}
|
||||
|
||||
// Returns the distance from point p to the segment (x0, x1).
// Outputs n as the direction from the closest point on the segment towards p (normalized with a tiny epsilon to avoid division by zero).
float DistancePointToEdge(float3 p, float3 x0, float3 x1, out float3 n)
{
    // Hack: order the endpoints by the x coordinate so the result does not depend on the argument order (.x only for simplicity)
    float3 a = x0;
    float3 b = x1;
    if (x0.x > x1.x)
    {
        a = x1;
        b = x0;
    }

    // Parameter of the projection of p onto the segment (measured from b towards a), clamped to the segment
    float3 ab = b - a;
    float t = dot(b - p, ab) / dot(ab, ab);
    t = max(0.0f, min(t, 1.0f));

    float3 closest = t * a + (1.0f - t) * b;
    float3 offset = p - closest;
    float d = length(offset);
    n = offset / (d + 1e-30f);
    return d;
}
|
||||
|
||||
// Returns the signed distance from point p to the triangle (x0, x1, x2).
// The sign tells on which side of the triangle plane the point lies:
// positive side is where the normal vector of triangle ( (x1-x0) x (x2-x0) ) is pointing to.
float SignedDistancePointToTriangle(float3 p, float3 x0, float3 x1, float3 x2)
{
    // Build an orthonormal pair of in-plane axes anchored at x2 (epsilons guard against zero-length edges)
    float3 axisU = x0 - x2;
    float lenU = length(axisU) + 1e-30f;
    axisU = axisU / lenU;
    float3 axisV = x1 - x2;
    float projVU = dot(axisV, axisU);
    axisV = axisV - projVU * axisU;
    float lenV = length(axisV) + 1e-30f;
    axisV = axisV / lenV;
    float3 fromX2 = p - x2;

    // Barycentric coordinates of the point projected onto the triangle plane
    float b = dot(axisV, fromX2) / lenV;
    float a = (dot(axisU, fromX2) - projVU * b) / lenU;
    float c = 1 - a - b;

    // normal vector of triangle. Don't need to normalize this yet.
    float3 nTri = cross((x1 - x0), (x2 - x0));

    // Direction from the closest feature towards the point (evaluated but not returned by this function)
    float3 n;
    float d = 0;
    float tol = 1e-8f;
    bool projectsInside = a >= -tol && b >= -tol && c >= -tol;
    if (projectsInside)
    {
        // Closest feature is the triangle face
        n = p - (a * x0 + b * x1 + c * x2);
        d = length(n);

        float3 awayFromFace = n / d;
        float3 faceNormal = nTri / (length(nTri) + 1e-30f); // if d == 0
        n = (d > 0) ? awayFromFace : faceNormal;
    }
    else
    {
        // Closest feature is one of the edges
        float3 n12;
        float3 n02;
        float d01 = DistancePointToEdge(p, x0, x1, n);
        float d12 = DistancePointToEdge(p, x1, x2, n12);
        float d02 = DistancePointToEdge(p, x0, x2, n02);
        d = min(min(d01, d12), d02);

        n = (d == d12) ? n12 : n;
        n = (d == d02) ? n02 : n;
    }

    // Negate the distance for points behind the triangle plane
    if (dot(p - x0, nTri) < 0.f)
        d = -d;
    return d;
}
|
||||
Reference in New Issue
Block a user