Add **Model SDF baking on GPU** via Compute Shader

This commit is contained in:
Wojtek Figat
2024-05-29 14:53:13 +02:00
parent 5f4c57d3eb
commit 53d77d3421
11 changed files with 793 additions and 92 deletions

BIN
Content/Shaders/GI/GlobalSurfaceAtlas.flax (Stored with Git LFS)

Binary file not shown.

BIN
Content/Shaders/SDF.flax (Stored with Git LFS) Normal file

Binary file not shown.

View File

@@ -344,6 +344,7 @@ bool DeployDataStep::Perform(CookingData& data)
data.AddRootEngineAsset(TEXT("Shaders/Sky"));
data.AddRootEngineAsset(TEXT("Shaders/SSAO"));
data.AddRootEngineAsset(TEXT("Shaders/SSR"));
data.AddRootEngineAsset(TEXT("Shaders/SDF"));
data.AddRootEngineAsset(TEXT("Shaders/VolumetricFog"));
data.AddRootEngineAsset(TEXT("Engine/DefaultMaterial"));
data.AddRootEngineAsset(TEXT("Engine/DefaultDeformableMaterial"));

View File

@@ -298,11 +298,12 @@ namespace FlaxEditor.Windows.Assets
proxy.Window.Enabled = false;
Task.Run(() =>
{
proxy.Asset.GenerateSDF(proxy.Window._importSettings.Settings.SDFResolution, _sdfModelLodIndex.Value, true, proxy.Window._backfacesThreshold);
bool failed = proxy.Asset.GenerateSDF(proxy.Window._importSettings.Settings.SDFResolution, _sdfModelLodIndex.Value, true, proxy.Window._backfacesThreshold);
FlaxEngine.Scripting.InvokeOnUpdate(() =>
{
proxy.Window.Enabled = true;
proxy.Window.MarkAsEdited();
if (!failed)
proxy.Window.MarkAsEdited();
Presenter.BuildLayoutOnUpdate();
});
});

View File

@@ -93,7 +93,7 @@ public:
/// <param name="asset">The asset to set.</param>
AssetReference(T* asset)
{
OnSet(asset);
OnSet((Asset*)asset);
}
/// <summary>
@@ -215,7 +215,7 @@ public:
/// <param name="asset">The asset.</param>
void Set(T* asset)
{
OnSet(asset);
OnSet((Asset*)asset);
}
};

View File

@@ -650,7 +650,7 @@ bool Model::Save(bool withMeshDataFromGpu, const StringView& path)
#endif
bool Model::GenerateSDF(float resolutionScale, int32 lodIndex, bool cacheData, float backfacesThreshold)
bool Model::GenerateSDF(float resolutionScale, int32 lodIndex, bool cacheData, float backfacesThreshold, bool useGPU)
{
if (EnableModelSDF == 2)
return true; // Not supported
@@ -673,7 +673,10 @@ bool Model::GenerateSDF(float resolutionScale, int32 lodIndex, bool cacheData, f
#else
class MemoryWriteStream* outputStream = nullptr;
#endif
if (ModelTool::GenerateModelSDF(this, nullptr, resolutionScale, lodIndex, &SDF, outputStream, GetPath(), backfacesThreshold))
Locker.Unlock();
const bool failed = ModelTool::GenerateModelSDF(this, nullptr, resolutionScale, lodIndex, &SDF, outputStream, GetPath(), backfacesThreshold, useGPU);
Locker.Lock();
if (failed)
return true;
#if USE_EDITOR

View File

@@ -227,8 +227,9 @@ public:
/// <param name="lodIndex">The index of the LOD to use for the SDF building.</param>
/// <param name="cacheData">If true, the generated SDF texture data will be cached on CPU (in asset chunk storage) to allow saving it later, otherwise it will be runtime for GPU-only. Ignored for virtual assets or in build.</param>
/// <param name="backfacesThreshold">Custom threshold (in range 0-1) for adjusting mesh internals detection based on the percentage of test rays hit triangle backfaces. Use lower value for more dense mesh.</param>
/// <param name="useGPU">Enables using GPU for SDF generation, otherwise CPU will be used (async via Job System).</param>
/// <returns>True if failed, otherwise false.</returns>
API_FUNCTION() bool GenerateSDF(float resolutionScale = 1.0f, int32 lodIndex = 6, bool cacheData = true, float backfacesThreshold = 0.6f);
API_FUNCTION() bool GenerateSDF(float resolutionScale = 1.0f, int32 lodIndex = 6, bool cacheData = true, float backfacesThreshold = 0.6f, bool useGPU = true);
/// <summary>
/// Sets the SDF data (releases the current one).

View File

@@ -8,15 +8,20 @@
#include "Engine/Core/RandomStream.h"
#include "Engine/Core/Math/Vector3.h"
#include "Engine/Core/Math/Ray.h"
#include "Engine/Profiler/ProfilerCPU.h"
#include "Engine/Platform/ConditionVariable.h"
#include "Engine/Profiler/Profiler.h"
#include "Engine/Threading/JobSystem.h"
#include "Engine/Graphics/GPUDevice.h"
#include "Engine/Graphics/GPUBuffer.h"
#include "Engine/Graphics/RenderTools.h"
#include "Engine/Graphics/Async/GPUTask.h"
#include "Engine/Graphics/Shaders/GPUShader.h"
#include "Engine/Graphics/Textures/GPUTexture.h"
#include "Engine/Graphics/Textures/TextureData.h"
#include "Engine/Graphics/Models/ModelData.h"
#include "Engine/Content/Assets/Model.h"
#include "Engine/Content/Content.h"
#include "Engine/Content/Assets/Shader.h"
#include "Engine/Serialization/MemoryWriteStream.h"
#include "Engine/Engine/Units.h"
#if USE_EDITOR
@@ -71,7 +76,261 @@ ModelSDFMip::ModelSDFMip(int32 mipIndex, const TextureMipData& mip)
{
}
bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float resolutionScale, int32 lodIndex, ModelBase::SDFData* outputSDF, MemoryWriteStream* outputStream, const StringView& assetName, float backfacesThreshold)
/// <summary>
/// GPU task that generates a model SDF volume with the compute shaders from the 'Shaders/SDF' asset.
/// Pipeline: CS_Init -> CS_RasterizeTriangle (per mesh) -> CS_Resolve -> CS_FloodFill (iterated) -> CS_Encode.
/// The encoded R16_UNorm volume is copied into the staging texture passed as sdfResult (if any).
/// The provided condition variable is notified once when the task gets synced, fails or is cancelled.
/// </summary>
class GPUModelSDFTask : public GPUTask
{
    ConditionVariable* _signal;
    AssetReference<Shader> _shader;
    Model* _inputModel;
    ModelData* _modelData;
    int32 _lodIndex;
    Int3 _resolution;
    ModelBase::SDFData* _sdf;
    GPUBuffer *_sdfSrc, *_sdfDst;
    GPUTexture* _sdfResult;
    Float3 _xyzToLocalMul, _xyzToLocalAdd;

    // Must match THREAD_GROUP_SIZE in the SDF shader
    const uint32 ThreadGroupSize = 64;

    // Constant buffer layout (mirrors the 'Data' constant buffer declared in the SDF shader)
    PACK_STRUCT(struct alignas(GPU_SHADER_DATA_ALIGNMENT) Data
        {
        Int3 Resolution;
        uint32 ResolutionSize;
        float MaxDistance;
        uint32 VertexStride;
        int32 Index16bit;
        uint32 TriangleCount;
        Float3 VoxelToPosMul;
        float WorldUnitsPerVoxel;
        Float3 VoxelToPosAdd;
        uint32 ThreadGroupsX;
        });

public:
    /// <summary>
    /// Initializes the task. Geometry is read from inputModel if set, otherwise from modelData.
    /// </summary>
    /// <param name="signal">The condition variable notified when the task ends (sync, fail or cancel).</param>
    /// <param name="inputModel">The source model (GPU mesh buffers are used), or null.</param>
    /// <param name="modelData">The source model data (CPU mesh data is uploaded), used when inputModel is null.</param>
    /// <param name="lodIndex">The LOD index to rasterize (clamped to the valid range).</param>
    /// <param name="resolution">The SDF volume resolution in voxels.</param>
    /// <param name="sdf">The SDF description (MaxDistance and WorldUnitsPerVoxel are read). Has to stay valid until the task ends.</param>
    /// <param name="sdfResult">The optional texture initialized as staging readback that receives the encoded SDF.</param>
    /// <param name="xyzToLocalMul">The voxel coordinate to local-space position multiplier.</param>
    /// <param name="xyzToLocalAdd">The voxel coordinate to local-space position offset.</param>
    GPUModelSDFTask(ConditionVariable& signal, Model* inputModel, ModelData* modelData, int32 lodIndex, const Int3& resolution, ModelBase::SDFData* sdf, GPUTexture* sdfResult, const Float3& xyzToLocalMul, const Float3& xyzToLocalAdd)
        : GPUTask(Type::Custom)
        , _signal(&signal)
        , _shader(Content::LoadAsyncInternal<Shader>(TEXT("Shaders/SDF")))
        , _inputModel(inputModel)
        , _modelData(modelData)
        , _lodIndex(lodIndex)
        , _resolution(resolution)
        , _sdf(sdf)
        , _sdfSrc(GPUBuffer::New())
        , _sdfDst(GPUBuffer::New())
        , _sdfResult(sdfResult)
        , _xyzToLocalMul(xyzToLocalMul)
        , _xyzToLocalAdd(xyzToLocalAdd)
    {
    }

    ~GPUModelSDFTask()
    {
        // The ping-pong buffers are owned by the task (sdfResult is owned by the caller)
        SAFE_DELETE_GPU_RESOURCE(_sdfSrc);
        SAFE_DELETE_GPU_RESOURCE(_sdfDst);
    }

    /// <summary>
    /// Records all SDF generation passes on the GPU context.
    /// </summary>
    /// <returns>Result::Failed if the shader or any GPU resource cannot be created, otherwise Result::Ok.</returns>
    Result run(GPUTasksContext* tasksContext) override
    {
        PROFILE_GPU_CPU("GPUModelSDFTask");
        GPUContext* context = tasksContext->GPU;

        // Allocate resources
        if (_shader == nullptr || _shader->WaitForLoaded())
            return Result::Failed;
        GPUShader* shader = _shader->GetShader();
        const uint32 resolutionSize = _resolution.X * _resolution.Y * _resolution.Z;
        auto desc = GPUBufferDescription::Typed(resolutionSize, PixelFormat::R32_UInt, true);
        // TODO: use transient texture (single frame)
        if (_sdfSrc->Init(desc) || _sdfDst->Init(desc))
            return Result::Failed;
        auto cb = shader->GetCB(0);
        Data data;
        data.Resolution = _resolution;
        data.ResolutionSize = resolutionSize;
        data.MaxDistance = _sdf->MaxDistance;
        data.WorldUnitsPerVoxel = _sdf->WorldUnitsPerVoxel;
        data.VoxelToPosMul = _xyzToLocalMul;
        data.VoxelToPosAdd = _xyzToLocalAdd;
        // Per-mesh fields are assigned before every rasterization dispatch; clear them so the first upload doesn't contain indeterminate memory
        data.VertexStride = 0;
        data.Index16bit = 0;
        data.TriangleCount = 0;

        // Dispatch in 1D and fallback to 2D when using large resolution
        Int3 threadGroups(Math::CeilToInt((float)resolutionSize / ThreadGroupSize), 1, 1);
        if (threadGroups.X > GPU_MAX_CS_DISPATCH_THREAD_GROUPS)
        {
            const uint32 groups = threadGroups.X;
            threadGroups.X = Math::CeilToInt(Math::Sqrt((float)groups));
            threadGroups.Y = Math::CeilToInt((float)groups / threadGroups.X);
        }
        data.ThreadGroupsX = threadGroups.X;

        // Init SDF volume
        context->BindCB(0, cb);
        context->UpdateCB(cb, &data);
        context->BindUA(0, _sdfSrc->View());
        context->Dispatch(shader->GetCS("CS_Init"), threadGroups.X, threadGroups.Y, threadGroups.Z);

        // Rendering input triangles into the SDF volume
        bool rasterizeFailed = false;
        if (_inputModel)
        {
            PROFILE_GPU_CPU_NAMED("Rasterize");
            const ModelLOD& lod = _inputModel->LODs[Math::Clamp(_lodIndex, _inputModel->HighestResidentLODIndex(), _inputModel->LODs.Count() - 1)];
            GPUBuffer *vbTemp = nullptr, *ibTemp = nullptr;
            for (int32 i = 0; i < lod.Meshes.Count(); i++)
            {
                const Mesh& mesh = lod.Meshes[i];
                const MaterialSlot& materialSlot = _inputModel->MaterialSlots[mesh.GetMaterialSlotIndex()];
                if (materialSlot.Material && !materialSlot.Material->WaitForLoaded())
                {
                    // Skip transparent materials
                    if (materialSlot.Material->GetInfo().BlendMode != MaterialBlendMode::Opaque)
                        continue;
                }

                // Skip meshes without GPU data (nothing to rasterize)
                GPUBuffer* vb = mesh.GetVertexBuffer(0);
                GPUBuffer* ib = mesh.GetIndexBuffer();
                if (vb == nullptr || ib == nullptr)
                    continue;

                data.Index16bit = mesh.Use16BitIndexBuffer() ? 1 : 0;
                data.VertexStride = vb->GetStride();
                data.TriangleCount = mesh.GetTriangleCount();
                if (data.TriangleCount == 0)
                    continue;
                const uint32 groups = Math::CeilToInt((float)data.TriangleCount / ThreadGroupSize);
                if (groups > GPU_MAX_CS_DISPATCH_THREAD_GROUPS)
                {
                    // TODO: support larger meshes via 2D dispatch
                    LOG(Error, "Not supported mesh with {} triangles.", data.TriangleCount);
                    continue;
                }
                context->UpdateCB(cb, &data);

                // The shader reads geometry via raw (byte address) buffers so copy the mesh buffers if they were not created that way
                if (!EnumHasAllFlags(vb->GetDescription().Flags, GPUBufferFlags::RawBuffer | GPUBufferFlags::ShaderResource))
                {
                    desc = GPUBufferDescription::Raw(vb->GetSize(), GPUBufferFlags::ShaderResource);
                    // TODO: use transient buffer (single frame)
                    if (!vbTemp)
                        vbTemp = GPUBuffer::New();
                    if (vbTemp->Init(desc))
                    {
                        rasterizeFailed = true;
                        break;
                    }
                    context->CopyBuffer(vbTemp, vb, desc.Size);
                    vb = vbTemp;
                }
                if (!EnumHasAllFlags(ib->GetDescription().Flags, GPUBufferFlags::RawBuffer | GPUBufferFlags::ShaderResource))
                {
                    desc = GPUBufferDescription::Raw(ib->GetSize(), GPUBufferFlags::ShaderResource);
                    // TODO: use transient buffer (single frame)
                    if (!ibTemp)
                        ibTemp = GPUBuffer::New();
                    if (ibTemp->Init(desc))
                    {
                        rasterizeFailed = true;
                        break;
                    }
                    context->CopyBuffer(ibTemp, ib, desc.Size);
                    ib = ibTemp;
                }

                context->BindSR(0, vb->View());
                context->BindSR(1, ib->View());
                context->Dispatch(shader->GetCS("CS_RasterizeTriangle"), groups, 1, 1);
            }
            SAFE_DELETE_GPU_RESOURCE(vbTemp);
            SAFE_DELETE_GPU_RESOURCE(ibTemp);
        }
        else if (_modelData)
        {
            PROFILE_GPU_CPU_NAMED("Rasterize");
            const ModelLodData& lod = _modelData->LODs[Math::Clamp(_lodIndex, 0, _modelData->LODs.Count() - 1)];
            auto vb = GPUBuffer::New();
            auto ib = GPUBuffer::New();
            for (int32 i = 0; i < lod.Meshes.Count(); i++)
            {
                const MeshData* mesh = lod.Meshes[i];
                const MaterialSlotEntry& materialSlot = _modelData->Materials[mesh->MaterialSlotIndex];
                auto material = Content::LoadAsync<MaterialBase>(materialSlot.AssetID);
                if (material && !material->WaitForLoaded())
                {
                    // Skip transparent materials
                    if (material->GetInfo().BlendMode != MaterialBlendMode::Opaque)
                        continue;
                }

                // CPU mesh data uses tightly packed Float3 positions and 32-bit indices
                data.Index16bit = 0;
                data.VertexStride = sizeof(Float3);
                data.TriangleCount = mesh->Indices.Count() / 3;
                if (data.TriangleCount == 0 || mesh->Positions.Count() == 0)
                    continue;
                const uint32 groups = Math::CeilToInt((float)data.TriangleCount / ThreadGroupSize);
                if (groups > GPU_MAX_CS_DISPATCH_THREAD_GROUPS)
                {
                    // TODO: support larger meshes via 2D dispatch
                    LOG(Error, "Not supported mesh with {} triangles.", data.TriangleCount);
                    continue;
                }
                context->UpdateCB(cb, &data);

                desc = GPUBufferDescription::Raw(mesh->Positions.Count() * sizeof(Float3), GPUBufferFlags::ShaderResource);
                desc.InitData = mesh->Positions.Get();
                // TODO: use transient buffer (single frame)
                if (vb->Init(desc))
                {
                    rasterizeFailed = true;
                    break;
                }
                desc = GPUBufferDescription::Raw(mesh->Indices.Count() * sizeof(uint32), GPUBufferFlags::ShaderResource);
                desc.InitData = mesh->Indices.Get();
                // TODO: use transient buffer (single frame)
                if (ib->Init(desc))
                {
                    rasterizeFailed = true;
                    break;
                }

                context->BindSR(0, vb->View());
                context->BindSR(1, ib->View());
                context->Dispatch(shader->GetCS("CS_RasterizeTriangle"), groups, 1, 1);
            }
            SAFE_DELETE_GPU_RESOURCE(vb);
            SAFE_DELETE_GPU_RESOURCE(ib);
        }
        if (rasterizeFailed)
            return Result::Failed;

        // Convert SDF volume data back to floats
        context->Dispatch(shader->GetCS("CS_Resolve"), threadGroups.X, threadGroups.Y, threadGroups.Z);

        // Run linear flood-fill loop to populate all voxels with valid distances (spreads the initial values from triangles rasterization)
        {
            PROFILE_GPU_CPU_NAMED("FloodFill");
            auto csFloodFill = shader->GetCS("CS_FloodFill");
            const int32 floodFillIterations = Math::Max(_resolution.MaxValue() / 2 + 1, 8);
            for (int32 floodFill = 0; floodFill < floodFillIterations; floodFill++)
            {
                // Ping-pong: read from src, write to dst, then swap so the latest data is always in src
                context->ResetUA();
                context->BindUA(0, _sdfDst->View());
                context->BindSR(0, _sdfSrc->View());
                context->Dispatch(csFloodFill, threadGroups.X, threadGroups.Y, threadGroups.Z);
                Swap(_sdfSrc, _sdfDst);
            }
        }

        // Encode SDF values into output storage
        context->ResetUA();
        context->BindSR(0, _sdfSrc->View());
        // TODO: update GPU SDF texture within this task to skip additional CPU->GPU copy
        auto sdfTextureDesc = GPUTextureDescription::New3D(_resolution.X, _resolution.Y, _resolution.Z, PixelFormat::R16_UNorm, GPUTextureFlags::UnorderedAccess | GPUTextureFlags::RenderTarget);
        // TODO: use transient texture (single frame)
        auto sdfTexture = GPUTexture::New();
        if (sdfTexture->Init(sdfTextureDesc))
        {
            SAFE_DELETE_GPU_RESOURCE(sdfTexture);
            return Result::Failed;
        }
        context->BindUA(1, sdfTexture->ViewVolume());
        context->Dispatch(shader->GetCS("CS_Encode"), threadGroups.X, threadGroups.Y, threadGroups.Z);

        // Copy result data into readback buffer
        bool readbackFailed = false;
        if (_sdfResult)
        {
            sdfTextureDesc = sdfTextureDesc.ToStagingReadback();
            readbackFailed = _sdfResult->Init(sdfTextureDesc);
            if (!readbackFailed)
                context->CopyTexture(_sdfResult, 0, 0, 0, 0, sdfTexture, 0);
        }

        SAFE_DELETE_GPU_RESOURCE(sdfTexture);
        return readbackFailed ? Result::Failed : Result::Ok;
    }

    // Wakes up the waiting thread after the GPU work has been synced
    void OnSync() override
    {
        GPUTask::OnSync();
        _signal->NotifyOne();
    }

    // Wakes up the waiting thread when the task fails
    void OnFail() override
    {
        GPUTask::OnFail();
        _signal->NotifyOne();
    }

    // Wakes up the waiting thread when the task gets cancelled
    void OnCancel() override
    {
        GPUTask::OnCancel();
        _signal->NotifyOne();
    }
};
bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float resolutionScale, int32 lodIndex, ModelBase::SDFData* outputSDF, MemoryWriteStream* outputStream, const StringView& assetName, float backfacesThreshold, bool useGPU)
{
PROFILE_CPU();
auto startTime = Platform::GetTimeSeconds();
@@ -127,7 +386,7 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float
*(uint8*)ptr = (uint8)v;
};
}
GPUTextureDescription textureDesc = GPUTextureDescription::New3D(resolution.X, resolution.Y, resolution.Z, format, GPUTextureFlags::ShaderResource, mipCount);
auto textureDesc = GPUTextureDescription::New3D(resolution.X, resolution.Y, resolution.Z, format, GPUTextureFlags::ShaderResource, mipCount);
if (outputSDF)
{
*outputSDF = sdf;
@@ -143,19 +402,10 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float
#endif
}
// TODO: support GPU to generate model SDF on-the-fly (if called during rendering)
// Setup acceleration structure for fast ray tracing the mesh triangles
MeshAccelerationStructure scene;
if (inputModel)
scene.Add(inputModel, lodIndex);
else if (modelData)
scene.Add(modelData, lodIndex);
scene.BuildBVH();
// Allocate memory for the distance field
const int32 voxelsSize = resolution.X * resolution.Y * resolution.Z * formatStride;
void* voxels = Allocator::Allocate(voxelsSize);
BytesContainer voxels;
voxels.Allocate(voxelsSize);
Float3 xyzToLocalMul = uvwToLocalMul / Float3(resolution - 1);
Float3 xyzToLocalAdd = uvwToLocalAdd;
const Float2 encodeMAD(0.5f / sdf.MaxDistance * formatMaxValue, 0.5f * formatMaxValue);
@@ -163,74 +413,125 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float
int32 voxelSizeSum = voxelsSize;
// TODO: use optimized sparse storage for SDF data as hierarchical bricks as in papers below:
// https://gpuopen.com/gdc-presentations/2023/GDC-2023-Sparse-Distance-Fields-For-Games.pdf + https://www.youtube.com/watch?v=iY15xhuuHPQ&ab_channel=AMD
// https://graphics.pixar.com/library/IrradianceAtlas/paper.pdf
// http://maverick.inria.fr/Membres/Cyril.Crassin/thesis/CCrassinThesis_EN_Web.pdf
// http://ramakarl.com/pdfs/2016_Hoetzlein_GVDB.pdf
// https://www.cse.chalmers.se/~uffe/HighResolutionSparseVoxelDAGs.pdf
// Brute-force for each voxel to calculate distance to the closest triangle with point query and distance sign by raycasting around the voxel
constexpr int32 sampleCount = 12;
Float3 sampleDirections[sampleCount];
// Check if run SDF generation on a GPU via Compute Shader or on a Job System
useGPU &= GPUDevice::Instance
&& GPUDevice::Instance->GetState() == GPUDevice::DeviceState::Ready
&& GPUDevice::Instance->Limits.HasCompute
&& format == PixelFormat::R16_UNorm
&& !IsInMainThread() // TODO: support GPU to generate model SDF on-the-fly directly into virtual model (if called during rendering)
&& resolution.MaxValue() > 8;
if (useGPU)
{
RandomStream rand;
sampleDirections[0] = Float3::Up;
sampleDirections[1] = Float3::Down;
sampleDirections[2] = Float3::Left;
sampleDirections[3] = Float3::Right;
sampleDirections[4] = Float3::Forward;
sampleDirections[5] = Float3::Backward;
for (int32 i = 6; i < sampleCount; i++)
sampleDirections[i] = rand.GetUnitVector();
}
Function<void(int32)> sdfJob = [&sdf, &resolution, &backfacesThreshold, sampleDirections, &sampleCount, &scene, &voxels, &xyzToLocalMul, &xyzToLocalAdd, &encodeMAD, &formatStride, &formatWrite](int32 z)
{
PROFILE_CPU_NAMED("Model SDF Job");
Real hitDistance;
Vector3 hitNormal, hitPoint;
Triangle hitTriangle;
const int32 zAddress = resolution.Y * resolution.X * z;
for (int32 y = 0; y < resolution.Y; y++)
PROFILE_CPU_NAMED("GPU");
// TODO: skip using sdfResult and downloading SDF from GPU when updating virtual model
auto sdfResult = GPUTexture::New();
// Run SDF generation via GPU async task
ConditionVariable signal;
CriticalSection mutex;
Task* task = New<GPUModelSDFTask>(signal, inputModel, modelData, lodIndex, resolution, &sdf, sdfResult, xyzToLocalMul, xyzToLocalAdd);
task->Start();
mutex.Lock();
signal.Wait(mutex);
mutex.Unlock();
bool failed = task->IsFailed();
// Gather result data from GPU to CPU
if (!failed && sdfResult)
{
const int32 yAddress = resolution.X * y + zAddress;
for (int32 x = 0; x < resolution.X; x++)
{
Real minDistance = sdf.MaxDistance;
Vector3 voxelPos = Float3((float)x, (float)y, (float)z) * xyzToLocalMul + xyzToLocalAdd;
// Point query to find the distance to the closest surface
scene.PointQuery(voxelPos, minDistance, hitPoint, hitTriangle);
// Raycast samples around voxel to count triangle backfaces hit
int32 hitBackCount = 0, hitCount = 0;
for (int32 sample = 0; sample < sampleCount; sample++)
{
Ray sampleRay(voxelPos, sampleDirections[sample]);
sampleRay.Position -= sampleRay.Direction * 0.0001f; // Apply small margin
if (scene.RayCast(sampleRay, hitDistance, hitNormal, hitTriangle))
{
if (hitDistance < minDistance)
minDistance = hitDistance;
hitCount++;
const bool backHit = Float3::Dot(sampleRay.Direction, hitTriangle.GetNormal()) > 0;
if (backHit)
hitBackCount++;
}
}
float distance = (float)minDistance;
// TODO: surface thickness threshold? shift reduce distance for all voxels by something like 0.01 to enlarge thin geometry
// if ((float)hitBackCount > (float)hitCount * 0.3f && hitCount != 0)
if ((float)hitBackCount > (float)sampleCount * backfacesThreshold && hitCount != 0)
{
// Voxel is inside the geometry so turn it into negative distance to the surface
distance *= -1;
}
const int32 xAddress = x + yAddress;
formatWrite((byte*)voxels + xAddress * formatStride, distance * encodeMAD.X + encodeMAD.Y);
}
TextureMipData mipData;
const uint32 rowPitch = resolution.X * formatStride;
failed = sdfResult->GetData(0, 0, mipData, rowPitch);
failed |= voxels.Length() != mipData.Data.Length();
if (!failed)
voxels = mipData.Data;
}
};
JobSystem::Execute(sdfJob, resolution.Z);
SAFE_DELETE_GPU_RESOURCE(sdfResult);
if (failed)
return true;
}
else
{
// Setup acceleration structure for fast ray tracing the mesh triangles
MeshAccelerationStructure scene;
if (inputModel)
scene.Add(inputModel, lodIndex);
else if (modelData)
scene.Add(modelData, lodIndex);
scene.BuildBVH();
// Brute-force for each voxel to calculate distance to the closest triangle with point query and distance sign by raycasting around the voxel
constexpr int32 sampleCount = 12;
Float3 sampleDirections[sampleCount];
{
RandomStream rand;
sampleDirections[0] = Float3::Up;
sampleDirections[1] = Float3::Down;
sampleDirections[2] = Float3::Left;
sampleDirections[3] = Float3::Right;
sampleDirections[4] = Float3::Forward;
sampleDirections[5] = Float3::Backward;
for (int32 i = 6; i < sampleCount; i++)
sampleDirections[i] = rand.GetUnitVector();
}
Function<void(int32)> sdfJob = [&sdf, &resolution, &backfacesThreshold, sampleDirections, &sampleCount, &scene, &voxels, &xyzToLocalMul, &xyzToLocalAdd, &encodeMAD, &formatStride, &formatWrite](int32 z)
{
PROFILE_CPU_NAMED("Model SDF Job");
Real hitDistance;
Vector3 hitNormal, hitPoint;
Triangle hitTriangle;
const int32 zAddress = resolution.Y * resolution.X * z;
for (int32 y = 0; y < resolution.Y; y++)
{
const int32 yAddress = resolution.X * y + zAddress;
for (int32 x = 0; x < resolution.X; x++)
{
Real minDistance = sdf.MaxDistance;
Vector3 voxelPos = Float3((float)x, (float)y, (float)z) * xyzToLocalMul + xyzToLocalAdd;
// Point query to find the distance to the closest surface
scene.PointQuery(voxelPos, minDistance, hitPoint, hitTriangle);
// Raycast samples around voxel to count triangle backfaces hit
int32 hitBackCount = 0, hitCount = 0;
for (int32 sample = 0; sample < sampleCount; sample++)
{
Ray sampleRay(voxelPos, sampleDirections[sample]);
sampleRay.Position -= sampleRay.Direction * 0.0001f; // Apply small margin
if (scene.RayCast(sampleRay, hitDistance, hitNormal, hitTriangle))
{
if (hitDistance < minDistance)
minDistance = hitDistance;
hitCount++;
const bool backHit = Float3::Dot(sampleRay.Direction, hitTriangle.GetNormal()) > 0;
if (backHit)
hitBackCount++;
}
}
float distance = (float)minDistance;
// TODO: surface thickness threshold? shift reduce distance for all voxels by something like 0.01 to enlarge thin geometry
// if ((float)hitBackCount > (float)hitCount * 0.3f && hitCount != 0)
if ((float)hitBackCount > (float)sampleCount * backfacesThreshold && hitCount != 0)
{
// Voxel is inside the geometry so turn it into negative distance to the surface
distance *= -1;
}
const int32 xAddress = x + yAddress;
formatWrite(voxels.Get() + xAddress * formatStride, distance * encodeMAD.X + encodeMAD.Y);
}
}
};
JobSystem::Execute(sdfJob, resolution.Z);
}
// Cache SDF data on a CPU
if (outputStream)
@@ -240,20 +541,19 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float
outputStream->WriteBytes(&data, sizeof(data));
ModelSDFMip mipData(0, resolution.X * formatStride, voxelsSize);
outputStream->WriteBytes(&mipData, sizeof(mipData));
outputStream->WriteBytes(voxels, voxelsSize);
outputStream->WriteBytes(voxels.Get(), voxelsSize);
}
// Upload data to the GPU
if (outputSDF)
{
BytesContainer data;
data.Link((byte*)voxels, voxelsSize);
auto task = outputSDF->Texture->UploadMipMapAsync(data, 0, resolution.X * formatStride, voxelsSize, true);
auto task = outputSDF->Texture->UploadMipMapAsync(voxels, 0, resolution.X * formatStride, voxelsSize, true);
if (task)
task->Start();
}
// Generate mip maps
void* voxelsMipSrc = voxels.Get();
void* voxelsMip = nullptr;
for (int32 mipLevel = 1; mipLevel < mipCount; mipLevel++)
{
@@ -263,7 +563,7 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float
voxelsMip = Allocator::Allocate(voxelsMipSize);
// Downscale mip
Function<void(int32)> mipJob = [&voxelsMip, &voxels, &resolution, &resolutionMip, &encodeMAD, &decodeMAD, &formatStride, &formatRead, &formatWrite](int32 z)
Function<void(int32)> mipJob = [&voxelsMip, &voxelsMipSrc, &resolution, &resolutionMip, &encodeMAD, &decodeMAD, &formatStride, &formatRead, &formatWrite](int32 z)
{
PROFILE_CPU_NAMED("Model SDF Mip Job");
const int32 zAddress = resolutionMip.Y * resolutionMip.X * z;
@@ -284,7 +584,7 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float
for (int32 dx = 0; dx < 2; dx++)
{
const int32 dxAddress = (x * 2 + dx) + dyAddress;
const float d = formatRead((byte*)voxels + dxAddress * formatStride) * decodeMAD.X + decodeMAD.Y;
const float d = formatRead((byte*)voxelsMipSrc + dxAddress * formatStride) * decodeMAD.X + decodeMAD.Y;
distance += d;
}
}
@@ -318,12 +618,11 @@ bool ModelTool::GenerateModelSDF(Model* inputModel, ModelData* modelData, float
// Go down
voxelSizeSum += voxelsSize;
Swap(voxelsMip, voxels);
Swap(voxelsMip, voxelsMipSrc);
resolution = resolutionMip;
}
Allocator::Free(voxelsMip);
Allocator::Free(voxels);
#if !BUILD_RELEASE
auto endTime = Platform::GetTimeSeconds();

View File

@@ -98,7 +98,7 @@ API_CLASS(Namespace="FlaxEngine.Tools", Static) class FLAXENGINE_API ModelTool
// Optional: inputModel or modelData
// Optional: outputSDF or null, outputStream or null
static bool GenerateModelSDF(class Model* inputModel, class ModelData* modelData, float resolutionScale, int32 lodIndex, ModelBase::SDFData* outputSDF, class MemoryWriteStream* outputStream, const StringView& assetName, float backfacesThreshold = 0.6f);
static bool GenerateModelSDF(class Model* inputModel, class ModelData* modelData, float resolutionScale, int32 lodIndex, ModelBase::SDFData* outputSDF, class MemoryWriteStream* outputStream, const StringView& assetName, float backfacesThreshold = 0.6f, bool useGPU = true);
#if USE_EDITOR

272
Source/Shaders/SDF.shader Normal file
View File

@@ -0,0 +1,272 @@
// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved.
// Mesh SDF generation based on https://github.com/GPUOpen-Effects/TressFX
#include "./Flax/Common.hlsl"
#include "./Flax/ThirdParty/TressFX/TressFXSDF.hlsl"
#define THREAD_GROUP_SIZE 64
META_CB_BEGIN(0, Data)
int3 Resolution;
uint ResolutionSize;
float MaxDistance;
uint VertexStride;
bool Index16bit;
uint TriangleCount;
float3 VoxelToPosMul;
float WorldUnitsPerVoxel;
float3 VoxelToPosAdd;
uint ThreadGroupsX;
META_CB_END
RWBuffer<uint> SDF : register(u0);
// Flattens the thread group id (x + y * ThreadGroupsX) and the thread index within the group into a linear voxel index.
uint GetVoxelIndex(uint3 groupId, uint groupIndex)
{
    uint linearGroup = groupId.x + groupId.y * ThreadGroupsX;
    return linearGroup * THREAD_GROUP_SIZE + groupIndex;
}
// Limits the voxel coordinate to the valid range [0; Resolution - 1] on each axis.
int3 ClampVoxelCoord(int3 coord)
{
    int3 lastVoxel = Resolution - 1;
    return min(max(coord, int3(0, 0, 0)), lastVoxel);
}
// Converts the 3D voxel coordinate into the linear voxel index (X-major, then Y, then Z).
int GetVoxelIndex(int3 coord)
{
    int row = coord.z * Resolution.y + coord.y;
    return row * Resolution.x + coord.x;
}
// Converts the voxel coordinate into a position using the VoxelToPosMul/VoxelToPosAdd transform.
float3 GetVoxelPos(int3 coord)
{
    float3 voxel = (float3)coord;
    return voxel * VoxelToPosMul + VoxelToPosAdd;
}
// Converts the position into the voxel coordinate (inverse of GetVoxelPos, truncated to integers; result is not clamped).
int3 GetVoxelCoord(float3 pos)
{
    float3 voxel = (pos - VoxelToPosAdd) / VoxelToPosMul;
    return (int3)voxel;
}
// Converts the linear voxel index into the 3D voxel coordinate (inverse of GetVoxelIndex for coordinates).
int3 GetVoxelCoord(uint index)
{
    uint rowSize = (uint)Resolution.x;
    uint sliceSize = (uint)(Resolution.x * Resolution.y);
    uint indexInSlice = index % sliceSize;
    int3 coord;
    coord.x = (int)(indexInSlice % rowSize);
    coord.y = (int)(indexInSlice / rowSize);
    coord.z = (int)(index / sliceSize);
    return coord;
}
// Fills the SDF buffer with the initial distance (10x MaxDistance, stored via FloatFlip3).
META_CS(true, FEATURE_LEVEL_SM5)
[numthreads(THREAD_GROUP_SIZE, 1, 1)]
void CS_Init(uint3 GroupId : SV_GroupID, uint GroupIndex : SV_GroupIndex)
{
    uint voxelIndex = GetVoxelIndex(GroupId, GroupIndex);
    if (voxelIndex < ResolutionSize)
    {
        // Start with a very large value so any rasterized triangle distance replaces it
        float initialDistance = MaxDistance * 10.0f;
        SDF[voxelIndex] = FloatFlip3(initialDistance);
    }
}
// Unpacks the SDF buffer in-place into distances stored as normal float values (FloatFlip3 encoding is used for interlocked operations on uint).
META_CS(true, FEATURE_LEVEL_SM5)
[numthreads(THREAD_GROUP_SIZE, 1, 1)]
void CS_Resolve(uint3 GroupId : SV_GroupID, uint GroupIndex : SV_GroupIndex)
{
    uint voxelIndex = GetVoxelIndex(GroupId, GroupIndex);
    if (voxelIndex < ResolutionSize)
    {
        uint flipped = SDF[voxelIndex];
        SDF[voxelIndex] = IFloatFlip3(flipped);
    }
}
#ifdef _CS_RasterizeTriangle
ByteAddressBuffer VertexBuffer : register(t0);
ByteAddressBuffer IndexBuffer : register(t1);
// Loads the i-th index from the raw index buffer (16-bit or 32-bit indices depending on Index16bit).
uint LoadIndex(uint i)
{
    if (!Index16bit)
        return IndexBuffer.Load(i << 2u);

    // Two 16-bit indices are packed in every 32-bit word: even index in the low half, odd index in the high half
    uint packed = IndexBuffer.Load((i >> 1u) << 2u);
    uint shift = (i & 1u) == 1u ? 16 : 0;
    return (packed >> shift) & 0xffff;
}
// Loads 3 floats located at the beginning of the i-th vertex (vertices are VertexStride bytes apart).
float3 LoadVertex(uint i)
{
    uint byteOffset = i * VertexStride;
    uint3 raw = VertexBuffer.Load3(byteOffset);
    return asfloat(raw);
}
// Rasterizes mesh triangles (one per thread) into the SDF buffer by writing the minimum distance to the triangle into all voxels around its bounds.
META_CS(true, FEATURE_LEVEL_SM5)
[numthreads(THREAD_GROUP_SIZE, 1, 1)]
void CS_RasterizeTriangle(uint3 DispatchThreadId : SV_DispatchThreadID)
{
    uint triangleIndex = DispatchThreadId.x;
    if (triangleIndex >= TriangleCount)
        return;

    // Fetch the triangle vertices
    uint firstIndex = triangleIndex * 3;
    float3 v0 = LoadVertex(LoadIndex(firstIndex + 0));
    float3 v1 = LoadVertex(LoadIndex(firstIndex + 1));
    float3 v2 = LoadVertex(LoadIndex(firstIndex + 2));

    // Calculate the range of voxels covered by the triangle bounds (extended by one voxel in position-space and one in voxel-space)
    float3 boundsMargin = float3(WorldUnitsPerVoxel, WorldUnitsPerVoxel, WorldUnitsPerVoxel);
    float3 boundsMin = min(min(v0, v1), v2) - boundsMargin;
    float3 boundsMax = max(max(v0, v1), v2) + boundsMargin;
    int3 coordMargin = int3(1, 1, 1);
    int3 voxelMin = ClampVoxelCoord(GetVoxelCoord(boundsMin) - coordMargin);
    int3 voxelMax = ClampVoxelCoord(GetVoxelCoord(boundsMax) + coordMargin);

    // Write the distance to the triangle into every voxel in the range (atomic min as many triangles can touch the same voxel)
    int3 voxelCoord;
    for (voxelCoord.z = voxelMin.z; voxelCoord.z <= voxelMax.z; voxelCoord.z++)
    {
        for (voxelCoord.y = voxelMin.y; voxelCoord.y <= voxelMax.y; voxelCoord.y++)
        {
            for (voxelCoord.x = voxelMin.x; voxelCoord.x <= voxelMax.x; voxelCoord.x++)
            {
                int voxelIndex = GetVoxelIndex(voxelCoord);
                float distance = SignedDistancePointToTriangle(GetVoxelPos(voxelCoord), v0, v1, v2);
                InterlockedMin(SDF[voxelIndex], FloatFlip3(distance));
            }
        }
    }
}
#endif
#if defined(_CS_FloodFill) || defined(_CS_Encode)
Buffer<uint> InSDF : register(t0);
// Reads the distance of the voxel at the given linear index (the buffer stores float bits as uint).
float GetVoxel(int voxelIndex)
{
    uint bits = InSDF[voxelIndex];
    return asfloat(bits);
}
// Reads the distance of the voxel at the given coordinate (coordinate is clamped to the volume bounds).
float GetVoxel(int3 coord)
{
    int3 clampedCoord = ClampVoxelCoord(coord);
    return GetVoxel(GetVoxelIndex(clampedCoord));
}
// Merges the current voxel distance with the distance propagated from the nearby voxel (nearbyDistance is the spacing between both voxels).
float CombineSDF(float sdf, int3 nearbyCoord, float nearbyDistance)
{
    // Sample nearby voxel and extend its distance by the step to reach it (keeping the sign)
    float sdfNearby = GetVoxel(nearbyCoord);
    float step = nearbyDistance;
    if (sdfNearby < 0.0f)
        step *= -1;
    sdfNearby += step;

    // Nearby sample is invalid (see CS_Init) so keep the current value
    if (sdfNearby > MaxDistance)
        return sdf;

    // Current value is invalid (see CS_Init) so take the nearby one
    if (sdf > MaxDistance)
        return sdfNearby;

    // Both are valid so pick the distance closer to 0
    return sdf >= 0 ? min(sdf, sdfNearby) : max(sdf, sdfNearby);
}
// Fills the voxels with minimum distances to the triangles.
// Single flood-fill step: reads the voxel from InSDF, combines it with the 6 axis-aligned neighbors and writes the result to SDF.
META_CS(true, FEATURE_LEVEL_SM5)
[numthreads(THREAD_GROUP_SIZE, 1, 1)]
void CS_FloodFill(uint3 GroupId : SV_GroupID, uint GroupIndex : SV_GroupIndex)
{
uint voxelIndex = GetVoxelIndex(GroupId, GroupIndex);
if (voxelIndex >= ResolutionSize)
return;
float sdf = GetVoxel(voxelIndex);
// Skip if the distance is already so small that we know that triangle is nearby
if (abs(sdf) > WorldUnitsPerVoxel * 1.2f)
{
int3 voxelCoord = GetVoxelCoord(voxelIndex);
// Swizzles of this vector build neighbor offsets: x = -1, y = 0, z = +1 (eg. offset.zyy = (1, 0, 0))
int3 offset = int3(-1, 0, 1);
// Sample nearby voxels
// Face neighbors (one voxel away along a single axis)
float nearbyDistance = WorldUnitsPerVoxel;
sdf = CombineSDF(sdf, voxelCoord + offset.zyy, nearbyDistance);
sdf = CombineSDF(sdf, voxelCoord + offset.yzy, nearbyDistance);
sdf = CombineSDF(sdf, voxelCoord + offset.yyz, nearbyDistance);
sdf = CombineSDF(sdf, voxelCoord + offset.xyy, nearbyDistance);
sdf = CombineSDF(sdf, voxelCoord + offset.yxy, nearbyDistance);
sdf = CombineSDF(sdf, voxelCoord + offset.yyx, nearbyDistance);
// Disabled: edge neighbors (offset along two axes, distance scaled by sqrt(2))
#if 0
nearbyDistance = WorldUnitsPerVoxel * 1.41421f;
sdf = CombineSDF(sdf, voxelCoord + offset.xxy, nearbyDistance);
sdf = CombineSDF(sdf, voxelCoord + offset.xzy, nearbyDistance);
sdf = CombineSDF(sdf, voxelCoord + offset.zzy, nearbyDistance);
sdf = CombineSDF(sdf, voxelCoord + offset.zxy, nearbyDistance);
sdf = CombineSDF(sdf, voxelCoord + offset.xyx, nearbyDistance);
sdf = CombineSDF(sdf, voxelCoord + offset.xyz, nearbyDistance);
sdf = CombineSDF(sdf, voxelCoord + offset.zyz, nearbyDistance);
sdf = CombineSDF(sdf, voxelCoord + offset.zyx, nearbyDistance);
sdf = CombineSDF(sdf, voxelCoord + offset.yxx, nearbyDistance);
sdf = CombineSDF(sdf, voxelCoord + offset.yxz, nearbyDistance);
sdf = CombineSDF(sdf, voxelCoord + offset.yzz, nearbyDistance);
sdf = CombineSDF(sdf, voxelCoord + offset.yzx, nearbyDistance);
#endif
// Disabled: corner neighbors (offset along all three axes, distance scaled by sqrt(3))
#if 0
nearbyDistance = WorldUnitsPerVoxel * 1.73205f;
sdf = CombineSDF(sdf, voxelCoord + offset.xxx, nearbyDistance);
sdf = CombineSDF(sdf, voxelCoord + offset.xxz, nearbyDistance);
sdf = CombineSDF(sdf, voxelCoord + offset.xzx, nearbyDistance);
sdf = CombineSDF(sdf, voxelCoord + offset.xzz, nearbyDistance);
sdf = CombineSDF(sdf, voxelCoord + offset.zxx, nearbyDistance);
sdf = CombineSDF(sdf, voxelCoord + offset.zxz, nearbyDistance);
sdf = CombineSDF(sdf, voxelCoord + offset.zzx, nearbyDistance);
sdf = CombineSDF(sdf, voxelCoord + offset.zzz, nearbyDistance);
#endif
}
// Always write the voxel (even if unchanged) so the output buffer is fully populated; float bits are stored as uint
SDF[voxelIndex] = asuint(sdf);
}
// Destination volume written by CS_Encode (one encoded distance per voxel).
RWTexture3D<half> SDFtex : register(u1);
// Encodes SDF values into the packed format with normalized distances.
// Every thread processes a single voxel: it reads the stored distance, limits it from above
// to MaxDistance, remaps it with (x / MaxDistance) * 0.5 + 0.5 and stores the result in SDFtex.
// NOTE(review): only the upper bound is clamped here; values below -MaxDistance map below 0.
// Presumably the destination format saturates them - confirm against the texture format in use.
META_CS(true, FEATURE_LEVEL_SM5)
[numthreads(THREAD_GROUP_SIZE, 1, 1)]
void CS_Encode(uint3 GroupId : SV_GroupID, uint GroupIndex : SV_GroupIndex)
{
    uint index = GetVoxelIndex(GroupId, GroupIndex);

    // Threads that fall past the end of the voxel range have nothing to write
    if (index < ResolutionSize)
    {
        float distance = min(GetVoxel(index), MaxDistance);

        // Pack from range [-MaxDistance; +MaxDistance] to [0; 1]
        float packed = (distance / MaxDistance) * 0.5f + 0.5f;

        int3 coord = GetVoxelCoord(index);
        SDFtex[coord] = packed;
    }
}
#endif

View File

@@ -0,0 +1,121 @@
// Source: https://github.com/GPUOpen-Effects/TressFX
// License: MIT
//
// Copyright (c) 2019 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//
//When building the SDF we want to find the lowest distance at each SDF cell. In order to allow multiple threads to write to the same
//cells, it is necessary to use atomics. However, there is no support for atomics with 32-bit floats so we convert the float into unsigned int
//and use atomic_min() / InterlockedMin() as a workaround.
//
//When used with atomic_min, both FloatFlip2() and FloatFlip3() will store the float with the lowest magnitude.
//The difference is that FloatFlip2() will prefer negative values ( InterlockedMin( FloatFlip2(1.0), FloatFlip2(-1.0) ) == -1.0 ),
//while FloatFlip3() prefers positive values ( InterlockedMin( FloatFlip3(1.0), FloatFlip3(-1.0) ) == 1.0 ).
//Using FloatFlip3() seems to result in a SDF with higher quality compared to FloatFlip2().
// Converts a float into an unsigned key: the raw bits are shifted left by one and the
// inverted sign bit is placed in the least significant bit (0 == negative, 1 == positive).
// For two values of equal magnitude the negative one therefore produces the smaller key.
uint FloatFlip2(float fl)
{
    uint bits = asuint(fl);
    uint invertedSign = (bits >> 31) ^ 0x00000001; // 1 for positive input, 0 for negative input
    uint shifted = bits << 1;                      // drops the sign bit, exponent+mantissa move up
    return shifted | invertedSign;
}
// Inverse of FloatFlip2: restores the original float bit pattern from the key.
// The least significant bit is inverted back and moved to the sign position (bit 31),
// the remaining bits are shifted right by one.
uint IFloatFlip2(uint f2)
{
    uint signBit = (f2 << 31) ^ 0x80000000;
    uint magnitudeBits = f2 >> 1;
    return magnitudeBits | signBit;
}
// Converts a float into an unsigned key by rotating the raw bits left by one, so the
// sign bit ends up in the least significant bit (0 == positive, 1 == negative).
// For two values of equal magnitude the positive one therefore produces the smaller key.
uint FloatFlip3(float fl)
{
    uint bits = asuint(fl);
    uint signAsLsb = bits >> 31;
    uint shifted = bits << 1;
    return shifted | signAsLsb;
}
// Inverse of FloatFlip3: rotates the key right by one bit, moving the least significant
// bit back to the sign position (bit 31) to restore the original float bit pattern.
uint IFloatFlip3(uint f2)
{
    uint signBit = f2 << 31;
    uint magnitudeBits = f2 >> 1;
    return magnitudeBits | signBit;
}
// Computes the unsigned distance from point p to the line segment (x0, x1).
// p  - query point
// x0 - first segment end point
// x1 - second segment end point
// n  - [out] direction from the closest point on the segment towards p, divided by (distance + 1e-30)
// Returns the distance between p and the closest point on the segment.
float DistancePointToEdge(float3 p, float3 x0, float3 x1, out float3 n)
{
    float3 edge = x1 - x0;

    // Interpolation weight of x0 (t == 1 selects x0, t == 0 selects x1), limited to the segment.
    // The min-then-max form is kept exactly as written on purpose.
    float t = dot(x1 - p, edge) / dot(edge, edge);
    t = max(0.0f, min(t, 1.0f));

    float3 closest = t*x0 + (1.0f - t)*x1;
    float3 toPoint = p - closest;
    float dist = length(toPoint);

    // The tiny epsilon avoids a division by zero when p lies on the segment
    n = toPoint / (dist + 1e-30f);
    return dist;
}
// Computes the signed distance from point p to the triangle (x0, x1, x2).
// The magnitude is the distance from p to its closest point on the triangle: the projection of p
// onto the triangle plane when that projection lies inside the triangle, otherwise the closest
// point on one of the three edges.
// The sign tells on which side of the triangle p is: negative when p is on the negative side,
// positive (or zero) otherwise.
// Positive side is where the normal vector of triangle ( (x1-x0) x (x2-x0) ) is pointing to.
float SignedDistancePointToTriangle(float3 p, float3 x0, float3 x1, float3 x2)
{
    // Build an orthonormal 2D basis (x02, x12) in the triangle plane with the origin at x2.
    // The tiny epsilons avoid divisions by zero for degenerate triangles.
    float3 x02 = x0 - x2;
    float l0 = length(x02) + 1e-30f;
    x02 = x02 / l0;
    float3 x12 = x1 - x2;
    float l1 = dot(x12, x02);
    x12 = x12 - l1*x02;
    float l2 = length(x12) + 1e-30f;
    x12 = x12 / l2;

    // Barycentric weights of the projection of p onto the triangle plane (a for x0, b for x1, c for x2)
    float3 px2 = p - x2;
    float b = dot(x12, px2) / l2;
    float a = (dot(x02, px2) - l1*b) / l0;
    float c = 1 - a - b;

    float d;
    float tol = 1e-8f;
    if (a >= -tol && b >= -tol && c >= -tol)
    {
        // Projection falls inside the triangle: measure the distance to the projected point
        d = length(p - (a*x0 + b*x1 + c*x2));
    }
    else
    {
        // Projection falls outside the triangle: the closest point lies on one of the edges.
        // The edge helper also outputs a direction vector which is not needed here.
        float3 unusedDirection;
        d = DistancePointToEdge(p, x0, x1, unusedDirection);
        d = min(d, DistancePointToEdge(p, x1, x2, unusedDirection));
        d = min(d, DistancePointToEdge(p, x0, x2, unusedDirection));
    }

    // Normal vector of triangle. Only its direction matters for the sign test so it is left unnormalized.
    float3 nTri = cross((x1 - x0), (x2 - x0));
    return (dot(p - x0, nTri) < 0.f) ? -d : d;
}