Optimize Global Surface Atlas sampling with 3d-grid culling into chunks
This commit is contained in:
BIN
Content/Shaders/GlobalSurfaceAtlas.flax
(Stored with Git LFS)
BIN
Content/Shaders/GlobalSurfaceAtlas.flax
(Stored with Git LFS)
Binary file not shown.
@@ -18,14 +18,17 @@
|
||||
#include "Engine/Utilities/RectPack.h"
|
||||
|
||||
// This must match HLSL
|
||||
#define GLOBAL_SURFACE_ATLAS_OBJECT_BUFFER_STRIDE 6 // Amount of float4s per-object
|
||||
#define GLOBAL_SURFACE_ATLAS_TILE_BUFFER_STRIDE 5 // Amount of float4s per-tile
|
||||
#define GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION 40 // Amount of chunks (in each direction) to split atlas draw distance for objects culling
|
||||
#define GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE 4
|
||||
#define GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE 6 // Amount of float4s per-object
|
||||
#define GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE 5 // Amount of float4s per-tile
|
||||
#define GLOBAL_SURFACE_ATLAS_TILE_PADDING 1 // 1px padding to prevent color bleeding between tiles
|
||||
#define GLOBAL_SURFACE_ATLAS_TILE_PROJ_PLANE_OFFSET 0.1f // Small offset to prevent clipping with the closest triangles (shifts near and far planes)
|
||||
#define GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES 0 // Forces to redraw all object tiles every frame
|
||||
#define GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_OBJECTS 0 // Debug draws object bounds on redraw (and tile draw projection locations)
|
||||
#define GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_CHUNKS 0 // Debug draws culled chunks bounds (non-empty)
|
||||
|
||||
#if GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_OBJECTS
|
||||
#if GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_OBJECTS || GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_CHUNKS
|
||||
#include "Engine/Debug/DebugDraw.h"
|
||||
#endif
|
||||
|
||||
@@ -33,7 +36,8 @@ PACK_STRUCT(struct Data0
|
||||
{
|
||||
Vector3 ViewWorldPos;
|
||||
float ViewNearPlane;
|
||||
Vector2 Padding00;
|
||||
float Padding00;
|
||||
uint32 CulledObjectsCapacity;
|
||||
float LightShadowsStrength;
|
||||
float ViewFarPlane;
|
||||
Vector4 ViewFrustumWorldRays[4];
|
||||
@@ -46,8 +50,7 @@ PACK_STRUCT(struct AtlasTileVertex
|
||||
{
|
||||
Half2 Position;
|
||||
Half2 TileUV;
|
||||
uint16 ObjectIndex;
|
||||
uint16 TileIndex;
|
||||
uint32 TileAddress;
|
||||
});
|
||||
|
||||
struct GlobalSurfaceAtlasTile : RectPack<GlobalSurfaceAtlasTile, uint16>
|
||||
@@ -56,7 +59,8 @@ struct GlobalSurfaceAtlasTile : RectPack<GlobalSurfaceAtlasTile, uint16>
|
||||
Vector3 ViewPosition;
|
||||
Vector3 ViewBoundsSize;
|
||||
Matrix ViewMatrix;
|
||||
uint16 TileIndex;
|
||||
uint32 Address;
|
||||
uint32 ObjectAddressOffset;
|
||||
|
||||
GlobalSurfaceAtlasTile(uint16 x, uint16 y, uint16 width, uint16 height)
|
||||
: RectPack<GlobalSurfaceAtlasTile, uint16>(x, y, width, height)
|
||||
@@ -75,7 +79,6 @@ struct GlobalSurfaceAtlasObject
|
||||
uint64 LastFrameUsed;
|
||||
uint64 LastFrameDirty;
|
||||
GlobalSurfaceAtlasTile* Tiles[6];
|
||||
uint32 Index;
|
||||
float Radius;
|
||||
OrientedBoundingBox Bounds;
|
||||
|
||||
@@ -119,26 +122,18 @@ public:
|
||||
GPUTexture* AtlasGBuffer1 = nullptr;
|
||||
GPUTexture* AtlasGBuffer2 = nullptr;
|
||||
GPUTexture* AtlasDirectLight = nullptr;
|
||||
DynamicTypedBuffer ObjectsBuffer;
|
||||
DynamicTypedBuffer TilesBuffer;
|
||||
uint32 ObjectIndexCounter;
|
||||
uint16 TileIndexCounter;
|
||||
GPUBuffer* ChunksBuffer = nullptr;
|
||||
GPUBuffer* CulledObjectsBuffer = nullptr;
|
||||
int32 CulledObjectsCounterIndex = -1;
|
||||
GlobalSurfaceAtlasPass::BindingData Result;
|
||||
GlobalSurfaceAtlasTile* AtlasTiles = nullptr; // TODO: optimize with a single allocation for atlas tiles
|
||||
Dictionary<Actor*, GlobalSurfaceAtlasObject> Objects;
|
||||
|
||||
GlobalSurfaceAtlasCustomBuffer()
|
||||
: ObjectsBuffer(256 * GLOBAL_SURFACE_ATLAS_OBJECT_BUFFER_STRIDE, PixelFormat::R32G32B32A32_Float, false, TEXT("GlobalSurfaceAtlas.ObjectsBuffer"))
|
||||
, TilesBuffer(256 * GLOBAL_SURFACE_ATLAS_TILE_BUFFER_STRIDE * 3 / 4, PixelFormat::R32G32B32A32_Float, false, TEXT("GlobalSurfaceAtlas.TilesBuffer"))
|
||||
{
|
||||
}
|
||||
|
||||
FORCE_INLINE void ClearObjects()
|
||||
{
|
||||
CulledObjectsCounterIndex = -1;
|
||||
LastFrameAtlasDefragmentation = Engine::FrameCount;
|
||||
SAFE_DELETE(AtlasTiles);
|
||||
ObjectsBuffer.Clear();
|
||||
TilesBuffer.Clear();
|
||||
Objects.Clear();
|
||||
}
|
||||
|
||||
@@ -155,6 +150,8 @@ public:
|
||||
|
||||
~GlobalSurfaceAtlasCustomBuffer()
|
||||
{
|
||||
SAFE_DELETE_GPU_RESOURCE(ChunksBuffer);
|
||||
SAFE_DELETE_GPU_RESOURCE(CulledObjectsBuffer);
|
||||
Clear();
|
||||
}
|
||||
};
|
||||
@@ -200,6 +197,7 @@ bool GlobalSurfaceAtlasPass::setupResources()
|
||||
_cb0 = shader->GetCB(0);
|
||||
if (!_cb0)
|
||||
return true;
|
||||
_csCullObjects = shader->GetCS("CS_CullObjects");
|
||||
|
||||
// Create pipeline state
|
||||
GPUPipelineState::Description psDesc = GPUPipelineState::Description::DefaultFullscreenTriangle;
|
||||
@@ -260,6 +258,8 @@ void GlobalSurfaceAtlasPass::Dispose()
|
||||
|
||||
// Cleanup
|
||||
SAFE_DELETE(_vertexBuffer);
|
||||
SAFE_DELETE(_objectsBuffer);
|
||||
SAFE_DELETE_GPU_RESOURCE(_culledObjectsSizeBuffer);
|
||||
SAFE_DELETE_GPU_RESOURCE(_psClear);
|
||||
SAFE_DELETE_GPU_RESOURCE(_psDirectLighting0);
|
||||
SAFE_DELETE_GPU_RESOURCE(_psDirectLighting1);
|
||||
@@ -317,6 +317,13 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
INIT_ATLAS_TEXTURE(AtlasDepth, PixelFormat::D16_UNorm);
|
||||
#undef INIT_ATLAS_TEXTURE
|
||||
surfaceAtlasData.Resolution = resolution;
|
||||
if (!surfaceAtlasData.ChunksBuffer)
|
||||
{
|
||||
surfaceAtlasData.ChunksBuffer = GPUDevice::Instance->CreateBuffer(TEXT("GlobalSurfaceAtlas.ChunksBuffer"));
|
||||
if (surfaceAtlasData.ChunksBuffer->Init(GPUBufferDescription::Raw(sizeof(uint32) * GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION * GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION * GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION, GPUBufferFlags::ShaderResource | GPUBufferFlags::UnorderedAccess)))
|
||||
return true;
|
||||
memUsage += surfaceAtlasData.ChunksBuffer->GetMemoryUsage();
|
||||
}
|
||||
LOG(Info, "Global Surface Atlas resolution: {0}, memory usage: {1} MB", resolution, memUsage / 1024 / 1024);
|
||||
}
|
||||
else
|
||||
@@ -333,6 +340,10 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
surfaceAtlasData.AtlasTiles = New<GlobalSurfaceAtlasTile>(0, 0, resolution, resolution);
|
||||
if (!_vertexBuffer)
|
||||
_vertexBuffer = New<DynamicVertexBuffer>(0u, (uint32)sizeof(AtlasTileVertex), TEXT("GlobalSurfaceAtlas.VertexBuffer"));
|
||||
if (!_objectsBuffer)
|
||||
_objectsBuffer = New<DynamicTypedBuffer>(256 * (GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE + GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE * 3 / 4), PixelFormat::R32G32B32A32_Float, false, TEXT("GlobalSurfaceAtlas.ObjectsBuffer"));
|
||||
|
||||
// Utility for writing into tiles vertex buffer
|
||||
const Vector2 posToClipMul(2.0f * resolutionInv, -2.0f * resolutionInv);
|
||||
const Vector2 posToClipAdd(-1.0f, 1.0f);
|
||||
#define VB_WRITE_TILE_POS_ONLY(tile) \
|
||||
@@ -350,11 +361,11 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
Half2 min(minPos * posToClipMul + posToClipAdd), max(maxPos * posToClipMul + posToClipAdd); \
|
||||
Vector2 minUV(0, 0), maxUV(1, 1); \
|
||||
auto* quad = _vertexBuffer->WriteReserve<AtlasTileVertex>(6); \
|
||||
quad[0] = { { max }, { maxUV }, (uint16)object.Index, tile->TileIndex }; \
|
||||
quad[1] = { { min.X, max.Y }, { minUV.X, maxUV.Y }, (uint16)object.Index, tile->TileIndex }; \
|
||||
quad[2] = { { min }, { minUV }, (uint16)object.Index, tile->TileIndex }; \
|
||||
quad[0] = { { max }, { maxUV }, tile->Address }; \
|
||||
quad[1] = { { min.X, max.Y }, { minUV.X, maxUV.Y }, tile->Address }; \
|
||||
quad[2] = { { min }, { minUV }, tile->Address }; \
|
||||
quad[3] = quad[2]; \
|
||||
quad[4] = { { max.X, min.Y }, { maxUV.X, minUV.Y }, (uint16)object.Index, tile->TileIndex }; \
|
||||
quad[4] = { { max.X, min.Y }, { maxUV.X, minUV.Y }, tile->Address }; \
|
||||
quad[5] = quad[0]
|
||||
#define VB_DRAW() \
|
||||
_vertexBuffer->Flush(context); \
|
||||
@@ -363,15 +374,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
context->DrawInstanced(_vertexBuffer->Data.Count() / sizeof(AtlasTileVertex), 1);
|
||||
|
||||
// Add objects into the atlas
|
||||
surfaceAtlasData.ObjectsBuffer.Clear();
|
||||
surfaceAtlasData.TilesBuffer.Clear();
|
||||
surfaceAtlasData.ObjectIndexCounter = 0;
|
||||
{
|
||||
// Tile at index 0 is invalid
|
||||
surfaceAtlasData.TileIndexCounter = 1;
|
||||
auto* tileData = surfaceAtlasData.TilesBuffer.WriteReserve<Vector4>(GLOBAL_SURFACE_ATLAS_TILE_BUFFER_STRIDE);
|
||||
Platform::MemoryClear(tileData, sizeof(Vector4) * GLOBAL_SURFACE_ATLAS_TILE_BUFFER_STRIDE);
|
||||
}
|
||||
_objectsBuffer->Clear();
|
||||
_dirtyObjectsBuffer.Clear();
|
||||
{
|
||||
PROFILE_CPU_NAMED("Draw");
|
||||
@@ -480,24 +483,26 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
// Write to objects buffer (this must match unpacking logic in HLSL)
|
||||
Matrix worldToLocalBounds;
|
||||
Matrix::Invert(object->Bounds.Transformation, worldToLocalBounds);
|
||||
// TODO: cache data for static objects to optimize CPU perf (move ObjectsBuffer into surfaceAtlasData)
|
||||
object->Index = surfaceAtlasData.ObjectIndexCounter++;
|
||||
auto* objectData = surfaceAtlasData.ObjectsBuffer.WriteReserve<Vector4>(GLOBAL_SURFACE_ATLAS_OBJECT_BUFFER_STRIDE);
|
||||
uint32 objectAddress = _objectsBuffer->Data.Count() / sizeof(Vector4);
|
||||
auto* objectData = _objectsBuffer->WriteReserve<Vector4>(GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE);
|
||||
objectData[0] = *(Vector4*)&e.Bounds;
|
||||
objectData[1] = Vector4(worldToLocalBounds.M11, worldToLocalBounds.M12, worldToLocalBounds.M13, worldToLocalBounds.M41);
|
||||
objectData[2] = Vector4(worldToLocalBounds.M21, worldToLocalBounds.M22, worldToLocalBounds.M23, worldToLocalBounds.M42);
|
||||
objectData[3] = Vector4(worldToLocalBounds.M31, worldToLocalBounds.M32, worldToLocalBounds.M33, worldToLocalBounds.M43);
|
||||
objectData[4] = Vector4(object->Bounds.Extents, 0.0f); // w unused
|
||||
objectData[5] = Vector4::Zero; // w unused
|
||||
auto tileIndices = reinterpret_cast<uint16*>(&objectData[5]); // xyz used for tile indices packed into uint16
|
||||
// TODO: try to optimize memory footprint (eg. merge scale into extents and use rotation+offset but reconstruct rotation from two axes with sign)
|
||||
objectData[1] = Vector4::Zero; // w unused
|
||||
objectData[2] = Vector4(worldToLocalBounds.M11, worldToLocalBounds.M12, worldToLocalBounds.M13, worldToLocalBounds.M41);
|
||||
objectData[3] = Vector4(worldToLocalBounds.M21, worldToLocalBounds.M22, worldToLocalBounds.M23, worldToLocalBounds.M42);
|
||||
objectData[4] = Vector4(worldToLocalBounds.M31, worldToLocalBounds.M32, worldToLocalBounds.M33, worldToLocalBounds.M43);
|
||||
objectData[5] = Vector4(object->Bounds.Extents, 0.0f); // w unused
|
||||
auto tileOffsets = reinterpret_cast<uint16*>(&objectData[1]); // xyz used for tile offsets packed into uint16
|
||||
auto objectDataSize = reinterpret_cast<uint32*>(&objectData[1].W); // w used for object size (count of Vector4s for object+tiles)
|
||||
*objectDataSize = GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE;
|
||||
for (int32 tileIndex = 0; tileIndex < 6; tileIndex++)
|
||||
{
|
||||
auto* tile = object->Tiles[tileIndex];
|
||||
if (!tile)
|
||||
continue;
|
||||
tile->TileIndex = surfaceAtlasData.TileIndexCounter++;
|
||||
tileIndices[tileIndex] = tile->TileIndex;
|
||||
tile->ObjectAddressOffset = *objectDataSize;
|
||||
tile->Address = objectAddress + tile->ObjectAddressOffset;
|
||||
tileOffsets[tileIndex] = tile->ObjectAddressOffset;
|
||||
*objectDataSize += GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE;
|
||||
|
||||
// Setup view to render object from the side
|
||||
Vector3 xAxis, yAxis, zAxis = Vector3::Zero;
|
||||
@@ -530,7 +535,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
// Per-tile data
|
||||
const float tileWidth = (float)tile->Width - GLOBAL_SURFACE_ATLAS_TILE_PADDING;
|
||||
const float tileHeight = (float)tile->Height - GLOBAL_SURFACE_ATLAS_TILE_PADDING;
|
||||
auto* tileData = surfaceAtlasData.TilesBuffer.WriteReserve<Vector4>(GLOBAL_SURFACE_ATLAS_TILE_BUFFER_STRIDE);
|
||||
auto* tileData = _objectsBuffer->WriteReserve<Vector4>(GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE);
|
||||
tileData[0] = Vector4(tile->X, tile->Y, tileWidth, tileHeight) * resolutionInv;
|
||||
tileData[1] = Vector4(tile->ViewMatrix.M11, tile->ViewMatrix.M12, tile->ViewMatrix.M13, tile->ViewMatrix.M41);
|
||||
tileData[2] = Vector4(tile->ViewMatrix.M21, tile->ViewMatrix.M22, tile->ViewMatrix.M23, tile->ViewMatrix.M42);
|
||||
@@ -558,13 +563,6 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
}
|
||||
}
|
||||
|
||||
// Send objects data to the GPU
|
||||
{
|
||||
PROFILE_GPU_CPU("Update Objects");
|
||||
surfaceAtlasData.ObjectsBuffer.Flush(context);
|
||||
surfaceAtlasData.TilesBuffer.Flush(context);
|
||||
}
|
||||
|
||||
// Rasterize world geometry material properties into Global Surface Atlas
|
||||
if (_dirtyObjectsBuffer.Count() != 0)
|
||||
{
|
||||
@@ -678,16 +676,142 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
RenderList::ReturnToPool(renderContextTiles.List);
|
||||
}
|
||||
|
||||
// Send objects data to the GPU
|
||||
{
|
||||
PROFILE_GPU_CPU("Update Objects");
|
||||
_objectsBuffer->Flush(context);
|
||||
}
|
||||
|
||||
// Init constants
|
||||
result.GlobalSurfaceAtlas.ViewPos = renderContext.View.Position;
|
||||
result.GlobalSurfaceAtlas.Resolution = (float)resolution;
|
||||
result.GlobalSurfaceAtlas.ChunkSize = distance / (float)GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION;
|
||||
result.GlobalSurfaceAtlas.ObjectsCount = surfaceAtlasData.Objects.Count();
|
||||
|
||||
// Cull objects into chunks (for faster Atlas sampling)
|
||||
if (surfaceAtlasData.Objects.Count() != 0)
|
||||
{
|
||||
// Each chunk (ChunksBuffer) contains uint with address of the culled objects data start in CulledObjectsBuffer.
|
||||
// If chunk has address=0 then it's unused/empty.
|
||||
// Chunk [0,0,0] is unused and its address=0 is used for atomic counter for writing into CulledObjectsBuffer.
|
||||
// Each chunk data contains objects count + all objects with tiles copied into buffer.
|
||||
// This allows quickly converting a world-space position into a chunk, then reading the chunk data start and looping over the culled objects (fewer objects, and data already in place).
|
||||
PROFILE_GPU_CPU("Cull Objects");
|
||||
uint32 objectsBufferCapacity = (uint32)((float)_objectsBuffer->Data.Count() * 1.3f);
|
||||
|
||||
// Copy counter from ChunksBuffer into staging buffer to access current chunks memory usage to adapt dynamically to the scene complexity
|
||||
if (surfaceAtlasData.ChunksBuffer)
|
||||
{
|
||||
if (!_culledObjectsSizeBuffer)
|
||||
{
|
||||
Platform::MemoryClear(_culledObjectsSizeFrames, sizeof(_culledObjectsSizeFrames));
|
||||
_culledObjectsSizeBuffer = GPUDevice::Instance->CreateBuffer(TEXT("GlobalSurfaceAtlas.CulledObjectsSizeBuffer"));
|
||||
const GPUBufferDescription desc = GPUBufferDescription::Buffer(ARRAY_COUNT(_culledObjectsSizeFrames) * sizeof(uint32), GPUBufferFlags::None, PixelFormat::R32_UInt, _culledObjectsSizeFrames, sizeof(uint32), GPUResourceUsage::StagingReadback);
|
||||
if (_culledObjectsSizeBuffer->Init(desc))
|
||||
return true;
|
||||
}
|
||||
if (surfaceAtlasData.CulledObjectsCounterIndex != -1)
|
||||
{
|
||||
// Get the last counter value (accept staging readback delay)
|
||||
auto data = (uint32*)_culledObjectsSizeBuffer->Map(GPUResourceMapMode::Read);
|
||||
uint32 counter = data[surfaceAtlasData.CulledObjectsCounterIndex];
|
||||
_culledObjectsSizeBuffer->Unmap();
|
||||
if (counter > 0)
|
||||
objectsBufferCapacity = counter * sizeof(Vector4);
|
||||
}
|
||||
if (surfaceAtlasData.CulledObjectsCounterIndex == -1)
|
||||
{
|
||||
// Find a free counter readback slot
|
||||
for (int32 i = 0; i < ARRAY_COUNT(_culledObjectsSizeFrames); i++)
|
||||
{
|
||||
if (currentFrame - _culledObjectsSizeFrames[i] > GPU_ASYNC_LATENCY)
|
||||
{
|
||||
surfaceAtlasData.CulledObjectsCounterIndex = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (surfaceAtlasData.CulledObjectsCounterIndex != -1)
|
||||
{
|
||||
// Copy current counter value
|
||||
_culledObjectsSizeFrames[surfaceAtlasData.CulledObjectsCounterIndex] = currentFrame;
|
||||
context->CopyBuffer(_culledObjectsSizeBuffer, surfaceAtlasData.ChunksBuffer, sizeof(uint32), surfaceAtlasData.CulledObjectsCounterIndex * sizeof(uint32), 0);
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate buffer for culled objects (estimated size)
|
||||
objectsBufferCapacity = Math::AlignUp(objectsBufferCapacity, 4096u);
|
||||
if (!surfaceAtlasData.CulledObjectsBuffer)
|
||||
surfaceAtlasData.CulledObjectsBuffer = GPUDevice::Instance->CreateBuffer(TEXT("GlobalSurfaceAtlas.CulledObjectsBuffer"));
|
||||
if (surfaceAtlasData.CulledObjectsBuffer->GetSize() < objectsBufferCapacity)
|
||||
{
|
||||
const GPUBufferDescription desc = GPUBufferDescription::Buffer(objectsBufferCapacity, GPUBufferFlags::UnorderedAccess | GPUBufferFlags::ShaderResource, PixelFormat::R32G32B32A32_Float, nullptr, sizeof(Vector4));
|
||||
if (surfaceAtlasData.CulledObjectsBuffer->Init(desc))
|
||||
return true;
|
||||
}
|
||||
|
||||
// Clear chunks counter (chunk at 0 is used for a counter so chunks buffer is aligned)
|
||||
uint32 counter = 1; // Indicate that 1st float4 is used so value 0 can be used as invalid chunk address
|
||||
context->UpdateBuffer(surfaceAtlasData.ChunksBuffer, &counter, sizeof(counter), 0);
|
||||
|
||||
// Cull objects into chunks (1 thread per chunk)
|
||||
Data0 data;
|
||||
data.ViewWorldPos = renderContext.View.Position;
|
||||
data.ViewNearPlane = renderContext.View.Near;
|
||||
data.ViewFarPlane = renderContext.View.Far;
|
||||
data.CulledObjectsCapacity = objectsBufferCapacity;
|
||||
data.GlobalSurfaceAtlas = result.GlobalSurfaceAtlas;
|
||||
context->UpdateCB(_cb0, &data);
|
||||
context->BindCB(0, _cb0);
|
||||
static_assert(GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION % GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE == 0, "Invalid chunks resolution/groups setting.");
|
||||
const int32 chunkDispatchGroups = GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION / GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE;
|
||||
context->BindSR(0, _objectsBuffer->GetBuffer()->View());
|
||||
context->BindUA(0, surfaceAtlasData.ChunksBuffer->View());
|
||||
context->BindUA(1, surfaceAtlasData.CulledObjectsBuffer->View());
|
||||
context->Dispatch(_csCullObjects, chunkDispatchGroups, chunkDispatchGroups, chunkDispatchGroups);
|
||||
context->ResetUA();
|
||||
|
||||
#if GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_CHUNKS
|
||||
// Debug draw chunks that have any objects inside
|
||||
for (int32 z = 0; z < GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION; z++)
|
||||
{
|
||||
for (int32 y = 0; y < GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION; y++)
|
||||
{
|
||||
for (int32 x = 0; x < GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION; x++)
|
||||
{
|
||||
Vector3 chunkCoord(x, y, z);
|
||||
Vector3 chunkMin = result.GlobalSurfaceAtlas.ViewPos + (chunkCoord - (GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION * 0.5f)) * result.GlobalSurfaceAtlas.ChunkSize;
|
||||
Vector3 chunkMax = chunkMin + result.GlobalSurfaceAtlas.ChunkSize;
|
||||
BoundingBox chunkBounds(chunkMin, chunkMax);
|
||||
if (Vector3::Distance(chunkBounds.GetCenter(), result.GlobalSurfaceAtlas.ViewPos) >= 2000.0f)
|
||||
continue;
|
||||
|
||||
int32 count = 0;
|
||||
for (auto& e : surfaceAtlasData.Objects)
|
||||
{
|
||||
BoundingSphere objectBounds(e.Value.Bounds.GetCenter(), e.Value.Radius);
|
||||
if (chunkBounds.Intersects(objectBounds))
|
||||
count++;
|
||||
}
|
||||
if (count != 0)
|
||||
{
|
||||
DebugDraw::DrawText(String::Format(TEXT("{} Objects"), count), chunkBounds.GetCenter(), Color::Green);
|
||||
DebugDraw::DrawWireBox(chunkBounds, Color::Green);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// Copy results
|
||||
result.Atlas[0] = surfaceAtlasData.AtlasDepth;
|
||||
result.Atlas[1] = surfaceAtlasData.AtlasGBuffer0;
|
||||
result.Atlas[2] = surfaceAtlasData.AtlasGBuffer1;
|
||||
result.Atlas[3] = surfaceAtlasData.AtlasGBuffer2;
|
||||
result.Atlas[4] = surfaceAtlasData.AtlasDirectLight;
|
||||
result.Objects = surfaceAtlasData.ObjectsBuffer.GetBuffer();
|
||||
result.Tiles = surfaceAtlasData.TilesBuffer.GetBuffer();
|
||||
result.GlobalSurfaceAtlas.Resolution = (float)resolution;
|
||||
result.GlobalSurfaceAtlas.ObjectsCount = surfaceAtlasData.Objects.Count();
|
||||
result.Chunks = surfaceAtlasData.ChunksBuffer;
|
||||
result.CulledObjects = surfaceAtlasData.CulledObjectsBuffer;
|
||||
surfaceAtlasData.Result = result;
|
||||
|
||||
// Render direct lighting into atlas
|
||||
@@ -708,12 +832,11 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
context->BindSR(1, surfaceAtlasData.AtlasGBuffer1->View());
|
||||
context->BindSR(2, surfaceAtlasData.AtlasGBuffer2->View());
|
||||
context->BindSR(3, surfaceAtlasData.AtlasDepth->View());
|
||||
context->BindSR(4, surfaceAtlasData.ObjectsBuffer.GetBuffer()->View());
|
||||
context->BindSR(5, surfaceAtlasData.TilesBuffer.GetBuffer()->View());
|
||||
context->BindSR(4, _objectsBuffer->GetBuffer()->View());
|
||||
for (int32 i = 0; i < 4; i++)
|
||||
{
|
||||
context->BindSR(i + 6, bindingDataSDF.Cascades[i]->ViewVolume());
|
||||
context->BindSR(i + 10, bindingDataSDF.CascadeMips[i]->ViewVolume());
|
||||
context->BindSR(i + 5, bindingDataSDF.Cascades[i]->ViewVolume());
|
||||
context->BindSR(i + 9, bindingDataSDF.CascadeMips[i]->ViewVolume());
|
||||
}
|
||||
context->BindCB(0, _cb0);
|
||||
Data0 data;
|
||||
@@ -843,8 +966,8 @@ void GlobalSurfaceAtlasPass::RenderDebug(RenderContext& renderContext, GPUContex
|
||||
context->BindSR(i, bindingDataSDF.Cascades[i]->ViewVolume());
|
||||
context->BindSR(i + 4, bindingDataSDF.CascadeMips[i]->ViewVolume());
|
||||
}
|
||||
context->BindSR(8, bindingData.Objects ? bindingData.Objects->View() : nullptr);
|
||||
context->BindSR(9, bindingData.Tiles ? bindingData.Tiles->View() : nullptr);
|
||||
context->BindSR(8, bindingData.Chunks ? bindingData.Chunks->View() : nullptr);
|
||||
context->BindSR(9, bindingData.CulledObjects ? bindingData.CulledObjects->View() : nullptr);
|
||||
context->BindSR(10, bindingData.Atlas[0]->View());
|
||||
{
|
||||
//GPUTexture* tex = bindingData.Atlas[1]; // Preview diffuse
|
||||
|
||||
@@ -13,8 +13,11 @@ public:
|
||||
// Constant buffer data for Global Surface Atlas access on a GPU.
|
||||
PACK_STRUCT(struct GlobalSurfaceAtlasData
|
||||
{
|
||||
Vector2 Padding;
|
||||
Vector3 ViewPos;
|
||||
float Padding0;
|
||||
float Padding1;
|
||||
float Resolution;
|
||||
float ChunkSize;
|
||||
uint32 ObjectsCount;
|
||||
});
|
||||
|
||||
@@ -22,8 +25,8 @@ public:
|
||||
struct BindingData
|
||||
{
|
||||
GPUTexture* Atlas[5];
|
||||
GPUBuffer* Objects;
|
||||
GPUBuffer* Tiles;
|
||||
GPUBuffer* Chunks;
|
||||
GPUBuffer* CulledObjects;
|
||||
GlobalSurfaceAtlasData GlobalSurfaceAtlas;
|
||||
};
|
||||
|
||||
@@ -35,10 +38,14 @@ private:
|
||||
GPUPipelineState* _psDirectLighting1 = nullptr;
|
||||
GPUPipelineState* _psDebug = nullptr;
|
||||
GPUConstantBuffer* _cb0 = nullptr;
|
||||
GPUShaderProgramCS* _csCullObjects;
|
||||
|
||||
// Rasterization cache
|
||||
// Cache
|
||||
class GPUBuffer* _culledObjectsSizeBuffer = nullptr;
|
||||
class DynamicTypedBuffer* _objectsBuffer = nullptr;
|
||||
class DynamicVertexBuffer* _vertexBuffer = nullptr;
|
||||
Array<Actor*> _dirtyObjectsBuffer;
|
||||
uint64 _culledObjectsSizeFrames[8];
|
||||
|
||||
public:
|
||||
/// <summary>
|
||||
|
||||
@@ -35,4 +35,11 @@ float2 LineHitBox(float3 lineStart, float3 lineEnd, float3 boxMin, float3 boxMax
|
||||
return saturate(intersections);
|
||||
}
|
||||
|
||||
// Tests an axis-aligned box (given by its min/max corners) against a sphere for overlap.
// Returns true when the point of the box nearest to the sphere center lies within sphereRadius of that center.
bool BoxIntersectsSphere(float3 boxMin, float3 boxMax, float3 sphereCenter, float sphereRadius)
{
    // Nearest point of the box to the sphere center (the center itself when it is inside the box)
    const float3 nearestPoint = clamp(sphereCenter, boxMin, boxMax);
    const float3 centerOffset = sphereCenter - nearestPoint;
    const float centerDistance = length(centerOffset);
    return centerDistance <= sphereRadius;
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -4,8 +4,9 @@
|
||||
#include "./Flax/Collisions.hlsl"
|
||||
|
||||
// This must match C++
|
||||
#define GLOBAL_SURFACE_ATLAS_OBJECT_BUFFER_STRIDE 6 // Amount of float4s per-object
|
||||
#define GLOBAL_SURFACE_ATLAS_TILE_BUFFER_STRIDE 5 // Amount of float4s per-tile
|
||||
#define GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION 40 // Amount of chunks (in each direction) to split atlas draw distance for objects culling
|
||||
#define GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE 4
|
||||
#define GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE 5 // Amount of float4s per-tile
|
||||
#define GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD 0.1f // Cut-off value for tiles transitions blending during sampling
|
||||
#define GLOBAL_SURFACE_ATLAS_TILE_PROJ_PLANE_OFFSET 0.1f // Small offset to prevent clipping with the closest triangles (shifts near and far planes)
|
||||
|
||||
@@ -22,55 +23,60 @@ struct GlobalSurfaceObject
|
||||
float BoundsRadius;
|
||||
float4x4 WorldToLocal;
|
||||
float3 Extent;
|
||||
uint TileIndices[6];
|
||||
uint TileOffsets[6];
|
||||
uint DataSize; // count of float4s for object+tiles
|
||||
};
|
||||
|
||||
float4 LoadGlobalSurfaceAtlasObjectBounds(Buffer<float4> objects, uint objectIndex)
|
||||
float4 LoadGlobalSurfaceAtlasObjectBounds(Buffer<float4> objects, uint objectAddress)
|
||||
{
|
||||
// This must match C++
|
||||
const uint objectStart = objectIndex * GLOBAL_SURFACE_ATLAS_OBJECT_BUFFER_STRIDE;
|
||||
return objects.Load(objectStart);
|
||||
return objects.Load(objectAddress + 0);
|
||||
}
|
||||
|
||||
GlobalSurfaceObject LoadGlobalSurfaceAtlasObject(Buffer<float4> objects, uint objectIndex)
|
||||
uint LoadGlobalSurfaceAtlasObjectDataSize(Buffer<float4> objects, uint objectAddress)
|
||||
{
|
||||
// This must match C++
|
||||
const uint objectStart = objectIndex * GLOBAL_SURFACE_ATLAS_OBJECT_BUFFER_STRIDE;
|
||||
float4 vector0 = objects.Load(objectStart + 0);
|
||||
float4 vector1 = objects.Load(objectStart + 1);
|
||||
float4 vector2 = objects.Load(objectStart + 2);
|
||||
float4 vector3 = objects.Load(objectStart + 3);
|
||||
float4 vector4 = objects.Load(objectStart + 4); // w unused
|
||||
float4 vector5 = objects.Load(objectStart + 5); // w unused
|
||||
return asuint(objects.Load(objectAddress + 1).w);
|
||||
}
|
||||
|
||||
GlobalSurfaceObject LoadGlobalSurfaceAtlasObject(Buffer<float4> objects, uint objectAddress)
|
||||
{
|
||||
// This must match C++
|
||||
float4 vector0 = objects.Load(objectAddress + 0);
|
||||
float4 vector1 = objects.Load(objectAddress + 1);
|
||||
float4 vector2 = objects.Load(objectAddress + 2);
|
||||
float4 vector3 = objects.Load(objectAddress + 3);
|
||||
float4 vector4 = objects.Load(objectAddress + 4);
|
||||
float4 vector5 = objects.Load(objectAddress + 5); // w unused
|
||||
GlobalSurfaceObject object = (GlobalSurfaceObject)0;
|
||||
object.BoundsPosition = vector0.xyz;
|
||||
object.BoundsRadius = vector0.w;
|
||||
object.WorldToLocal[0] = float4(vector1.xyz, 0.0f);
|
||||
object.WorldToLocal[1] = float4(vector2.xyz, 0.0f);
|
||||
object.WorldToLocal[2] = float4(vector3.xyz, 0.0f);
|
||||
object.WorldToLocal[3] = float4(vector1.w, vector2.w, vector3.w, 1.0f);
|
||||
object.Extent = vector4.xyz;
|
||||
uint vector5x = asuint(vector5.x);
|
||||
uint vector5y = asuint(vector5.y);
|
||||
uint vector5z = asuint(vector5.z);
|
||||
object.TileIndices[0] = vector5x & 0xffff; // Limitation on max 65k active tiles
|
||||
object.TileIndices[1] = vector5x >> 16;
|
||||
object.TileIndices[2] = vector5y & 0xffff;
|
||||
object.TileIndices[3] = vector5y >> 16;
|
||||
object.TileIndices[4] = vector5z & 0xffff;
|
||||
object.TileIndices[5] = vector5z >> 16;
|
||||
object.WorldToLocal[0] = float4(vector2.xyz, 0.0f);
|
||||
object.WorldToLocal[1] = float4(vector3.xyz, 0.0f);
|
||||
object.WorldToLocal[2] = float4(vector4.xyz, 0.0f);
|
||||
object.WorldToLocal[3] = float4(vector2.w, vector3.w, vector4.w, 1.0f);
|
||||
object.Extent = vector5.xyz;
|
||||
uint vector1x = asuint(vector1.x);
|
||||
uint vector1y = asuint(vector1.y);
|
||||
uint vector1z = asuint(vector1.z);
|
||||
object.DataSize = asuint(vector1.w);
|
||||
object.TileOffsets[0] = vector1x & 0xffff;
|
||||
object.TileOffsets[1] = vector1x >> 16;
|
||||
object.TileOffsets[2] = vector1y & 0xffff;
|
||||
object.TileOffsets[3] = vector1y >> 16;
|
||||
object.TileOffsets[4] = vector1z & 0xffff;
|
||||
object.TileOffsets[5] = vector1z >> 16;
|
||||
return object;
|
||||
}
|
||||
|
||||
GlobalSurfaceTile LoadGlobalSurfaceAtlasTile(Buffer<float4> objects, uint tileIndex)
|
||||
GlobalSurfaceTile LoadGlobalSurfaceAtlasTile(Buffer<float4> objects, uint tileAddress)
|
||||
{
|
||||
// This must match C++
|
||||
const uint tileStart = tileIndex * GLOBAL_SURFACE_ATLAS_TILE_BUFFER_STRIDE;
|
||||
float4 vector0 = objects.Load(tileStart + 0);
|
||||
float4 vector1 = objects.Load(tileStart + 1);
|
||||
float4 vector2 = objects.Load(tileStart + 2);
|
||||
float4 vector3 = objects.Load(tileStart + 3);
|
||||
float4 vector4 = objects.Load(tileStart + 4); // w unused
|
||||
float4 vector0 = objects.Load(tileAddress + 0);
|
||||
float4 vector1 = objects.Load(tileAddress + 1);
|
||||
float4 vector2 = objects.Load(tileAddress + 2);
|
||||
float4 vector3 = objects.Load(tileAddress + 3);
|
||||
float4 vector4 = objects.Load(tileAddress + 4); // w unused
|
||||
GlobalSurfaceTile tile = (GlobalSurfaceTile)0;
|
||||
tile.AtlasRectUV = vector0.xyzw;
|
||||
tile.WorldToLocal[0] = float4(vector1.xyz, 0.0f);
|
||||
@@ -84,8 +90,11 @@ GlobalSurfaceTile LoadGlobalSurfaceAtlasTile(Buffer<float4> objects, uint tileIn
|
||||
// Global Surface Atlas data for a constant buffer
|
||||
struct GlobalSurfaceAtlasData
|
||||
{
|
||||
float2 Padding;
|
||||
float3 ViewPos;
|
||||
float Padding0;
|
||||
float Padding1;
|
||||
float Resolution;
|
||||
float ChunkSize;
|
||||
uint ObjectsCount;
|
||||
};
|
||||
|
||||
@@ -147,19 +156,38 @@ float4 SampleGlobalSurfaceAtlasTile(const GlobalSurfaceAtlasData data, GlobalSur
|
||||
}
|
||||
|
||||
// Samples the Global Surface Atlas and returns the lighting (with opacity) at the given world location (and direction).
|
||||
float4 SampleGlobalSurfaceAtlas(const GlobalSurfaceAtlasData data, Buffer<float4> objects, Buffer<float4> tiles, Texture2D depth, Texture2D atlas, float3 worldPosition, float3 worldNormal)
|
||||
float4 SampleGlobalSurfaceAtlas(const GlobalSurfaceAtlasData data, ByteAddressBuffer chunks, Buffer<float4> culledObjects, Texture2D depth, Texture2D atlas, float3 worldPosition, float3 worldNormal)
|
||||
{
|
||||
float4 result = float4(0, 0, 0, 0);
|
||||
float surfaceThreshold = 20.0f; // Additional threshold between object or tile size compared with input data (error due to SDF or LOD incorrect appearance)
|
||||
// TODO: add grid culling to object for faster lookup
|
||||
|
||||
// Snap to the closest chunk to get culled objects
|
||||
uint3 chunkCoord = (uint3)clamp(floor((worldPosition - data.ViewPos) / data.ChunkSize + (GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION * 0.5f)), 0, GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION - 1);
|
||||
uint chunkAddress = (chunkCoord.z * (GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION * GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION) + chunkCoord.y * GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION + chunkCoord.x) * 4;
|
||||
uint objectsStart = chunks.Load(chunkAddress);
|
||||
if (objectsStart == 0)
|
||||
{
|
||||
// Empty chunk
|
||||
return result;
|
||||
}
|
||||
|
||||
// Read objects counter
|
||||
float4 chunkHeader = culledObjects[objectsStart];
|
||||
objectsStart++;
|
||||
uint objectsCount = asuint(chunkHeader.x);
|
||||
|
||||
// Loop over culled objects inside the chunk
|
||||
LOOP
|
||||
for (uint objectIndex = 0; objectIndex < data.ObjectsCount; objectIndex++)
|
||||
for (uint objectIndex = 0; objectIndex < objectsCount; objectIndex++)
|
||||
{
|
||||
// Cull point vs sphere
|
||||
float4 objectBounds = LoadGlobalSurfaceAtlasObjectBounds(objects, objectIndex);
|
||||
uint objectAddress = objectsStart;
|
||||
float4 objectBounds = LoadGlobalSurfaceAtlasObjectBounds(culledObjects, objectAddress);
|
||||
uint objectSize = LoadGlobalSurfaceAtlasObjectDataSize(culledObjects, objectAddress);
|
||||
objectsStart += objectSize;
|
||||
if (distance(objectBounds.xyz, worldPosition) > objectBounds.w)
|
||||
continue;
|
||||
GlobalSurfaceObject object = LoadGlobalSurfaceAtlasObject(objects, objectIndex);
|
||||
GlobalSurfaceObject object = LoadGlobalSurfaceAtlasObject(culledObjects, objectAddress);
|
||||
float3 localPosition = mul(float4(worldPosition, 1), object.WorldToLocal).xyz;
|
||||
float3 localExtent = object.Extent + surfaceThreshold;
|
||||
if (any(localPosition > localExtent) || any(localPosition < -localExtent))
|
||||
@@ -168,22 +196,22 @@ float4 SampleGlobalSurfaceAtlas(const GlobalSurfaceAtlasData data, Buffer<float4
|
||||
// Sample tiles based on the directionality
|
||||
float3 localNormal = normalize(mul(worldNormal, (float3x3)object.WorldToLocal));
|
||||
float3 localNormalSq = localNormal * localNormal;
|
||||
uint tileIndex = object.TileIndices[localNormal.x > 0.0f ? 0 : 1];
|
||||
if (localNormalSq.x > GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD * GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD && tileIndex != 0)
|
||||
uint tileOffset = object.TileOffsets[localNormal.x > 0.0f ? 0 : 1];
|
||||
if (localNormalSq.x > GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD * GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD && tileOffset != 0)
|
||||
{
|
||||
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(tiles, tileIndex);
|
||||
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
|
||||
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
|
||||
}
|
||||
tileIndex = object.TileIndices[localNormal.y > 0.0f ? 2 : 3];
|
||||
if (localNormalSq.y > GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD * GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD && tileIndex != 0)
|
||||
tileOffset = object.TileOffsets[localNormal.y > 0.0f ? 2 : 3];
|
||||
if (localNormalSq.y > GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD * GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD && tileOffset != 0)
|
||||
{
|
||||
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(tiles, tileIndex);
|
||||
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
|
||||
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
|
||||
}
|
||||
tileIndex = object.TileIndices[localNormal.z > 0.0f ? 4 : 5];
|
||||
if (localNormalSq.z > GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD * GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD && tileIndex != 0)
|
||||
tileOffset = object.TileOffsets[localNormal.z > 0.0f ? 4 : 5];
|
||||
if (localNormalSq.z > GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD * GLOBAL_SURFACE_ATLAS_TILE_NORMAL_THRESHOLD && tileOffset != 0)
|
||||
{
|
||||
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(tiles, tileIndex);
|
||||
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(culledObjects, objectAddress + tileOffset);
|
||||
result += SampleGlobalSurfaceAtlasTile(data, tile, depth, atlas, worldPosition, worldNormal, surfaceThreshold);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,7 +12,8 @@
|
||||
META_CB_BEGIN(0, Data)
|
||||
float3 ViewWorldPos;
|
||||
float ViewNearPlane;
|
||||
float2 Padding00;
|
||||
float Padding00;
|
||||
uint CulledObjectsCapacity;
|
||||
float LightShadowsStrength;
|
||||
float ViewFarPlane;
|
||||
float4 ViewFrustumWorldRays[4];
|
||||
@@ -25,27 +26,27 @@ struct AtlasVertexIput
|
||||
{
|
||||
float2 Position : POSITION0;
|
||||
float2 TileUV : TEXCOORD0;
|
||||
uint2 Index : TEXCOORD1;
|
||||
uint TileAddress : TEXCOORD1;
|
||||
};
|
||||
|
||||
// Data returned by VS_Atlas and consumed by the atlas pixel shaders (e.g. PS_DirectLighting).
// NOTE(review): this listing comes from a diff view; Index and TileAddress both use TEXCOORD1, so Index is presumably the removed pre-change member - confirm against the real file.
struct AtlasVertexOutput
|
||||
{
|
||||
float4 Position : SV_Position;
|
||||
// Tile-local UV (PS_DirectLighting remaps it into the atlas with the tile AtlasRectUV)
float2 TileUV : TEXCOORD0;
|
||||
nointerpolation uint2 Index : TEXCOORD1;
|
||||
// Address passed to LoadGlobalSurfaceAtlasTile to fetch the tile data (not interpolated across the primitive)
nointerpolation uint TileAddress : TEXCOORD1;
|
||||
};
|
||||
|
||||
// Vertex shader for Global Surface Atlas rendering (custom vertex buffer to render per-tile)
// Pass-through stage: copies the vertex inputs to the output; only the 2D position is extended to a float4.
// NOTE(review): this listing comes from a diff view; the two TEXCOORD1 input elements and the Index/TileAddress assignments are presumably the removed and added variants of the same line - confirm against the real file.
|
||||
META_VS(true, FEATURE_LEVEL_SM5)
|
||||
META_VS_IN_ELEMENT(POSITION, 0, R16G16_FLOAT, 0, ALIGN, PER_VERTEX, 0, true)
|
||||
META_VS_IN_ELEMENT(TEXCOORD, 0, R16G16_FLOAT, 0, ALIGN, PER_VERTEX, 0, true)
|
||||
META_VS_IN_ELEMENT(TEXCOORD, 1, R16G16_UINT, 0, ALIGN, PER_VERTEX, 0, true)
|
||||
META_VS_IN_ELEMENT(TEXCOORD, 1, R32_UINT, 0, ALIGN, PER_VERTEX, 0, true)
|
||||
AtlasVertexOutput VS_Atlas(AtlasVertexIput input)
|
||||
{
|
||||
AtlasVertexOutput output;
|
||||
// Input position is used directly as the output XY; Z and W are fixed to 1
output.Position = float4(input.Position, 1, 1);
|
||||
output.TileUV = input.TileUV;
|
||||
output.Index = input.Index;
|
||||
// Forward the tile data address so the pixel shader can load the tile
output.TileAddress = input.TileAddress;
|
||||
return output;
|
||||
}
|
||||
|
||||
@@ -67,9 +68,8 @@ void PS_Clear(out float4 Light : SV_Target0, out float4 RT0 : SV_Target1, out fl
|
||||
|
||||
// GBuffer+Depth at 0-3 slots
|
||||
Buffer<float4> GlobalSurfaceAtlasObjects : register(t4);
|
||||
Buffer<float4> GlobalSurfaceAtlasTiles : register(t5);
|
||||
Texture3D<float> GlobalSDFTex[4] : register(t6);
|
||||
Texture3D<float> GlobalSDFMip[4] : register(t10);
|
||||
Texture3D<float> GlobalSDFTex[4] : register(t5);
|
||||
Texture3D<float> GlobalSDFMip[4] : register(t9);
|
||||
|
||||
// Pixel shader for Global Surface Atlas shading with direct light contribution
|
||||
META_PS(true, FEATURE_LEVEL_SM5)
|
||||
@@ -78,8 +78,7 @@ META_PERMUTATION_1(RADIAL_LIGHT=1)
|
||||
float4 PS_DirectLighting(AtlasVertexOutput input) : SV_Target
|
||||
{
|
||||
// Load current tile info
|
||||
//GlobalSurfaceObject object = LoadGlobalSurfaceAtlasObject(GlobalSurfaceAtlasObjects, input.Index.x);
|
||||
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(GlobalSurfaceAtlasTiles, input.Index.y);
|
||||
GlobalSurfaceTile tile = LoadGlobalSurfaceAtlasTile(GlobalSurfaceAtlasObjects, input.TileAddress);
|
||||
float2 atlasUV = input.TileUV * tile.AtlasRectUV.zw + tile.AtlasRectUV.xy;
|
||||
|
||||
// Load GBuffer sample from atlas
|
||||
@@ -157,12 +156,93 @@ float4 PS_DirectLighting(AtlasVertexOutput input) : SV_Target
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(_CS_CullObjects)
|
||||
|
||||
#include "./Flax/Collisions.hlsl"
|
||||
|
||||
RWByteAddressBuffer RWGlobalSurfaceAtlasChunks : register(u0);
|
||||
RWBuffer<float4> RWGlobalSurfaceAtlasCulledObjects : register(u1);
|
||||
Buffer<float4> GlobalSurfaceAtlasObjects : register(t0);
|
||||
|
||||
// Compute shader for culling objects into chunks
// Each thread handles one chunk of the 3D grid (chunk coordinate = dispatch thread id) in two passes:
//  1) counts the objects whose bounding sphere intersects the chunk box and sums their data size,
//  2) allocates that many float4s in the culled objects buffer and copies the objects data there.
// The chunks buffer stores one uint per chunk: the start index of the chunk data in the culled objects buffer, or 0 when the chunk is empty.
// The culled data of a chunk is laid out as: one header float4 (objects count in X as uint bits), followed by the data of each object.
|
||||
META_CS(true, FEATURE_LEVEL_SM5)
|
||||
[numthreads(GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE, GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE, GLOBAL_SURFACE_ATLAS_CHUNKS_GROUP_SIZE)]
|
||||
void CS_CullObjects(uint3 GroupId : SV_GroupID, uint3 DispatchThreadId : SV_DispatchThreadID, uint3 GroupThreadId : SV_GroupThreadID)
|
||||
{
|
||||
uint3 chunkCoord = DispatchThreadId;
|
||||
// Byte offset of this chunk entry in the chunks buffer (linear chunk index * 4 bytes per uint)
uint chunkAddress = (chunkCoord.z * (GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION * GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION) + chunkCoord.y * GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION + chunkCoord.x) * 4;
|
||||
if (chunkAddress == 0)
|
||||
return; // Skip chunk at 0,0,0 (used for counter)
|
||||
// Chunk bounding box (the chunks grid is centered around the view position)
float3 chunkMin = GlobalSurfaceAtlas.ViewPos + (chunkCoord - (GLOBAL_SURFACE_ATLAS_CHUNKS_RESOLUTION * 0.5f)) * GlobalSurfaceAtlas.ChunkSize;
|
||||
float3 chunkMax = chunkMin + GlobalSurfaceAtlas.ChunkSize;
|
||||
|
||||
// Count objects data size in this chunk (amount of float4s)
// Objects have variable data size, so the buffer is walked by advancing the address by each object size
|
||||
uint objectsSize = 0, objectAddress = 0, objectsCount = 0;
|
||||
// TODO: maybe cache 20-30 culled object indices in thread memory to skip culling them again when copying data (maybe reduce chunk size to get smaller objects count per chunk)?
|
||||
LOOP
|
||||
for (uint objectIndex = 0; objectIndex < GlobalSurfaceAtlas.ObjectsCount; objectIndex++)
|
||||
{
|
||||
float4 objectBounds = LoadGlobalSurfaceAtlasObjectBounds(GlobalSurfaceAtlasObjects, objectAddress);
|
||||
uint objectSize = LoadGlobalSurfaceAtlasObjectDataSize(GlobalSurfaceAtlasObjects, objectAddress);
|
||||
// Object bounds are a sphere (center in XYZ, radius in W) tested against the chunk box
if (BoxIntersectsSphere(chunkMin, chunkMax, objectBounds.xyz, objectBounds.w))
|
||||
{
|
||||
objectsSize += objectSize;
|
||||
objectsCount++;
|
||||
}
|
||||
objectAddress += objectSize;
|
||||
}
|
||||
if (objectsSize == 0)
|
||||
{
|
||||
// Empty chunk
|
||||
RWGlobalSurfaceAtlasChunks.Store(chunkAddress, 0);
|
||||
return;
|
||||
}
|
||||
objectsSize++; // Include objects count before actual objects data
|
||||
|
||||
// Allocate object data size in the buffer
// Atomic add on the shared counter stored at byte address 0; objectsStart receives the counter value from before the add
// NOTE(review): start index 0 means 'empty chunk', so the counter is presumably initialized to a non-zero value before this dispatch - confirm on the CPU side
|
||||
uint objectsStart;
|
||||
RWGlobalSurfaceAtlasChunks.InterlockedAdd(0, objectsSize, objectsStart);
|
||||
if (objectsStart + objectsSize > CulledObjectsCapacity)
|
||||
{
|
||||
// Not enough space in the buffer
// The chunk is marked as empty; the counter is not rolled back
|
||||
RWGlobalSurfaceAtlasChunks.Store(chunkAddress, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
// Write object data start
|
||||
RWGlobalSurfaceAtlasChunks.Store(chunkAddress, objectsStart);
|
||||
|
||||
// Write objects count before actual objects data
// The count is stored as raw uint bits inside the float4 header
|
||||
RWGlobalSurfaceAtlasCulledObjects[objectsStart] = float4(asfloat(objectsCount), 0, 0, 0);
|
||||
objectsStart++;
|
||||
|
||||
// Copy objects data in this chunk
// Repeats the same intersection test as the counting pass so the copied data matches the allocated size
|
||||
objectAddress = 0;
|
||||
LOOP
|
||||
for (uint objectIndex = 0; objectIndex < GlobalSurfaceAtlas.ObjectsCount; objectIndex++)
|
||||
{
|
||||
float4 objectBounds = LoadGlobalSurfaceAtlasObjectBounds(GlobalSurfaceAtlasObjects, objectAddress);
|
||||
uint objectSize = LoadGlobalSurfaceAtlasObjectDataSize(GlobalSurfaceAtlasObjects, objectAddress);
|
||||
if (BoxIntersectsSphere(chunkMin, chunkMax, objectBounds.xyz, objectBounds.w))
|
||||
{
|
||||
// Copy the whole object data, one float4 at a time
for (uint i = 0; i < objectSize; i++)
|
||||
{
|
||||
RWGlobalSurfaceAtlasCulledObjects[objectsStart + i] = GlobalSurfaceAtlasObjects[objectAddress + i];
|
||||
}
|
||||
objectsStart += objectSize;
|
||||
}
|
||||
objectAddress += objectSize;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef _PS_Debug
|
||||
|
||||
Texture3D<float> GlobalSDFTex[4] : register(t0);
|
||||
Texture3D<float> GlobalSDFMip[4] : register(t4);
|
||||
Buffer<float4> GlobalSurfaceAtlasObjects : register(t8);
|
||||
Buffer<float4> GlobalSurfaceAtlasTiles : register(t9);
|
||||
ByteAddressBuffer GlobalSurfaceAtlasChunks : register(t8);
|
||||
Buffer<float4> GlobalSurfaceAtlasCulledObjects : register(t9);
|
||||
Texture2D GlobalSurfaceAtlasDepth : register(t10);
|
||||
Texture2D GlobalSurfaceAtlasTex : register(t11);
|
||||
|
||||
@@ -187,7 +267,7 @@ float4 PS_Debug(Quad_VS2PS input) : SV_Target
|
||||
//return float4(hit.HitNormal * 0.5f + 0.5f, 1);
|
||||
|
||||
// Sample Global Surface Atlas at the hit location
|
||||
float4 surfaceColor = SampleGlobalSurfaceAtlas(GlobalSurfaceAtlas, GlobalSurfaceAtlasObjects, GlobalSurfaceAtlasTiles, GlobalSurfaceAtlasDepth, GlobalSurfaceAtlasTex, hit.GetHitPosition(trace), -viewRay);
|
||||
float4 surfaceColor = SampleGlobalSurfaceAtlas(GlobalSurfaceAtlas, GlobalSurfaceAtlasChunks, GlobalSurfaceAtlasCulledObjects, GlobalSurfaceAtlasDepth, GlobalSurfaceAtlasTex, hit.GetHitPosition(trace), -viewRay);
|
||||
return float4(surfaceColor.rgb, 1);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user