Optimize Global Surface Atlas setup and objects buffer writing to be async
This commit is contained in:
@@ -131,6 +131,7 @@ class GlobalSurfaceAtlasCustomBuffer : public RenderBuffers::CustomBuffer, publi
|
||||
{
|
||||
public:
|
||||
int32 Resolution = 0;
|
||||
float ResolutionInv;
|
||||
int32 AtlasPixelsTotal = 0;
|
||||
int32 AtlasPixelsUsed = 0;
|
||||
uint64 LastFrameAtlasInsertFail = 0;
|
||||
@@ -155,7 +156,6 @@ public:
|
||||
Array<void*> DirtyObjectsBuffer;
|
||||
Vector4 CullingPosDistance;
|
||||
uint64 CurrentFrame;
|
||||
float ResolutionInv;
|
||||
Float3 ViewPosition;
|
||||
float TileTexelsPerWorldUnit;
|
||||
float DistanceScalingStart;
|
||||
@@ -164,7 +164,7 @@ public:
|
||||
float MinObjectRadius;
|
||||
|
||||
// Async objects drawing cache
|
||||
Array<int64, FixedAllocation<2>> AsyncDrawWaitLabels;
|
||||
Array<int64, FixedAllocation<3>> AsyncDrawWaitLabels;
|
||||
RenderListBuffer<GlobalSurfaceAtlasTile*> AsyncFreeTiles;
|
||||
RenderListBuffer<GlobalSurfaceAtlasNewObject> AsyncNewObjects;
|
||||
RenderListBuffer<GlobalSurfaceAtlasNewTile> AsyncNewTiles;
|
||||
@@ -257,9 +257,9 @@ public:
|
||||
int32 resolution;
|
||||
float distance;
|
||||
GetOptions(renderContext, resolution, distance);
|
||||
const float resolutionInv = 1.0f / (float)resolution;
|
||||
if (Resolution != resolution)
|
||||
return; // Not yet initialized
|
||||
const auto currentFrame = Engine::FrameCount;
|
||||
if (Resolution == resolution)
|
||||
{
|
||||
// Perform atlas defragmentation if needed
|
||||
constexpr float maxUsageToDefrag = 0.8f;
|
||||
@@ -281,7 +281,6 @@ public:
|
||||
|
||||
// Setup data for rendering
|
||||
CurrentFrame = currentFrame;
|
||||
ResolutionInv = resolutionInv;
|
||||
ViewPosition = renderContext.View.Position;
|
||||
TileTexelsPerWorldUnit = 1.0f / METERS_TO_UNITS(0.1f); // Scales the tiles resolution
|
||||
DistanceScalingStart = METERS_TO_UNITS(20.0f); // Distance from camera at which the tiles resolution starts to be scaled down
|
||||
@@ -310,11 +309,16 @@ public:
|
||||
// Run sync actors drawing now or force in async (different drawing path doesn't interfere with normal scene drawing)
|
||||
func.Bind<GlobalSurfaceAtlasCustomBuffer, &GlobalSurfaceAtlasCustomBuffer::DrawActorsJobSync>(this);
|
||||
AsyncDrawWaitLabels.Add(JobSystem::Dispatch(func, jobCount));
|
||||
|
||||
// Run dependent job that will process objects data in async
|
||||
func.Bind<GlobalSurfaceAtlasCustomBuffer, &GlobalSurfaceAtlasCustomBuffer::SetupJob>(this);
|
||||
AsyncDrawWaitLabels.Add(JobSystem::Dispatch(func, ToSpan(AsyncDrawWaitLabels)));
|
||||
}
|
||||
else
|
||||
{
|
||||
DrawActorsJob(-1);
|
||||
DrawActorsJob(0);
|
||||
SetupJob(0);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -325,18 +329,14 @@ public:
|
||||
AsyncDrawWaitLabels.Clear();
|
||||
}
|
||||
|
||||
void PostDrawActors()
|
||||
void FlushNewObjects()
|
||||
{
|
||||
PROFILE_CPU_NAMED("Post Draw");
|
||||
PROFILE_CPU_NAMED("Flush Atlas");
|
||||
|
||||
// Flush atlas tiles freeing
|
||||
for (auto* tile : AsyncFreeTiles)
|
||||
{
|
||||
Atlas.Free(tile, this);
|
||||
}
|
||||
AsyncFreeTiles.Clear();
|
||||
|
||||
// Flush new objects adding
|
||||
for (auto& newObject : AsyncNewObjects)
|
||||
{
|
||||
auto& object = Objects[newObject.ActorObject];
|
||||
@@ -350,7 +350,6 @@ public:
|
||||
}
|
||||
AsyncNewObjects.Clear();
|
||||
|
||||
// Flush new tiles adding
|
||||
for (auto& newTile : AsyncNewTiles)
|
||||
{
|
||||
auto& object = Objects[newTile.ActorObject];
|
||||
@@ -371,6 +370,114 @@ public:
|
||||
AsyncNewTiles.Clear();
|
||||
}
|
||||
|
||||
void CompactObjects()
|
||||
{
|
||||
PROFILE_CPU_NAMED("Compact Objects");
|
||||
for (auto it = Objects.Begin(); it.IsNotEnd(); ++it)
|
||||
{
|
||||
if (it->Value.LastFrameUsed != CurrentFrame)
|
||||
{
|
||||
for (auto& tile : it->Value.Tiles)
|
||||
{
|
||||
if (tile)
|
||||
Atlas.Free(tile, this);
|
||||
}
|
||||
Objects.Remove(it);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void WriteObjects()
|
||||
{
|
||||
PROFILE_CPU_NAMED("Write Objects");
|
||||
DirtyObjectsBuffer.Clear();
|
||||
ObjectsBuffer.Clear();
|
||||
for (auto& e : Objects)
|
||||
{
|
||||
auto& object = e.Value;
|
||||
if (object.Dirty)
|
||||
{
|
||||
// Collect dirty objects
|
||||
object.LastFrameUpdated = CurrentFrame;
|
||||
object.LightingUpdateFrame = CurrentFrame;
|
||||
DirtyObjectsBuffer.Add(e.Key);
|
||||
}
|
||||
|
||||
Matrix3x3 worldToLocalRotation;
|
||||
Matrix3x3::RotationQuaternion(object.Bounds.Transformation.Orientation.Conjugated(), worldToLocalRotation);
|
||||
Float3 worldPosition = object.Bounds.Transformation.Translation;
|
||||
Float3 worldExtents = object.Bounds.Extents * object.Bounds.Transformation.Scale;
|
||||
|
||||
// Write to objects buffer (this must match unpacking logic in HLSL)
|
||||
uint32 objectAddress = ObjectsBuffer.Data.Count() / sizeof(Float4);
|
||||
auto* objectData = ObjectsBuffer.WriteReserve<Float4>(GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE);
|
||||
objectData[0] = Float4(object.Position, object.Radius);
|
||||
objectData[1] = Float4::Zero;
|
||||
objectData[2] = Float4(worldToLocalRotation.M11, worldToLocalRotation.M12, worldToLocalRotation.M13, worldPosition.X);
|
||||
objectData[3] = Float4(worldToLocalRotation.M21, worldToLocalRotation.M22, worldToLocalRotation.M23, worldPosition.Y);
|
||||
objectData[4] = Float4(worldToLocalRotation.M31, worldToLocalRotation.M32, worldToLocalRotation.M33, worldPosition.Z);
|
||||
objectData[5] = Float4(worldExtents, object.UseVisibility ? 1.0f : 0.0f);
|
||||
auto tileOffsets = reinterpret_cast<uint16*>(&objectData[1]); // xyz used for tile offsets packed into uint16
|
||||
auto objectDataSize = reinterpret_cast<uint32*>(&objectData[1].W); // w used for object size (count of Float4s for object+tiles)
|
||||
*objectDataSize = GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE;
|
||||
for (int32 tileIndex = 0; tileIndex < 6; tileIndex++)
|
||||
{
|
||||
auto* tile = object.Tiles[tileIndex];
|
||||
if (!tile)
|
||||
continue;
|
||||
tile->ObjectAddressOffset = *objectDataSize;
|
||||
tile->Address = objectAddress + tile->ObjectAddressOffset;
|
||||
tileOffsets[tileIndex] = tile->ObjectAddressOffset;
|
||||
*objectDataSize += GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE;
|
||||
|
||||
// Setup view to render object from the side
|
||||
Float3 xAxis, yAxis, zAxis = Float3::Zero;
|
||||
zAxis.Raw[tileIndex / 2] = tileIndex & 1 ? 1.0f : -1.0f;
|
||||
yAxis = tileIndex == 2 || tileIndex == 3 ? Float3::Right : Float3::Up;
|
||||
Float3::Cross(yAxis, zAxis, xAxis);
|
||||
Float3 localSpaceOffset = -zAxis * object.Bounds.Extents;
|
||||
xAxis = object.Bounds.Transformation.LocalToWorldVector(xAxis);
|
||||
yAxis = object.Bounds.Transformation.LocalToWorldVector(yAxis);
|
||||
zAxis = object.Bounds.Transformation.LocalToWorldVector(zAxis);
|
||||
xAxis.NormalizeFast();
|
||||
yAxis.NormalizeFast();
|
||||
zAxis.NormalizeFast();
|
||||
tile->ViewPosition = object.Bounds.Transformation.LocalToWorld(localSpaceOffset);
|
||||
tile->ViewDirection = zAxis;
|
||||
|
||||
// Create view matrix
|
||||
tile->ViewMatrix.SetColumn1(Float4(xAxis, -Float3::Dot(xAxis, tile->ViewPosition)));
|
||||
tile->ViewMatrix.SetColumn2(Float4(yAxis, -Float3::Dot(yAxis, tile->ViewPosition)));
|
||||
tile->ViewMatrix.SetColumn3(Float4(zAxis, -Float3::Dot(zAxis, tile->ViewPosition)));
|
||||
tile->ViewMatrix.SetColumn4(Float4(0, 0, 0, 1));
|
||||
|
||||
// Calculate object bounds size in the view
|
||||
OrientedBoundingBox viewBounds(object.Bounds);
|
||||
viewBounds.Transform(tile->ViewMatrix);
|
||||
Float3 viewExtent = viewBounds.Transformation.LocalToWorldVector(viewBounds.Extents);
|
||||
tile->ViewBoundsSize = viewExtent.GetAbsolute() * 2.0f;
|
||||
|
||||
// Per-tile data
|
||||
const float tileWidth = (float)tile->Width - GLOBAL_SURFACE_ATLAS_TILE_PADDING;
|
||||
const float tileHeight = (float)tile->Height - GLOBAL_SURFACE_ATLAS_TILE_PADDING;
|
||||
auto* tileData = ObjectsBuffer.WriteReserve<Float4>(GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE);
|
||||
tileData[0] = Float4(tile->X, tile->Y, tileWidth, tileHeight) * ResolutionInv;
|
||||
tileData[1] = Float4(tile->ViewMatrix.M11, tile->ViewMatrix.M12, tile->ViewMatrix.M13, tile->ViewMatrix.M41);
|
||||
tileData[2] = Float4(tile->ViewMatrix.M21, tile->ViewMatrix.M22, tile->ViewMatrix.M23, tile->ViewMatrix.M42);
|
||||
tileData[3] = Float4(tile->ViewMatrix.M31, tile->ViewMatrix.M32, tile->ViewMatrix.M33, tile->ViewMatrix.M43);
|
||||
tileData[4] = Float4(tile->ViewBoundsSize, 0.0f); // w unused
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Job function that runs the three setup steps one after another: FlushNewObjects, CompactObjects and WriteObjects.
// The int32 argument is not used.
void SetupJob(int32)
{
    PROFILE_CPU();

    // Apply the changes queued in the Async* lists
    FlushNewObjects();

    // Remove objects that were not used in CurrentFrame
    CompactObjects();

    // Rebuild DirtyObjectsBuffer and ObjectsBuffer from the remaining objects
    WriteObjects();
}
|
||||
|
||||
// [ISceneRenderingListener]
|
||||
void OnSceneRenderingAddActor(Actor* a) override
|
||||
{
|
||||
@@ -587,10 +694,6 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
surfaceAtlasData.LastFrameUsed = currentFrame;
|
||||
PROFILE_GPU_CPU_NAMED("Global Surface Atlas");
|
||||
|
||||
// Start objects drawing (in case not yet started earlier this frame)
|
||||
_surfaceAtlasData = &surfaceAtlasData;
|
||||
surfaceAtlasData.StartDrawActors(renderContext);
|
||||
|
||||
// Setup options
|
||||
int32 resolution;
|
||||
float distance;
|
||||
@@ -617,6 +720,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
INIT_ATLAS_TEXTURE(AtlasDepth, PixelFormat::D16_UNorm);
|
||||
#undef INIT_ATLAS_TEXTURE
|
||||
surfaceAtlasData.Resolution = resolution;
|
||||
surfaceAtlasData.ResolutionInv = resolutionInv;
|
||||
surfaceAtlasData.AtlasPixelsTotal = resolution * resolution;
|
||||
if (!surfaceAtlasData.ChunksBuffer)
|
||||
{
|
||||
@@ -632,6 +736,11 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
if (!_vertexBuffer)
|
||||
_vertexBuffer = New<DynamicVertexBuffer>(0u, (uint32)sizeof(AtlasTileVertex), TEXT("GlobalSurfaceAtlas.VertexBuffer"));
|
||||
|
||||
// Ensure that async objects drawing ended
|
||||
_surfaceAtlasData = &surfaceAtlasData;
|
||||
surfaceAtlasData.StartDrawActors(renderContext); // (ignored if not started earlier this frame)
|
||||
surfaceAtlasData.WaitForDrawActors();
|
||||
|
||||
// Utility for writing into tiles vertex buffer
|
||||
const Float2 posToClipMul(2.0f * resolutionInv, -2.0f * resolutionInv);
|
||||
const Float2 posToClipAdd(-1.0f, 1.0f);
|
||||
@@ -662,110 +771,6 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
context->BindVB(ToSpan(&vb, 1)); \
|
||||
context->DrawInstanced(_vertexBuffer->Data.Count() / sizeof(AtlasTileVertex), 1);
|
||||
|
||||
// Ensure that async objects drawing ended
|
||||
surfaceAtlasData.WaitForDrawActors();
|
||||
surfaceAtlasData.PostDrawActors();
|
||||
|
||||
// Remove unused objects
|
||||
{
|
||||
PROFILE_GPU_CPU_NAMED("Compact Objects");
|
||||
for (auto it = surfaceAtlasData.Objects.Begin(); it.IsNotEnd(); ++it)
|
||||
{
|
||||
if (it->Value.LastFrameUsed != currentFrame)
|
||||
{
|
||||
for (auto& tile : it->Value.Tiles)
|
||||
{
|
||||
if (tile)
|
||||
surfaceAtlasData.Atlas.Free(tile, &surfaceAtlasData);
|
||||
}
|
||||
surfaceAtlasData.Objects.Remove(it);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Write objects to the data buffer
|
||||
{
|
||||
PROFILE_CPU_NAMED("Write Objects");
|
||||
surfaceAtlasData.DirtyObjectsBuffer.Clear();
|
||||
surfaceAtlasData.ObjectsBuffer.Clear();
|
||||
for (auto& e : surfaceAtlasData.Objects)
|
||||
{
|
||||
auto& object = e.Value;
|
||||
if (object.Dirty)
|
||||
{
|
||||
// Collect dirty objects
|
||||
object.LastFrameUpdated = surfaceAtlasData.CurrentFrame;
|
||||
object.LightingUpdateFrame = surfaceAtlasData.CurrentFrame;
|
||||
surfaceAtlasData.DirtyObjectsBuffer.Add(e.Key);
|
||||
}
|
||||
|
||||
Matrix3x3 worldToLocalRotation;
|
||||
Matrix3x3::RotationQuaternion(object.Bounds.Transformation.Orientation.Conjugated(), worldToLocalRotation);
|
||||
Float3 worldPosition = object.Bounds.Transformation.Translation;
|
||||
Float3 worldExtents = object.Bounds.Extents * object.Bounds.Transformation.Scale;
|
||||
|
||||
// Write to objects buffer (this must match unpacking logic in HLSL)
|
||||
uint32 objectAddress = surfaceAtlasData.ObjectsBuffer.Data.Count() / sizeof(Float4);
|
||||
auto* objectData = surfaceAtlasData.ObjectsBuffer.WriteReserve<Float4>(GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE);
|
||||
objectData[0] = Float4(object.Position, object.Radius);
|
||||
objectData[1] = Float4::Zero;
|
||||
objectData[2] = Float4(worldToLocalRotation.M11, worldToLocalRotation.M12, worldToLocalRotation.M13, worldPosition.X);
|
||||
objectData[3] = Float4(worldToLocalRotation.M21, worldToLocalRotation.M22, worldToLocalRotation.M23, worldPosition.Y);
|
||||
objectData[4] = Float4(worldToLocalRotation.M31, worldToLocalRotation.M32, worldToLocalRotation.M33, worldPosition.Z);
|
||||
objectData[5] = Float4(worldExtents, object.UseVisibility ? 1.0f : 0.0f);
|
||||
auto tileOffsets = reinterpret_cast<uint16*>(&objectData[1]); // xyz used for tile offsets packed into uint16
|
||||
auto objectDataSize = reinterpret_cast<uint32*>(&objectData[1].W); // w used for object size (count of Float4s for object+tiles)
|
||||
*objectDataSize = GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE;
|
||||
for (int32 tileIndex = 0; tileIndex < 6; tileIndex++)
|
||||
{
|
||||
auto* tile = object.Tiles[tileIndex];
|
||||
if (!tile)
|
||||
continue;
|
||||
tile->ObjectAddressOffset = *objectDataSize;
|
||||
tile->Address = objectAddress + tile->ObjectAddressOffset;
|
||||
tileOffsets[tileIndex] = tile->ObjectAddressOffset;
|
||||
*objectDataSize += GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE;
|
||||
|
||||
// Setup view to render object from the side
|
||||
Float3 xAxis, yAxis, zAxis = Float3::Zero;
|
||||
zAxis.Raw[tileIndex / 2] = tileIndex & 1 ? 1.0f : -1.0f;
|
||||
yAxis = tileIndex == 2 || tileIndex == 3 ? Float3::Right : Float3::Up;
|
||||
Float3::Cross(yAxis, zAxis, xAxis);
|
||||
Float3 localSpaceOffset = -zAxis * object.Bounds.Extents;
|
||||
xAxis = object.Bounds.Transformation.LocalToWorldVector(xAxis);
|
||||
yAxis = object.Bounds.Transformation.LocalToWorldVector(yAxis);
|
||||
zAxis = object.Bounds.Transformation.LocalToWorldVector(zAxis);
|
||||
xAxis.NormalizeFast();
|
||||
yAxis.NormalizeFast();
|
||||
zAxis.NormalizeFast();
|
||||
tile->ViewPosition = object.Bounds.Transformation.LocalToWorld(localSpaceOffset);
|
||||
tile->ViewDirection = zAxis;
|
||||
|
||||
// Create view matrix
|
||||
tile->ViewMatrix.SetColumn1(Float4(xAxis, -Float3::Dot(xAxis, tile->ViewPosition)));
|
||||
tile->ViewMatrix.SetColumn2(Float4(yAxis, -Float3::Dot(yAxis, tile->ViewPosition)));
|
||||
tile->ViewMatrix.SetColumn3(Float4(zAxis, -Float3::Dot(zAxis, tile->ViewPosition)));
|
||||
tile->ViewMatrix.SetColumn4(Float4(0, 0, 0, 1));
|
||||
|
||||
// Calculate object bounds size in the view
|
||||
OrientedBoundingBox viewBounds(object.Bounds);
|
||||
viewBounds.Transform(tile->ViewMatrix);
|
||||
Float3 viewExtent = viewBounds.Transformation.LocalToWorldVector(viewBounds.Extents);
|
||||
tile->ViewBoundsSize = viewExtent.GetAbsolute() * 2.0f;
|
||||
|
||||
// Per-tile data
|
||||
const float tileWidth = (float)tile->Width - GLOBAL_SURFACE_ATLAS_TILE_PADDING;
|
||||
const float tileHeight = (float)tile->Height - GLOBAL_SURFACE_ATLAS_TILE_PADDING;
|
||||
auto* tileData = surfaceAtlasData.ObjectsBuffer.WriteReserve<Float4>(GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE);
|
||||
tileData[0] = Float4(tile->X, tile->Y, tileWidth, tileHeight) * surfaceAtlasData.ResolutionInv;
|
||||
tileData[1] = Float4(tile->ViewMatrix.M11, tile->ViewMatrix.M12, tile->ViewMatrix.M13, tile->ViewMatrix.M41);
|
||||
tileData[2] = Float4(tile->ViewMatrix.M21, tile->ViewMatrix.M22, tile->ViewMatrix.M23, tile->ViewMatrix.M42);
|
||||
tileData[3] = Float4(tile->ViewMatrix.M31, tile->ViewMatrix.M32, tile->ViewMatrix.M33, tile->ViewMatrix.M43);
|
||||
tileData[4] = Float4(tile->ViewBoundsSize, 0.0f); // w unused
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Rasterize world geometry material properties into Global Surface Atlas
|
||||
if (surfaceAtlasData.DirtyObjectsBuffer.Count() != 0)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user