This commit is contained in:
Wojtek Figat
2024-06-28 09:52:06 +02:00
12 changed files with 832 additions and 619 deletions

View File

@@ -841,7 +841,6 @@ namespace FlaxEditor.Modules
{
// Open project, then close it
Editor.OpenProject(Editor.GameProject.ProjectPath);
Editor.Windows.MainWindow.Close(ClosingReason.User);
}
private void OnMenuFileShowHide(Control control)

View File

@@ -5,6 +5,7 @@
#include "Engine/Platform/Platform.h"
#include "Engine/Core/Memory/Memory.h"
#include "Engine/Core/Memory/Allocation.h"
#include "Engine/Core/Math/Math.h"
/// <summary>
/// Template for ring buffer with variable capacity.
@@ -98,4 +99,10 @@ public:
Memory::DestructItems(Get() + Math::Min(_front, _back), _count);
_front = _back = _count = 0;
}
/// <summary>
/// Clears the collection (via Clear) and then frees the backing allocation.
/// </summary>
void Release()
{
Clear();
_allocation.Free();
}
};

View File

@@ -0,0 +1,86 @@
// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved.
#pragma once
#include "Engine/Core/Memory/Memory.h"
#include "Engine/Core/Types/BaseTypes.h"
// Base class for custom heap-based allocators (eg. with local pooling/paging). Expects only Allocate/Free methods to be provided.
// The derived type (This) has to expose static 'Allocate(size)' and 'Free(ptr, size)' functions that operate on sizes in bytes.
// InitialCapacity is the capacity (in items) used for the first growth of an empty collection.
template<typename This, uint32 InitialCapacity = 8>
class SimpleHeapAllocation
{
public:
    enum { HasSwap = true };

    // Storage for items of type T that remembers the allocation size so it can be passed back to This::Free.
    template<typename T>
    class Data
    {
        T* _data = nullptr;
        // Size of the current allocation in bytes. Zero-initialized so Swap never reads an indeterminate value from an empty storage.
        uintptr _size = 0;

    public:
        FORCE_INLINE Data()
        {
        }

        FORCE_INLINE ~Data()
        {
            if (_data)
                This::Free(_data, _size);
        }

        // Gets the pointer to the allocated memory (null if nothing is allocated).
        FORCE_INLINE T* Get()
        {
            return _data;
        }

        // Gets the pointer to the allocated memory (null if nothing is allocated).
        FORCE_INLINE const T* Get() const
        {
            return _data;
        }

        // Calculates the new capacity: doubles the current one (or starts from InitialCapacity) and ensures it is at least minCapacity.
        FORCE_INLINE int32 CalculateCapacityGrow(int32 capacity, int32 minCapacity) const
        {
            capacity = capacity ? capacity * 2 : InitialCapacity;
            if (capacity < minCapacity)
                capacity = minCapacity;
            return capacity;
        }

        // Allocates memory for the given amount of items (does not free any previous allocation).
        FORCE_INLINE void Allocate(uint64 capacity)
        {
            _size = capacity * sizeof(T);
            _data = (T*)This::Allocate(_size);
        }

        // Moves the first newCount items into a new allocation of the given capacity, destructs all oldCount items in the old memory and frees it.
        FORCE_INLINE void Relocate(uint64 capacity, int32 oldCount, int32 newCount)
        {
            T* newData = capacity != 0 ? (T*)This::Allocate(capacity * sizeof(T)) : nullptr;
            if (oldCount)
            {
                if (newCount > 0)
                    Memory::MoveItems(newData, _data, newCount);
                Memory::DestructItems(_data, oldCount);
            }
            if (_data)
                This::Free(_data, _size);
            _data = newData;
            _size = capacity * sizeof(T);
        }

        // Frees the allocated memory (if any) and resets the storage to the empty state.
        FORCE_INLINE void Free()
        {
            if (_data)
            {
                This::Free(_data, _size);
                _data = nullptr;
                _size = 0; // Keep the size in sync with the pointer so no stale value is left behind
            }
        }

        // Exchanges the allocation (pointer and size) with the other storage.
        FORCE_INLINE void Swap(Data& other)
        {
            ::Swap(_data, other._data);
            ::Swap(_size, other._size);
        }
    };
};

View File

@@ -4476,6 +4476,8 @@ void PhysicsBackend::DestroyController(void* controller)
void PhysicsBackend::DestroyMaterial(void* material)
{
if (!PhysX)
return; // Skip when called by Content unload after Physics is disposed
ASSERT_LOW_LAYER(material);
auto materialPhysX = (PxMaterial*)material;
materialPhysX->userData = nullptr;
@@ -4486,6 +4488,8 @@ void PhysicsBackend::DestroyMaterial(void* material)
void PhysicsBackend::DestroyObject(void* object)
{
if (!PhysX)
return; // Skip when called by Content unload after Physics is disposed
ASSERT_LOW_LAYER(object);
auto objectPhysX = (PxBase*)object;
FlushLocker.Lock();

View File

@@ -14,7 +14,6 @@
#include "Engine/Content/Content.h"
#include "Engine/Graphics/GPUContext.h"
#include "Engine/Graphics/GPUDevice.h"
#include "Engine/Graphics/Graphics.h"
#include "Engine/Graphics/RenderTask.h"
#include "Engine/Graphics/RenderBuffers.h"
#include "Engine/Graphics/RenderTargetPool.h"
@@ -40,6 +39,8 @@
#define GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES 0 // Forces to redraw all object tiles every frame
#define GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_OBJECTS 0 // Debug draws object bounds on redraw (and tile draw projection locations)
#define GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_CHUNKS 0 // Debug draws culled chunks bounds (non-empty)
#define GLOBAL_SURFACE_ATLAS_MAX_NEW_OBJECTS_PER_FRAME 500 // Limits the amount of newly added objects to atlas per-frame to reduce hitches on 1st frame or camera-cut
#define GLOBAL_SURFACE_ATLAS_DIRTY_FRAMES(flags) (EnumHasAnyFlags(flags, StaticFlags::Lightmap) ? 200 : 10) // Amount of frames after which update object (less frequent updates for static scenes)
#if GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_OBJECTS || GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_CHUNKS
#include "Engine/Debug/DebugDraw.h"
@@ -131,6 +132,7 @@ class GlobalSurfaceAtlasCustomBuffer : public RenderBuffers::CustomBuffer, publi
{
public:
int32 Resolution = 0;
float ResolutionInv;
int32 AtlasPixelsTotal = 0;
int32 AtlasPixelsUsed = 0;
uint64 LastFrameAtlasInsertFail = 0;
@@ -155,7 +157,6 @@ public:
Array<void*> DirtyObjectsBuffer;
Vector4 CullingPosDistance;
uint64 CurrentFrame;
float ResolutionInv;
Float3 ViewPosition;
float TileTexelsPerWorldUnit;
float DistanceScalingStart;
@@ -164,7 +165,7 @@ public:
float MinObjectRadius;
// Async objects drawing cache
Array<int64, FixedAllocation<2>> AsyncDrawWaitLabels;
Array<int64, FixedAllocation<3>> AsyncDrawWaitLabels;
RenderListBuffer<GlobalSurfaceAtlasTile*> AsyncFreeTiles;
RenderListBuffer<GlobalSurfaceAtlasNewObject> AsyncNewObjects;
RenderListBuffer<GlobalSurfaceAtlasNewTile> AsyncNewTiles;
@@ -178,7 +179,7 @@ public:
void ClearObjects()
{
WaitForDrawActors();
WaitForDrawing();
CulledObjectsCounterIndex = -1;
CulledObjectsUsageHistory.Clear();
LastFrameAtlasDefragmentation = Engine::FrameCount;
@@ -250,16 +251,17 @@ public:
}
}
void StartDrawActors(const RenderContext& renderContext, bool enableAsync = false)
void StartDrawing(const RenderContext& renderContext, bool enableAsync = false)
{
if (AsyncDrawWaitLabels.HasItems())
return; // Already started earlier this frame
int32 resolution;
float distance;
GetOptions(renderContext, resolution, distance);
const float resolutionInv = 1.0f / (float)resolution;
if (Resolution != resolution)
return; // Not yet initialized
PROFILE_CPU();
const auto currentFrame = Engine::FrameCount;
if (Resolution == resolution)
{
// Perform atlas defragmentation if needed
constexpr float maxUsageToDefrag = 0.8f;
@@ -281,7 +283,6 @@ public:
// Setup data for rendering
CurrentFrame = currentFrame;
ResolutionInv = resolutionInv;
ViewPosition = renderContext.View.Position;
TileTexelsPerWorldUnit = 1.0f / METERS_TO_UNITS(0.1f); // Scales the tiles resolution
DistanceScalingStart = METERS_TO_UNITS(20.0f); // Distance from camera at which the tiles resolution starts to be scaled down
@@ -301,42 +302,43 @@ public:
if (enableAsync)
{
// Run in async via Job System
// Run sync actors drawing now or force in async (different drawing path doesn't interfere with normal scene drawing)
Function<void(int32)> func;
func.Bind<GlobalSurfaceAtlasCustomBuffer, &GlobalSurfaceAtlasCustomBuffer::DrawActorsJob>(this);
func.Bind<GlobalSurfaceAtlasCustomBuffer, &GlobalSurfaceAtlasCustomBuffer::DrawActorsJobSync>(this);
const int32 jobCount = Math::Max(JobSystem::GetThreadsCount() - 1, 1); // Leave 1 thread unused to not block the main-thread (jobs will overlap with rendering)
AsyncDrawWaitLabels.Add(JobSystem::Dispatch(func, jobCount));
// Run sync actors drawing now or force in async (different drawing path doesn't interfere with normal scene drawing)
func.Bind<GlobalSurfaceAtlasCustomBuffer, &GlobalSurfaceAtlasCustomBuffer::DrawActorsJobSync>(this);
// Run in async via Job System
func.Bind<GlobalSurfaceAtlasCustomBuffer, &GlobalSurfaceAtlasCustomBuffer::DrawActorsJob>(this);
AsyncDrawWaitLabels.Add(JobSystem::Dispatch(func, jobCount));
// Run dependant job that will process objects data in async
func.Bind<GlobalSurfaceAtlasCustomBuffer, &GlobalSurfaceAtlasCustomBuffer::SetupJob>(this);
AsyncDrawWaitLabels.Add(JobSystem::Dispatch(func, ToSpan(AsyncDrawWaitLabels)));
}
else
{
DrawActorsJob(-1);
DrawActorsJob(0);
SetupJob(0);
}
}
void WaitForDrawActors()
void WaitForDrawing()
{
for (int64 label : AsyncDrawWaitLabels)
JobSystem::Wait(label);
AsyncDrawWaitLabels.Clear();
}
void PostDrawActors()
void FlushNewObjects()
{
PROFILE_CPU_NAMED("Post Draw");
PROFILE_CPU_NAMED("Flush Atlas");
// Flush atlas tiles freeing
for (auto* tile : AsyncFreeTiles)
{
Atlas.Free(tile, this);
}
AsyncFreeTiles.Clear();
// Flush new objects adding
for (auto& newObject : AsyncNewObjects)
{
auto& object = Objects[newObject.ActorObject];
@@ -350,7 +352,6 @@ public:
}
AsyncNewObjects.Clear();
// Flush new tiles adding
for (auto& newTile : AsyncNewTiles)
{
auto& object = Objects[newTile.ActorObject];
@@ -371,6 +372,114 @@ public:
AsyncNewTiles.Clear();
}
// Removes objects whose LastFrameUsed differs from CurrentFrame and returns their tiles to the atlas.
void CompactObjects()
{
    PROFILE_CPU_NAMED("Compact Objects");
    for (auto i = Objects.Begin(); i.IsNotEnd(); ++i)
    {
        auto& entry = i->Value;
        if (entry.LastFrameUsed == CurrentFrame)
            continue; // Still in use

        // Free all tiles owned by the object before dropping it
        for (auto& tile : entry.Tiles)
        {
            if (!tile)
                continue;
            Atlas.Free(tile, this);
        }
        Objects.Remove(i);
    }
}
void WriteObjects()
{
PROFILE_CPU_NAMED("Write Objects");
DirtyObjectsBuffer.Clear();
ObjectsBuffer.Clear();
for (auto& e : Objects)
{
auto& object = e.Value;
if (object.Dirty)
{
// Collect dirty objects
object.LastFrameUpdated = CurrentFrame;
object.LightingUpdateFrame = CurrentFrame;
DirtyObjectsBuffer.Add(e.Key);
}
Matrix3x3 worldToLocalRotation;
Matrix3x3::RotationQuaternion(object.Bounds.Transformation.Orientation.Conjugated(), worldToLocalRotation);
Float3 worldPosition = object.Bounds.Transformation.Translation;
Float3 worldExtents = object.Bounds.Extents * object.Bounds.Transformation.Scale;
// Write to objects buffer (this must match unpacking logic in HLSL)
uint32 objectAddress = ObjectsBuffer.Data.Count() / sizeof(Float4);
auto* objectData = ObjectsBuffer.WriteReserve<Float4>(GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE);
objectData[0] = Float4(object.Position, object.Radius);
objectData[1] = Float4::Zero;
objectData[2] = Float4(worldToLocalRotation.M11, worldToLocalRotation.M12, worldToLocalRotation.M13, worldPosition.X);
objectData[3] = Float4(worldToLocalRotation.M21, worldToLocalRotation.M22, worldToLocalRotation.M23, worldPosition.Y);
objectData[4] = Float4(worldToLocalRotation.M31, worldToLocalRotation.M32, worldToLocalRotation.M33, worldPosition.Z);
objectData[5] = Float4(worldExtents, object.UseVisibility ? 1.0f : 0.0f);
auto tileOffsets = reinterpret_cast<uint16*>(&objectData[1]); // xyz used for tile offsets packed into uint16
auto objectDataSize = reinterpret_cast<uint32*>(&objectData[1].W); // w used for object size (count of Float4s for object+tiles)
*objectDataSize = GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE;
for (int32 tileIndex = 0; tileIndex < 6; tileIndex++)
{
auto* tile = object.Tiles[tileIndex];
if (!tile)
continue;
tile->ObjectAddressOffset = *objectDataSize;
tile->Address = objectAddress + tile->ObjectAddressOffset;
tileOffsets[tileIndex] = tile->ObjectAddressOffset;
*objectDataSize += GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE;
// Setup view to render object from the side
Float3 xAxis, yAxis, zAxis = Float3::Zero;
zAxis.Raw[tileIndex / 2] = tileIndex & 1 ? 1.0f : -1.0f;
yAxis = tileIndex == 2 || tileIndex == 3 ? Float3::Right : Float3::Up;
Float3::Cross(yAxis, zAxis, xAxis);
Float3 localSpaceOffset = -zAxis * object.Bounds.Extents;
xAxis = object.Bounds.Transformation.LocalToWorldVector(xAxis);
yAxis = object.Bounds.Transformation.LocalToWorldVector(yAxis);
zAxis = object.Bounds.Transformation.LocalToWorldVector(zAxis);
xAxis.NormalizeFast();
yAxis.NormalizeFast();
zAxis.NormalizeFast();
tile->ViewPosition = object.Bounds.Transformation.LocalToWorld(localSpaceOffset);
tile->ViewDirection = zAxis;
// Create view matrix
tile->ViewMatrix.SetColumn1(Float4(xAxis, -Float3::Dot(xAxis, tile->ViewPosition)));
tile->ViewMatrix.SetColumn2(Float4(yAxis, -Float3::Dot(yAxis, tile->ViewPosition)));
tile->ViewMatrix.SetColumn3(Float4(zAxis, -Float3::Dot(zAxis, tile->ViewPosition)));
tile->ViewMatrix.SetColumn4(Float4(0, 0, 0, 1));
// Calculate object bounds size in the view
OrientedBoundingBox viewBounds(object.Bounds);
viewBounds.Transform(tile->ViewMatrix);
Float3 viewExtent = viewBounds.Transformation.LocalToWorldVector(viewBounds.Extents);
tile->ViewBoundsSize = viewExtent.GetAbsolute() * 2.0f;
// Per-tile data
const float tileWidth = (float)tile->Width - GLOBAL_SURFACE_ATLAS_TILE_PADDING;
const float tileHeight = (float)tile->Height - GLOBAL_SURFACE_ATLAS_TILE_PADDING;
auto* tileData = ObjectsBuffer.WriteReserve<Float4>(GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE);
tileData[0] = Float4(tile->X, tile->Y, tileWidth, tileHeight) * ResolutionInv;
tileData[1] = Float4(tile->ViewMatrix.M11, tile->ViewMatrix.M12, tile->ViewMatrix.M13, tile->ViewMatrix.M41);
tileData[2] = Float4(tile->ViewMatrix.M21, tile->ViewMatrix.M22, tile->ViewMatrix.M23, tile->ViewMatrix.M42);
tileData[3] = Float4(tile->ViewMatrix.M31, tile->ViewMatrix.M32, tile->ViewMatrix.M33, tile->ViewMatrix.M43);
tileData[4] = Float4(tile->ViewBoundsSize, 0.0f); // w unused
}
}
}
// Runs FlushNewObjects, CompactObjects and WriteObjects in that order.
// Bound as a job function (Function<void(int32)>) on the async path and called directly on the sync path; the index argument is ignored.
void SetupJob(int32)
{
PROFILE_CPU();
FlushNewObjects();
CompactObjects();
WriteObjects();
}
// [ISceneRenderingListener]
void OnSceneRenderingAddActor(Actor* a) override
{
@@ -550,7 +659,7 @@ void GlobalSurfaceAtlasPass::Dispose()
void GlobalSurfaceAtlasPass::OnCollectDrawCalls(RenderContextBatch& renderContextBatch)
{
// Check if Global Surface Atlas will be used this frame
PROFILE_GPU_CPU_NAMED("Global Surface Atlas");
PROFILE_CPU_NAMED("Global Surface Atlas");
if (checkIfSkipPass())
return;
RenderContext& renderContext = renderContextBatch.GetMainContext();
@@ -563,7 +672,7 @@ void GlobalSurfaceAtlasPass::OnCollectDrawCalls(RenderContextBatch& renderContex
return;
auto& surfaceAtlasData = *renderContext.Buffers->GetCustomBuffer<GlobalSurfaceAtlasCustomBuffer>(TEXT("GlobalSurfaceAtlas"));
_surfaceAtlasData = &surfaceAtlasData;
surfaceAtlasData.StartDrawActors(renderContext, renderContextBatch.EnableAsync);
surfaceAtlasData.StartDrawing(renderContext, renderContextBatch.EnableAsync);
}
bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* context, BindingData& result)
@@ -590,10 +699,6 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
surfaceAtlasData.LastFrameUsed = currentFrame;
PROFILE_GPU_CPU_NAMED("Global Surface Atlas");
// Start objects drawing (in case it was not yet started earlier this frame)
_surfaceAtlasData = &surfaceAtlasData;
surfaceAtlasData.StartDrawActors(renderContext);
// Setup options
int32 resolution;
float distance;
@@ -620,6 +725,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
INIT_ATLAS_TEXTURE(AtlasDepth, PixelFormat::D16_UNorm);
#undef INIT_ATLAS_TEXTURE
surfaceAtlasData.Resolution = resolution;
surfaceAtlasData.ResolutionInv = resolutionInv;
surfaceAtlasData.AtlasPixelsTotal = resolution * resolution;
if (!surfaceAtlasData.ChunksBuffer)
{
@@ -635,6 +741,11 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
if (!_vertexBuffer)
_vertexBuffer = New<DynamicVertexBuffer>(0u, (uint32)sizeof(AtlasTileVertex), TEXT("GlobalSurfaceAtlas.VertexBuffer"));
// Ensure that async objects drawing ended
_surfaceAtlasData = &surfaceAtlasData;
surfaceAtlasData.StartDrawing(renderContext); // (ignored if not started earlier this frame)
surfaceAtlasData.WaitForDrawing();
// Utility for writing into tiles vertex buffer
const Float2 posToClipMul(2.0f * resolutionInv, -2.0f * resolutionInv);
const Float2 posToClipAdd(-1.0f, 1.0f);
@@ -665,110 +776,6 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
context->BindVB(ToSpan(&vb, 1)); \
context->DrawInstanced(_vertexBuffer->Data.Count() / sizeof(AtlasTileVertex), 1);
// Ensure that async objects drawing ended
surfaceAtlasData.WaitForDrawActors();
surfaceAtlasData.PostDrawActors();
// Remove unused objects
{
PROFILE_GPU_CPU_NAMED("Compact Objects");
for (auto it = surfaceAtlasData.Objects.Begin(); it.IsNotEnd(); ++it)
{
if (it->Value.LastFrameUsed != currentFrame)
{
for (auto& tile : it->Value.Tiles)
{
if (tile)
surfaceAtlasData.Atlas.Free(tile, &surfaceAtlasData);
}
surfaceAtlasData.Objects.Remove(it);
}
}
}
// Write objects to the data buffer
{
PROFILE_CPU_NAMED("Write Objects");
surfaceAtlasData.DirtyObjectsBuffer.Clear();
surfaceAtlasData.ObjectsBuffer.Clear();
for (auto& e : surfaceAtlasData.Objects)
{
auto& object = e.Value;
if (object.Dirty)
{
// Collect dirty objects
object.LastFrameUpdated = surfaceAtlasData.CurrentFrame;
object.LightingUpdateFrame = surfaceAtlasData.CurrentFrame;
surfaceAtlasData.DirtyObjectsBuffer.Add(e.Key);
}
Matrix3x3 worldToLocalRotation;
Matrix3x3::RotationQuaternion(object.Bounds.Transformation.Orientation.Conjugated(), worldToLocalRotation);
Float3 worldPosition = object.Bounds.Transformation.Translation;
Float3 worldExtents = object.Bounds.Extents * object.Bounds.Transformation.Scale;
// Write to objects buffer (this must match unpacking logic in HLSL)
uint32 objectAddress = surfaceAtlasData.ObjectsBuffer.Data.Count() / sizeof(Float4);
auto* objectData = surfaceAtlasData.ObjectsBuffer.WriteReserve<Float4>(GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE);
objectData[0] = Float4(object.Position, object.Radius);
objectData[1] = Float4::Zero;
objectData[2] = Float4(worldToLocalRotation.M11, worldToLocalRotation.M12, worldToLocalRotation.M13, worldPosition.X);
objectData[3] = Float4(worldToLocalRotation.M21, worldToLocalRotation.M22, worldToLocalRotation.M23, worldPosition.Y);
objectData[4] = Float4(worldToLocalRotation.M31, worldToLocalRotation.M32, worldToLocalRotation.M33, worldPosition.Z);
objectData[5] = Float4(worldExtents, object.UseVisibility ? 1.0f : 0.0f);
auto tileOffsets = reinterpret_cast<uint16*>(&objectData[1]); // xyz used for tile offsets packed into uint16
auto objectDataSize = reinterpret_cast<uint32*>(&objectData[1].W); // w used for object size (count of Float4s for object+tiles)
*objectDataSize = GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE;
for (int32 tileIndex = 0; tileIndex < 6; tileIndex++)
{
auto* tile = object.Tiles[tileIndex];
if (!tile)
continue;
tile->ObjectAddressOffset = *objectDataSize;
tile->Address = objectAddress + tile->ObjectAddressOffset;
tileOffsets[tileIndex] = tile->ObjectAddressOffset;
*objectDataSize += GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE;
// Setup view to render object from the side
Float3 xAxis, yAxis, zAxis = Float3::Zero;
zAxis.Raw[tileIndex / 2] = tileIndex & 1 ? 1.0f : -1.0f;
yAxis = tileIndex == 2 || tileIndex == 3 ? Float3::Right : Float3::Up;
Float3::Cross(yAxis, zAxis, xAxis);
Float3 localSpaceOffset = -zAxis * object.Bounds.Extents;
xAxis = object.Bounds.Transformation.LocalToWorldVector(xAxis);
yAxis = object.Bounds.Transformation.LocalToWorldVector(yAxis);
zAxis = object.Bounds.Transformation.LocalToWorldVector(zAxis);
xAxis.NormalizeFast();
yAxis.NormalizeFast();
zAxis.NormalizeFast();
tile->ViewPosition = object.Bounds.Transformation.LocalToWorld(localSpaceOffset);
tile->ViewDirection = zAxis;
// Create view matrix
tile->ViewMatrix.SetColumn1(Float4(xAxis, -Float3::Dot(xAxis, tile->ViewPosition)));
tile->ViewMatrix.SetColumn2(Float4(yAxis, -Float3::Dot(yAxis, tile->ViewPosition)));
tile->ViewMatrix.SetColumn3(Float4(zAxis, -Float3::Dot(zAxis, tile->ViewPosition)));
tile->ViewMatrix.SetColumn4(Float4(0, 0, 0, 1));
// Calculate object bounds size in the view
OrientedBoundingBox viewBounds(object.Bounds);
viewBounds.Transform(tile->ViewMatrix);
Float3 viewExtent = viewBounds.Transformation.LocalToWorldVector(viewBounds.Extents);
tile->ViewBoundsSize = viewExtent.GetAbsolute() * 2.0f;
// Per-tile data
const float tileWidth = (float)tile->Width - GLOBAL_SURFACE_ATLAS_TILE_PADDING;
const float tileHeight = (float)tile->Height - GLOBAL_SURFACE_ATLAS_TILE_PADDING;
auto* tileData = surfaceAtlasData.ObjectsBuffer.WriteReserve<Float4>(GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE);
tileData[0] = Float4(tile->X, tile->Y, tileWidth, tileHeight) * surfaceAtlasData.ResolutionInv;
tileData[1] = Float4(tile->ViewMatrix.M11, tile->ViewMatrix.M12, tile->ViewMatrix.M13, tile->ViewMatrix.M41);
tileData[2] = Float4(tile->ViewMatrix.M21, tile->ViewMatrix.M22, tile->ViewMatrix.M23, tile->ViewMatrix.M42);
tileData[3] = Float4(tile->ViewMatrix.M31, tile->ViewMatrix.M32, tile->ViewMatrix.M33, tile->ViewMatrix.M43);
tileData[4] = Float4(tile->ViewBoundsSize, 0.0f); // w unused
}
}
}
// Rasterize world geometry material properties into Global Surface Atlas
if (surfaceAtlasData.DirtyObjectsBuffer.Count() != 0)
{
@@ -795,7 +802,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
context->SetRenderTarget(depthBuffer, ToSpan(targetBuffers, ARRAY_COUNT(targetBuffers)));
{
PROFILE_GPU_CPU_NAMED("Clear");
if (noCache || GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES || !GPU_SPREAD_WORKLOAD)
if (noCache || GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES)
{
// Full-atlas hardware clear
context->ClearDepth(depthBuffer);
@@ -1084,7 +1091,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
{
GlobalSurfaceAtlasLight& lightData = surfaceAtlasData.Lights[light.ID];
lightData.LastFrameUsed = currentFrame;
uint32 redrawFramesCount = EnumHasAnyFlags(light.StaticFlags, StaticFlags::Lightmap) ? 120 : 4;
uint32 redrawFramesCount = GLOBAL_SURFACE_ATLAS_DIRTY_FRAMES(light.StaticFlags);
if (surfaceAtlasData.CurrentFrame - lightData.LastFrameUpdated < (redrawFramesCount + (light.ID.D & redrawFramesCount)))
continue;
lightData.LastFrameUpdated = currentFrame;
@@ -1119,7 +1126,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
{
GlobalSurfaceAtlasLight& lightData = surfaceAtlasData.Lights[light.ID];
lightData.LastFrameUsed = currentFrame;
uint32 redrawFramesCount = EnumHasAnyFlags(light.StaticFlags, StaticFlags::Lightmap) ? 120 : 4;
uint32 redrawFramesCount = GLOBAL_SURFACE_ATLAS_DIRTY_FRAMES(light.StaticFlags);
if (surfaceAtlasData.CurrentFrame - lightData.LastFrameUpdated < (redrawFramesCount + (light.ID.D & redrawFramesCount)))
continue;
lightData.LastFrameUpdated = currentFrame;
@@ -1141,7 +1148,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
{
GlobalSurfaceAtlasLight& lightData = surfaceAtlasData.Lights[light.ID];
lightData.LastFrameUsed = currentFrame;
uint32 redrawFramesCount = EnumHasAnyFlags(light.StaticFlags, StaticFlags::Lightmap) ? 120 : 4;
uint32 redrawFramesCount = GLOBAL_SURFACE_ATLAS_DIRTY_FRAMES(light.StaticFlags);
if (surfaceAtlasData.CurrentFrame - lightData.LastFrameUpdated < (redrawFramesCount + (light.ID.D & redrawFramesCount)))
continue;
lightData.LastFrameUpdated = currentFrame;
@@ -1457,9 +1464,9 @@ void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, con
const float distanceScale = Math::Lerp(1.0f, surfaceAtlasData.DistanceScaling, Math::InverseLerp(surfaceAtlasData.DistanceScalingStart, surfaceAtlasData.DistanceScalingEnd, (float)CollisionsHelper::DistanceSpherePoint(actorObjectBounds, surfaceAtlasData.ViewPosition)));
const float tilesScale = surfaceAtlasData.TileTexelsPerWorldUnit * distanceScale * qualityScale;
GlobalSurfaceAtlasObject* object = surfaceAtlasData.Objects.TryGet(actorObject);
if (!object && surfaceAtlasData.AsyncNewObjects.Count() >= 512)
if (!object && surfaceAtlasData.AsyncNewObjects.Count() >= GLOBAL_SURFACE_ATLAS_MAX_NEW_OBJECTS_PER_FRAME)
return; // Reduce load on 1st frame and add more objects during next frames to balance performance
bool anyTile = false, dirty = GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES || !GPU_SPREAD_WORKLOAD;
bool anyTile = false, dirty = GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES;
for (int32 tileIndex = 0; tileIndex < 6; tileIndex++)
{
if (((1 << tileIndex) & tilesMask) == 0)
@@ -1515,7 +1522,7 @@ void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, con
if (object)
{
// Redraw objects from time-to-time (dynamic objects can be animated, static objects can have textures streamed)
uint32 redrawFramesCount = actor->HasStaticFlag(StaticFlags::Lightmap) ? 120 : 4;
uint32 redrawFramesCount = GLOBAL_SURFACE_ATLAS_DIRTY_FRAMES(actor->GetStaticFlags());
if (surfaceAtlasData.CurrentFrame - object->LastFrameUpdated >= (redrawFramesCount + (actor->GetID().D & redrawFramesCount)))
dirty = true;
@@ -1525,7 +1532,7 @@ void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, con
object->Bounds = bounds;
object->Position = (Float3)actorObjectBounds.Center; // TODO: large worlds
object->Radius = (float)actorObjectBounds.Radius;
object->Dirty = dirty;
object->Dirty |= dirty;
object->UseVisibility = useVisibility;
}
else

File diff suppressed because it is too large Load Diff

View File

@@ -39,20 +39,15 @@ private:
GPUShaderProgramCS* _csGenerateMip = nullptr;
GPUConstantBuffer* _cb0 = nullptr;
GPUConstantBuffer* _cb1 = nullptr;
// Rasterization cache
class DynamicStructuredBuffer* _objectsBuffer = nullptr;
Array<GPUTextureView*> _objectsTextures;
uint16 _objectsBufferCount;
int32 _cascadeIndex;
float _voxelSize, _chunkSize;
BoundingBox _cascadeBounds;
BoundingBox _cascadeCullingBounds;
class GlobalSignDistanceFieldCustomBuffer* _sdfData;
Vector3 _sdfDataOriginMin;
Vector3 _sdfDataOriginMax;
public:
/// <summary>
/// Calls drawing scene objects in async early in the frame.
/// </summary>
/// <param name="renderContextBatch">The rendering context batch.</param>
void OnCollectDrawCalls(RenderContextBatch& renderContextBatch);
/// <summary>
/// Gets the Global SDF (only if enabled in Graphics Settings).
/// </summary>
@@ -78,10 +73,7 @@ public:
/// <param name="output">The output buffer.</param>
void RenderDebug(RenderContext& renderContext, GPUContext* context, GPUTexture* output);
void GetCullingData(BoundingBox& bounds) const
{
bounds = _cascadeCullingBounds;
}
void GetCullingData(BoundingBox& bounds) const;
// Rasterize Model SDF into the Global SDF. Call it from actor Draw() method during DrawPass::GlobalSDF.
void RasterizeModelSDF(Actor* actor, const ModelBase::SDFData& sdf, const Transform& localToWorld, const BoundingBox& objectBounds);

View File

@@ -30,13 +30,7 @@ namespace
Array<DrawBatch> SortingBatches;
Array<RenderList*> FreeRenderList;
struct MemPoolEntry
{
void* Ptr;
uintptr Size;
};
Array<MemPoolEntry> MemPool;
Array<Pair<void*, uintptr>> MemPool;
CriticalSection MemPoolLocker;
}
@@ -147,18 +141,16 @@ void* RendererAllocation::Allocate(uintptr size)
MemPoolLocker.Lock();
for (int32 i = 0; i < MemPool.Count(); i++)
{
if (MemPool[i].Size == size)
if (MemPool.Get()[i].Second == size)
{
result = MemPool[i].Ptr;
result = MemPool.Get()[i].First;
MemPool.RemoveAt(i);
break;
}
}
MemPoolLocker.Unlock();
if (!result)
{
result = Platform::Allocate(size, 16);
}
return result;
}
@@ -201,7 +193,7 @@ void RenderList::CleanupCache()
SortingIndices.Resize(0);
FreeRenderList.ClearDelete();
for (auto& e : MemPool)
Platform::Free(e.Ptr);
Platform::Free(e.First);
MemPool.Clear();
}

View File

@@ -409,6 +409,8 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont
JobSystem::SetJobStartingOnDispatch(false);
task->OnCollectDrawCalls(renderContextBatch, SceneRendering::DrawCategory::SceneDraw);
task->OnCollectDrawCalls(renderContextBatch, SceneRendering::DrawCategory::SceneDrawAsync);
if (setup.UseGlobalSDF)
GlobalSignDistanceFieldPass::Instance()->OnCollectDrawCalls(renderContextBatch);
if (setup.UseGlobalSurfaceAtlas)
GlobalSurfaceAtlasPass::Instance()->OnCollectDrawCalls(renderContextBatch);

View File

@@ -2,86 +2,11 @@
#pragma once
#include "Engine/Core/Memory/Memory.h"
#include "Engine/Core/Types/BaseTypes.h"
#include "Engine/Core/Memory/SimpleHeapAllocation.h"
class RendererAllocation
class RendererAllocation : public SimpleHeapAllocation<RendererAllocation, 64>
{
public:
static FLAXENGINE_API void* Allocate(uintptr size);
static FLAXENGINE_API void Free(void* ptr, uintptr size);
enum { HasSwap = true };
template<typename T>
class Data
{
T* _data = nullptr;
uintptr _size;
public:
FORCE_INLINE Data()
{
}
FORCE_INLINE ~Data()
{
if (_data)
RendererAllocation::Free(_data, _size);
}
FORCE_INLINE T* Get()
{
return _data;
}
FORCE_INLINE const T* Get() const
{
return _data;
}
FORCE_INLINE int32 CalculateCapacityGrow(int32 capacity, int32 minCapacity) const
{
capacity = capacity ? capacity * 2 : 64;
if (capacity < minCapacity)
capacity = minCapacity;
return capacity;
}
FORCE_INLINE void Allocate(uint64 capacity)
{
_size = capacity * sizeof(T);
_data = (T*)RendererAllocation::Allocate(_size);
}
FORCE_INLINE void Relocate(uint64 capacity, int32 oldCount, int32 newCount)
{
T* newData = capacity != 0 ? (T*)RendererAllocation::Allocate(capacity * sizeof(T)) : nullptr;
if (oldCount)
{
if (newCount > 0)
Memory::MoveItems(newData, _data, newCount);
Memory::DestructItems(_data, oldCount);
}
if (_data)
RendererAllocation::Free(_data, _size);
_data = newData;
_size = capacity * sizeof(T);
}
FORCE_INLINE void Free()
{
if (_data)
{
RendererAllocation::Free(_data, _size);
_data = nullptr;
}
}
FORCE_INLINE void Swap(Data& other)
{
::Swap(_data, other._data);
::Swap(_size, other._size);
}
};
};

View File

@@ -5,34 +5,29 @@
#include "Engine/Platform/CPUInfo.h"
#include "Engine/Platform/Thread.h"
#include "Engine/Platform/ConditionVariable.h"
#include "Engine/Core/Types/Span.h"
#include "Engine/Core/Types/Pair.h"
#include "Engine/Core/Memory/SimpleHeapAllocation.h"
#include "Engine/Core/Collections/Dictionary.h"
#include "Engine/Core/Collections/RingBuffer.h"
#include "Engine/Engine/EngineService.h"
#include "Engine/Profiler/ProfilerCPU.h"
#if USE_CSHARP
#include "Engine/Scripting/ManagedCLR/MCore.h"
#endif
// Jobs storage perf info:
// (500 jobs, i7 9th gen)
// JOB_SYSTEM_USE_MUTEX=1, enqueue=130-280 cycles, dequeue=2-6 cycles
// JOB_SYSTEM_USE_MUTEX=0, enqueue=300-700 cycles, dequeue=10-16 cycles
// So using RingBuffer+Mutex+Signals is better than moodycamel::ConcurrentQueue
#define JOB_SYSTEM_ENABLED 1
#define JOB_SYSTEM_USE_MUTEX 1
#define JOB_SYSTEM_USE_STATS 0
#if JOB_SYSTEM_USE_STATS
#include "Engine/Core/Log.h"
#endif
#if JOB_SYSTEM_USE_MUTEX
#include "Engine/Core/Collections/RingBuffer.h"
#else
#include "ConcurrentQueue.h"
#endif
#if JOB_SYSTEM_ENABLED
// Local allocator for job system memory that uses internal pooling and assumes that JobsLocker is taken (write access owned by the calling thread).
class JobSystemAllocation : public SimpleHeapAllocation<JobSystemAllocation>
{
public:
// Gets a memory block of the given size in bytes (reuses a pooled block of the same size when available).
static void* Allocate(uintptr size);
// Returns the memory block of the given size in bytes back to the pool.
static void Free(void* ptr, uintptr size);
};
class JobSystemService : public EngineService
{
public:
@@ -48,13 +43,26 @@ public:
struct JobData
{
Function<void(int32)> Job;
int32 Index;
int64 JobKey;
};
template<>
struct TIsPODType<JobData>
{
enum { Value = true };
};
// Per-dispatch state stored in JobContexts under the job key.
struct JobContext
{
// Amount of job invocations left; decremented by worker threads after running a job, the context is removed once it reaches zero.
volatile int64 JobsLeft;
// Amount of dependencies left; decremented when a dependency completes, the job gets queued once it reaches zero.
int32 DependenciesLeft;
// Function to invoke for this job (copied by the worker thread before running it).
Function<void(int32)> Job;
// Keys of the job contexts that depend on this job (visited when this job completes).
Array<int64, JobSystemAllocation> Dependants;
};
template<>
struct TIsPODType<JobContext>
{
enum { Value = false };
};
@@ -79,40 +87,44 @@ public:
}
};
struct JobContext
{
volatile int64 JobsLeft;
};
template<>
struct TIsPODType<JobContext>
{
enum { Value = true };
};
namespace
{
JobSystemService JobSystemInstance;
Array<Pair<void*, uintptr>> MemPool;
Thread* Threads[PLATFORM_THREADS_LIMIT / 2] = {};
int32 ThreadsCount = 0;
bool JobStartingOnDispatch = true;
volatile int64 ExitFlag = 0;
volatile int64 JobLabel = 0;
Dictionary<int64, JobContext> JobContexts;
Dictionary<int64, JobContext, JobSystemAllocation> JobContexts;
ConditionVariable JobsSignal;
CriticalSection JobsMutex;
ConditionVariable WaitSignal;
CriticalSection WaitMutex;
CriticalSection JobsLocker;
#if JOB_SYSTEM_USE_MUTEX
RingBuffer<JobData> Jobs;
#else
ConcurrentQueue<JobData> Jobs;
#endif
#if JOB_SYSTEM_USE_STATS
int64 DequeueCount = 0;
int64 DequeueSum = 0;
#endif
}
// Gets a memory block of the given size: takes the first pooled block with exactly matching size out of MemPool, otherwise falls back to Platform::Allocate(size, 16).
void* JobSystemAllocation::Allocate(uintptr size)
{
    void* block = nullptr;

    // Search the pool for a cached block of the same size
    auto* entries = MemPool.Get();
    const int32 count = MemPool.Count();
    for (int32 index = 0; index < count; index++)
    {
        if (entries[index].Second != size)
            continue;
        block = entries[index].First;
        MemPool.RemoveAt(index);
        break;
    }

    return block ? block : Platform::Allocate(size, 16);
}
// Puts the block (with its size) into MemPool for later reuse by Allocate; the memory is not released to the platform here.
void JobSystemAllocation::Free(void* ptr, uintptr size)
{
MemPool.Add({ ptr, size });
}
bool JobSystemService::Init()
@@ -151,6 +163,12 @@ void JobSystemService::Dispose()
Threads[i] = nullptr;
}
}
JobContexts.SetCapacity(0);
Jobs.Release();
for (auto& e : MemPool)
Platform::Free(e.First);
MemPool.Clear();
}
int32 JobSystemThread::Run()
@@ -158,34 +176,22 @@ int32 JobSystemThread::Run()
Platform::SetThreadAffinityMask(1ull << Index);
JobData data;
Function<void(int32)> job;
bool attachCSharpThread = true;
#if !JOB_SYSTEM_USE_MUTEX
moodycamel::ConsumerToken consumerToken(Jobs);
#endif
while (Platform::AtomicRead(&ExitFlag) == 0)
{
// Try to get a job
#if JOB_SYSTEM_USE_STATS
const auto start = Platform::GetTimeCycles();
#endif
#if JOB_SYSTEM_USE_MUTEX
JobsLocker.Lock();
if (Jobs.Count() != 0)
{
data = Jobs.PeekFront();
Jobs.PopFront();
const JobContext& context = ((const Dictionary<int64, JobContext>&)JobContexts).At(data.JobKey);
job = context.Job;
}
JobsLocker.Unlock();
#else
if (!Jobs.try_dequeue(consumerToken, data))
data.Job.Unbind();
#endif
#if JOB_SYSTEM_USE_STATS
Platform::InterlockedIncrement(&DequeueCount);
Platform::InterlockedAdd(&DequeueSum, Platform::GetTimeCycles() - start);
#endif
if (data.Job.IsBinded())
if (job.IsBinded())
{
#if USE_CSHARP
// Ensure to have C# thread attached to this thead (late init due to MCore being initialized after Job System)
@@ -197,21 +203,37 @@ int32 JobSystemThread::Run()
#endif
// Run job
data.Job(data.Index);
job(data.Index);
// Move forward with the job queue
bool notifyWaiting = false;
JobsLocker.Lock();
JobContext& context = JobContexts.At(data.JobKey);
if (Platform::InterlockedDecrement(&context.JobsLeft) <= 0)
{
ASSERT_LOW_LAYER(context.JobsLeft <= 0);
// Update any dependant jobs
for (int64 dependant : context.Dependants)
{
JobContext& dependantContext = JobContexts.At(dependant);
if (--dependantContext.DependenciesLeft <= 0)
{
// Dispatch dependency when it's ready
JobData dependantData;
dependantData.JobKey = dependant;
for (dependantData.Index = 0; dependantData.Index < dependantContext.JobsLeft; dependantData.Index++)
Jobs.PushBack(dependantData);
}
}
// Remove completed context
JobContexts.Remove(data.JobKey);
notifyWaiting = true;
}
JobsLocker.Unlock();
if (notifyWaiting)
WaitSignal.NotifyAll();
WaitSignal.NotifyAll();
data.Job.Unbind();
job.Unbind();
}
else
{
@@ -247,39 +269,25 @@ void JobSystem::Execute(const Function<void(int32)>& job, int32 jobCount)
int64 JobSystem::Dispatch(const Function<void(int32)>& job, int32 jobCount)
{
PROFILE_CPU();
if (jobCount <= 0)
return 0;
PROFILE_CPU();
#if JOB_SYSTEM_ENABLED
#if JOB_SYSTEM_USE_STATS
const auto start = Platform::GetTimeCycles();
#endif
const auto label = Platform::InterlockedAdd(&JobLabel, (int64)jobCount) + jobCount;
JobData data;
data.Job = job;
data.JobKey = label;
JobContext context;
context.Job = job;
context.JobsLeft = jobCount;
context.DependenciesLeft = 0;
#if JOB_SYSTEM_USE_MUTEX
JobsLocker.Lock();
JobContexts.Add(label, context);
JobContexts.Add(label, MoveTemp(context));
for (data.Index = 0; data.Index < jobCount; data.Index++)
Jobs.PushBack(data);
JobsLocker.Unlock();
#else
JobsLocker.Lock();
JobContexts.Add(label, context);
JobsLocker.Unlock();
for (data.Index = 0; data.Index < jobCount; data.Index++)
Jobs.enqueue(data);
#endif
#if JOB_SYSTEM_USE_STATS
LOG(Info, "Job enqueue time: {0} cycles", (int64)(Platform::GetTimeCycles() - start));
#endif
if (JobStartingOnDispatch)
{
@@ -297,6 +305,56 @@ int64 JobSystem::Dispatch(const Function<void(int32)>& job, int32 jobCount)
#endif
}
// Dispatches the job for execution after the dispatches identified by the given dependency labels have completed.
// job: the job function, invoked with the execution index (0..jobCount-1).
// dependencies: labels of the dispatches that need to complete before this job starts.
// jobCount: the amount of job executions; nothing is dispatched for values <= 0.
// Returns the label identifying this dispatch (0 when nothing was dispatched or when the job system is disabled).
int64 JobSystem::Dispatch(const Function<void(int32)>& job, Span<int64> dependencies, int32 jobCount)
{
    if (jobCount <= 0)
        return 0;
    PROFILE_CPU();
#if JOB_SYSTEM_ENABLED
    const auto label = Platform::InterlockedAdd(&JobLabel, (int64)jobCount) + jobCount;

    JobData data;
    data.JobKey = label;

    JobContext context;
    context.Job = job;
    context.JobsLeft = jobCount;
    context.DependenciesLeft = 0;

    JobsLocker.Lock();
    for (int64 dependency : dependencies)
    {
        // Only dependencies that still have a context registered are counted (labels without one are skipped - presumably already completed)
        if (JobContext* dependencyContext = JobContexts.TryGet(dependency))
        {
            context.DependenciesLeft++;
            dependencyContext->Dependants.Add(label);
        }
    }

    // Decide before the context gets moved into the dictionary (the local object must not be read after MoveTemp)
    const bool dispatchNow = context.DependenciesLeft == 0;
    JobContexts.Add(label, MoveTemp(context));
    if (dispatchNow)
    {
        // No dependencies left to complete so dispatch now
        for (data.Index = 0; data.Index < jobCount; data.Index++)
            Jobs.PushBack(data);
    }
    JobsLocker.Unlock();

    // Wake up the worker threads only if any jobs were actually queued
    if (dispatchNow && JobStartingOnDispatch)
    {
        if (jobCount == 1)
            JobsSignal.NotifyOne();
        else
            JobsSignal.NotifyAll();
    }

    return label;
#else
    // Job system disabled: run all executions inline on the calling thread (dependencies are not used here)
    for (int32 i = 0; i < jobCount; i++)
        job(i);
    return 0;
#endif
}
void JobSystem::Wait()
{
#if JOB_SYSTEM_ENABLED
@@ -340,11 +398,6 @@ void JobSystem::Wait(int64 label)
// Wake up any thread to prevent stalling in highly multi-threaded environment
JobsSignal.NotifyOne();
}
#if JOB_SYSTEM_USE_STATS
LOG(Info, "Job average dequeue time: {0} cycles", DequeueSum / DequeueCount);
DequeueSum = DequeueCount = 0;
#endif
#endif
}
@@ -352,16 +405,11 @@ void JobSystem::SetJobStartingOnDispatch(bool value)
{
#if JOB_SYSTEM_ENABLED
JobStartingOnDispatch = value;
if (value)
{
#if JOB_SYSTEM_USE_MUTEX
JobsLocker.Lock();
const int32 count = Jobs.Count();
JobsLocker.Unlock();
#else
const int32 count = Jobs.Count();
#endif
if (count == 1)
JobsSignal.NotifyOne();
else if (count != 0)

View File

@@ -4,6 +4,9 @@
#include "Engine/Core/Delegate.h"
// Forward declaration of Span (used by the Dispatch overload that takes dependencies).
template<typename T>
class Span;
/// <summary>
/// Lightweight multi-threaded jobs execution scheduler. Uses a pool of threads and supports work-stealing concept.
/// </summary>
@@ -26,6 +29,15 @@ API_CLASS(Static) class FLAXENGINE_API JobSystem
/// <returns>The label identifying this dispatch. Can be used to wait for the execution end.</returns>
API_FUNCTION() static int64 Dispatch(const Function<void(int32)>& job, int32 jobCount = 1);
/// <summary>
/// Dispatches the job for execution after all of the jobs it depends on have completed.
/// </summary>
/// <param name="job">The job. Argument is an index of the job execution.</param>
/// <param name="dependencies">The list of labels of the jobs that need to complete before this job starts executing.</param>
/// <param name="jobCount">The job executions count.</param>
/// <returns>The label identifying this dispatch. Can be used to wait for the execution end.</returns>
API_FUNCTION() static int64 Dispatch(const Function<void(int32)>& job, Span<int64> dependencies, int32 jobCount = 1);
/// <summary>
/// Waits for all dispatched jobs to finish.
/// </summary>