Merge branch '1.9' of https://gitlab.flaxengine.com/flax/flaxengine into 1.9
This commit is contained in:
@@ -841,7 +841,6 @@ namespace FlaxEditor.Modules
|
||||
{
|
||||
// Open project, then close it
|
||||
Editor.OpenProject(Editor.GameProject.ProjectPath);
|
||||
Editor.Windows.MainWindow.Close(ClosingReason.User);
|
||||
}
|
||||
|
||||
private void OnMenuFileShowHide(Control control)
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
#include "Engine/Platform/Platform.h"
|
||||
#include "Engine/Core/Memory/Memory.h"
|
||||
#include "Engine/Core/Memory/Allocation.h"
|
||||
#include "Engine/Core/Math/Math.h"
|
||||
|
||||
/// <summary>
|
||||
/// Template for ring buffer with variable capacity.
|
||||
@@ -98,4 +99,10 @@ public:
|
||||
Memory::DestructItems(Get() + Math::Min(_front, _back), _count);
|
||||
_front = _back = _count = 0;
|
||||
}
|
||||
|
||||
void Release()
|
||||
{
|
||||
Clear();
|
||||
_allocation.Free();
|
||||
}
|
||||
};
|
||||
|
||||
86
Source/Engine/Core/Memory/SimpleHeapAllocation.h
Normal file
86
Source/Engine/Core/Memory/SimpleHeapAllocation.h
Normal file
@@ -0,0 +1,86 @@
|
||||
// Copyright (c) 2012-2024 Wojciech Figat. All rights reserved.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Engine/Core/Memory/Memory.h"
|
||||
#include "Engine/Core/Types/BaseTypes.h"
|
||||
|
||||
// Base class for custom heap-based allocators (eg. with local pooling/paging). Expects only Allocate/Free methods to be provided.
|
||||
template<typename This, uint32 InitialCapacity = 8>
|
||||
class SimpleHeapAllocation
|
||||
{
|
||||
public:
|
||||
enum { HasSwap = true };
|
||||
|
||||
template<typename T>
|
||||
class Data
|
||||
{
|
||||
T* _data = nullptr;
|
||||
uintptr _size;
|
||||
|
||||
public:
|
||||
FORCE_INLINE Data()
|
||||
{
|
||||
}
|
||||
|
||||
FORCE_INLINE ~Data()
|
||||
{
|
||||
if (_data)
|
||||
This::Free(_data, _size);
|
||||
}
|
||||
|
||||
FORCE_INLINE T* Get()
|
||||
{
|
||||
return _data;
|
||||
}
|
||||
|
||||
FORCE_INLINE const T* Get() const
|
||||
{
|
||||
return _data;
|
||||
}
|
||||
|
||||
FORCE_INLINE int32 CalculateCapacityGrow(int32 capacity, int32 minCapacity) const
|
||||
{
|
||||
capacity = capacity ? capacity * 2 : InitialCapacity;
|
||||
if (capacity < minCapacity)
|
||||
capacity = minCapacity;
|
||||
return capacity;
|
||||
}
|
||||
|
||||
FORCE_INLINE void Allocate(uint64 capacity)
|
||||
{
|
||||
_size = capacity * sizeof(T);
|
||||
_data = (T*)This::Allocate(_size);
|
||||
}
|
||||
|
||||
FORCE_INLINE void Relocate(uint64 capacity, int32 oldCount, int32 newCount)
|
||||
{
|
||||
T* newData = capacity != 0 ? (T*)This::Allocate(capacity * sizeof(T)) : nullptr;
|
||||
if (oldCount)
|
||||
{
|
||||
if (newCount > 0)
|
||||
Memory::MoveItems(newData, _data, newCount);
|
||||
Memory::DestructItems(_data, oldCount);
|
||||
}
|
||||
if (_data)
|
||||
This::Free(_data, _size);
|
||||
_data = newData;
|
||||
_size = capacity * sizeof(T);
|
||||
}
|
||||
|
||||
FORCE_INLINE void Free()
|
||||
{
|
||||
if (_data)
|
||||
{
|
||||
This::Free(_data, _size);
|
||||
_data = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
FORCE_INLINE void Swap(Data& other)
|
||||
{
|
||||
::Swap(_data, other._data);
|
||||
::Swap(_size, other._size);
|
||||
}
|
||||
};
|
||||
};
|
||||
@@ -4476,6 +4476,8 @@ void PhysicsBackend::DestroyController(void* controller)
|
||||
|
||||
void PhysicsBackend::DestroyMaterial(void* material)
|
||||
{
|
||||
if (!PhysX)
|
||||
return; // Skip when called by Content unload after Physics is disposed
|
||||
ASSERT_LOW_LAYER(material);
|
||||
auto materialPhysX = (PxMaterial*)material;
|
||||
materialPhysX->userData = nullptr;
|
||||
@@ -4486,6 +4488,8 @@ void PhysicsBackend::DestroyMaterial(void* material)
|
||||
|
||||
void PhysicsBackend::DestroyObject(void* object)
|
||||
{
|
||||
if (!PhysX)
|
||||
return; // Skip when called by Content unload after Physics is disposed
|
||||
ASSERT_LOW_LAYER(object);
|
||||
auto objectPhysX = (PxBase*)object;
|
||||
FlushLocker.Lock();
|
||||
|
||||
@@ -14,7 +14,6 @@
|
||||
#include "Engine/Content/Content.h"
|
||||
#include "Engine/Graphics/GPUContext.h"
|
||||
#include "Engine/Graphics/GPUDevice.h"
|
||||
#include "Engine/Graphics/Graphics.h"
|
||||
#include "Engine/Graphics/RenderTask.h"
|
||||
#include "Engine/Graphics/RenderBuffers.h"
|
||||
#include "Engine/Graphics/RenderTargetPool.h"
|
||||
@@ -40,6 +39,8 @@
|
||||
#define GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES 0 // Forces to redraw all object tiles every frame
|
||||
#define GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_OBJECTS 0 // Debug draws object bounds on redraw (and tile draw projection locations)
|
||||
#define GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_CHUNKS 0 // Debug draws culled chunks bounds (non-empty)
|
||||
#define GLOBAL_SURFACE_ATLAS_MAX_NEW_OBJECTS_PER_FRAME 500 // Limits the amount of newly added objects to atlas per-frame to reduce hitches on 1st frame or camera-cut
|
||||
#define GLOBAL_SURFACE_ATLAS_DIRTY_FRAMES(flags) (EnumHasAnyFlags(flags, StaticFlags::Lightmap) ? 200 : 10) // Amount of frames after which update object (less frequent updates for static scenes)
|
||||
|
||||
#if GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_OBJECTS || GLOBAL_SURFACE_ATLAS_DEBUG_DRAW_CHUNKS
|
||||
#include "Engine/Debug/DebugDraw.h"
|
||||
@@ -131,6 +132,7 @@ class GlobalSurfaceAtlasCustomBuffer : public RenderBuffers::CustomBuffer, publi
|
||||
{
|
||||
public:
|
||||
int32 Resolution = 0;
|
||||
float ResolutionInv;
|
||||
int32 AtlasPixelsTotal = 0;
|
||||
int32 AtlasPixelsUsed = 0;
|
||||
uint64 LastFrameAtlasInsertFail = 0;
|
||||
@@ -155,7 +157,6 @@ public:
|
||||
Array<void*> DirtyObjectsBuffer;
|
||||
Vector4 CullingPosDistance;
|
||||
uint64 CurrentFrame;
|
||||
float ResolutionInv;
|
||||
Float3 ViewPosition;
|
||||
float TileTexelsPerWorldUnit;
|
||||
float DistanceScalingStart;
|
||||
@@ -164,7 +165,7 @@ public:
|
||||
float MinObjectRadius;
|
||||
|
||||
// Async objects drawing cache
|
||||
Array<int64, FixedAllocation<2>> AsyncDrawWaitLabels;
|
||||
Array<int64, FixedAllocation<3>> AsyncDrawWaitLabels;
|
||||
RenderListBuffer<GlobalSurfaceAtlasTile*> AsyncFreeTiles;
|
||||
RenderListBuffer<GlobalSurfaceAtlasNewObject> AsyncNewObjects;
|
||||
RenderListBuffer<GlobalSurfaceAtlasNewTile> AsyncNewTiles;
|
||||
@@ -178,7 +179,7 @@ public:
|
||||
|
||||
void ClearObjects()
|
||||
{
|
||||
WaitForDrawActors();
|
||||
WaitForDrawing();
|
||||
CulledObjectsCounterIndex = -1;
|
||||
CulledObjectsUsageHistory.Clear();
|
||||
LastFrameAtlasDefragmentation = Engine::FrameCount;
|
||||
@@ -250,16 +251,17 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
void StartDrawActors(const RenderContext& renderContext, bool enableAsync = false)
|
||||
void StartDrawing(const RenderContext& renderContext, bool enableAsync = false)
|
||||
{
|
||||
if (AsyncDrawWaitLabels.HasItems())
|
||||
return; // Already started earlier this frame
|
||||
int32 resolution;
|
||||
float distance;
|
||||
GetOptions(renderContext, resolution, distance);
|
||||
const float resolutionInv = 1.0f / (float)resolution;
|
||||
if (Resolution != resolution)
|
||||
return; // Not yet initialized
|
||||
PROFILE_CPU();
|
||||
const auto currentFrame = Engine::FrameCount;
|
||||
if (Resolution == resolution)
|
||||
{
|
||||
// Perform atlas defragmentation if needed
|
||||
constexpr float maxUsageToDefrag = 0.8f;
|
||||
@@ -281,7 +283,6 @@ public:
|
||||
|
||||
// Setup data for rendering
|
||||
CurrentFrame = currentFrame;
|
||||
ResolutionInv = resolutionInv;
|
||||
ViewPosition = renderContext.View.Position;
|
||||
TileTexelsPerWorldUnit = 1.0f / METERS_TO_UNITS(0.1f); // Scales the tiles resolution
|
||||
DistanceScalingStart = METERS_TO_UNITS(20.0f); // Distance from camera at which the tiles resolution starts to be scaled down
|
||||
@@ -301,42 +302,43 @@ public:
|
||||
|
||||
if (enableAsync)
|
||||
{
|
||||
// Run in async via Job System
|
||||
// Run sync actors drawing now or force in async (different drawing path doesn't interfere with normal scene drawing)
|
||||
Function<void(int32)> func;
|
||||
func.Bind<GlobalSurfaceAtlasCustomBuffer, &GlobalSurfaceAtlasCustomBuffer::DrawActorsJob>(this);
|
||||
func.Bind<GlobalSurfaceAtlasCustomBuffer, &GlobalSurfaceAtlasCustomBuffer::DrawActorsJobSync>(this);
|
||||
const int32 jobCount = Math::Max(JobSystem::GetThreadsCount() - 1, 1); // Leave 1 thread unused to not block the main-thread (jobs will overlap with rendering)
|
||||
AsyncDrawWaitLabels.Add(JobSystem::Dispatch(func, jobCount));
|
||||
|
||||
// Run sync actors drawing now or force in async (different drawing path doesn't interfere with normal scene drawing)
|
||||
func.Bind<GlobalSurfaceAtlasCustomBuffer, &GlobalSurfaceAtlasCustomBuffer::DrawActorsJobSync>(this);
|
||||
// Run in async via Job System
|
||||
func.Bind<GlobalSurfaceAtlasCustomBuffer, &GlobalSurfaceAtlasCustomBuffer::DrawActorsJob>(this);
|
||||
AsyncDrawWaitLabels.Add(JobSystem::Dispatch(func, jobCount));
|
||||
|
||||
// Run dependent job that will process objects data in async
|
||||
func.Bind<GlobalSurfaceAtlasCustomBuffer, &GlobalSurfaceAtlasCustomBuffer::SetupJob>(this);
|
||||
AsyncDrawWaitLabels.Add(JobSystem::Dispatch(func, ToSpan(AsyncDrawWaitLabels)));
|
||||
}
|
||||
else
|
||||
{
|
||||
DrawActorsJob(-1);
|
||||
DrawActorsJob(0);
|
||||
SetupJob(0);
|
||||
}
|
||||
}
|
||||
|
||||
void WaitForDrawActors()
|
||||
void WaitForDrawing()
|
||||
{
|
||||
for (int64 label : AsyncDrawWaitLabels)
|
||||
JobSystem::Wait(label);
|
||||
AsyncDrawWaitLabels.Clear();
|
||||
}
|
||||
|
||||
void PostDrawActors()
|
||||
void FlushNewObjects()
|
||||
{
|
||||
PROFILE_CPU_NAMED("Post Draw");
|
||||
PROFILE_CPU_NAMED("Flush Atlas");
|
||||
|
||||
// Flush atlas tiles freeing
|
||||
for (auto* tile : AsyncFreeTiles)
|
||||
{
|
||||
Atlas.Free(tile, this);
|
||||
}
|
||||
AsyncFreeTiles.Clear();
|
||||
|
||||
// Flush new objects adding
|
||||
for (auto& newObject : AsyncNewObjects)
|
||||
{
|
||||
auto& object = Objects[newObject.ActorObject];
|
||||
@@ -350,7 +352,6 @@ public:
|
||||
}
|
||||
AsyncNewObjects.Clear();
|
||||
|
||||
// Flush new tiles adding
|
||||
for (auto& newTile : AsyncNewTiles)
|
||||
{
|
||||
auto& object = Objects[newTile.ActorObject];
|
||||
@@ -371,6 +372,114 @@ public:
|
||||
AsyncNewTiles.Clear();
|
||||
}
|
||||
|
||||
void CompactObjects()
|
||||
{
|
||||
PROFILE_CPU_NAMED("Compact Objects");
|
||||
for (auto it = Objects.Begin(); it.IsNotEnd(); ++it)
|
||||
{
|
||||
if (it->Value.LastFrameUsed != CurrentFrame)
|
||||
{
|
||||
for (auto& tile : it->Value.Tiles)
|
||||
{
|
||||
if (tile)
|
||||
Atlas.Free(tile, this);
|
||||
}
|
||||
Objects.Remove(it);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void WriteObjects()
|
||||
{
|
||||
PROFILE_CPU_NAMED("Write Objects");
|
||||
DirtyObjectsBuffer.Clear();
|
||||
ObjectsBuffer.Clear();
|
||||
for (auto& e : Objects)
|
||||
{
|
||||
auto& object = e.Value;
|
||||
if (object.Dirty)
|
||||
{
|
||||
// Collect dirty objects
|
||||
object.LastFrameUpdated = CurrentFrame;
|
||||
object.LightingUpdateFrame = CurrentFrame;
|
||||
DirtyObjectsBuffer.Add(e.Key);
|
||||
}
|
||||
|
||||
Matrix3x3 worldToLocalRotation;
|
||||
Matrix3x3::RotationQuaternion(object.Bounds.Transformation.Orientation.Conjugated(), worldToLocalRotation);
|
||||
Float3 worldPosition = object.Bounds.Transformation.Translation;
|
||||
Float3 worldExtents = object.Bounds.Extents * object.Bounds.Transformation.Scale;
|
||||
|
||||
// Write to objects buffer (this must match unpacking logic in HLSL)
|
||||
uint32 objectAddress = ObjectsBuffer.Data.Count() / sizeof(Float4);
|
||||
auto* objectData = ObjectsBuffer.WriteReserve<Float4>(GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE);
|
||||
objectData[0] = Float4(object.Position, object.Radius);
|
||||
objectData[1] = Float4::Zero;
|
||||
objectData[2] = Float4(worldToLocalRotation.M11, worldToLocalRotation.M12, worldToLocalRotation.M13, worldPosition.X);
|
||||
objectData[3] = Float4(worldToLocalRotation.M21, worldToLocalRotation.M22, worldToLocalRotation.M23, worldPosition.Y);
|
||||
objectData[4] = Float4(worldToLocalRotation.M31, worldToLocalRotation.M32, worldToLocalRotation.M33, worldPosition.Z);
|
||||
objectData[5] = Float4(worldExtents, object.UseVisibility ? 1.0f : 0.0f);
|
||||
auto tileOffsets = reinterpret_cast<uint16*>(&objectData[1]); // xyz used for tile offsets packed into uint16
|
||||
auto objectDataSize = reinterpret_cast<uint32*>(&objectData[1].W); // w used for object size (count of Float4s for object+tiles)
|
||||
*objectDataSize = GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE;
|
||||
for (int32 tileIndex = 0; tileIndex < 6; tileIndex++)
|
||||
{
|
||||
auto* tile = object.Tiles[tileIndex];
|
||||
if (!tile)
|
||||
continue;
|
||||
tile->ObjectAddressOffset = *objectDataSize;
|
||||
tile->Address = objectAddress + tile->ObjectAddressOffset;
|
||||
tileOffsets[tileIndex] = tile->ObjectAddressOffset;
|
||||
*objectDataSize += GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE;
|
||||
|
||||
// Setup view to render object from the side
|
||||
Float3 xAxis, yAxis, zAxis = Float3::Zero;
|
||||
zAxis.Raw[tileIndex / 2] = tileIndex & 1 ? 1.0f : -1.0f;
|
||||
yAxis = tileIndex == 2 || tileIndex == 3 ? Float3::Right : Float3::Up;
|
||||
Float3::Cross(yAxis, zAxis, xAxis);
|
||||
Float3 localSpaceOffset = -zAxis * object.Bounds.Extents;
|
||||
xAxis = object.Bounds.Transformation.LocalToWorldVector(xAxis);
|
||||
yAxis = object.Bounds.Transformation.LocalToWorldVector(yAxis);
|
||||
zAxis = object.Bounds.Transformation.LocalToWorldVector(zAxis);
|
||||
xAxis.NormalizeFast();
|
||||
yAxis.NormalizeFast();
|
||||
zAxis.NormalizeFast();
|
||||
tile->ViewPosition = object.Bounds.Transformation.LocalToWorld(localSpaceOffset);
|
||||
tile->ViewDirection = zAxis;
|
||||
|
||||
// Create view matrix
|
||||
tile->ViewMatrix.SetColumn1(Float4(xAxis, -Float3::Dot(xAxis, tile->ViewPosition)));
|
||||
tile->ViewMatrix.SetColumn2(Float4(yAxis, -Float3::Dot(yAxis, tile->ViewPosition)));
|
||||
tile->ViewMatrix.SetColumn3(Float4(zAxis, -Float3::Dot(zAxis, tile->ViewPosition)));
|
||||
tile->ViewMatrix.SetColumn4(Float4(0, 0, 0, 1));
|
||||
|
||||
// Calculate object bounds size in the view
|
||||
OrientedBoundingBox viewBounds(object.Bounds);
|
||||
viewBounds.Transform(tile->ViewMatrix);
|
||||
Float3 viewExtent = viewBounds.Transformation.LocalToWorldVector(viewBounds.Extents);
|
||||
tile->ViewBoundsSize = viewExtent.GetAbsolute() * 2.0f;
|
||||
|
||||
// Per-tile data
|
||||
const float tileWidth = (float)tile->Width - GLOBAL_SURFACE_ATLAS_TILE_PADDING;
|
||||
const float tileHeight = (float)tile->Height - GLOBAL_SURFACE_ATLAS_TILE_PADDING;
|
||||
auto* tileData = ObjectsBuffer.WriteReserve<Float4>(GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE);
|
||||
tileData[0] = Float4(tile->X, tile->Y, tileWidth, tileHeight) * ResolutionInv;
|
||||
tileData[1] = Float4(tile->ViewMatrix.M11, tile->ViewMatrix.M12, tile->ViewMatrix.M13, tile->ViewMatrix.M41);
|
||||
tileData[2] = Float4(tile->ViewMatrix.M21, tile->ViewMatrix.M22, tile->ViewMatrix.M23, tile->ViewMatrix.M42);
|
||||
tileData[3] = Float4(tile->ViewMatrix.M31, tile->ViewMatrix.M32, tile->ViewMatrix.M33, tile->ViewMatrix.M43);
|
||||
tileData[4] = Float4(tile->ViewBoundsSize, 0.0f); // w unused
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Processes the objects data after drawing: flushes the queued atlas changes, removes unused objects and writes the objects buffer.
/// The job index argument is unused.
/// </summary>
void SetupJob(int32)
{
    PROFILE_CPU();

    // Order matters: queued changes are applied first, then unused objects get removed, and only then the buffer is written
    this->FlushNewObjects();
    this->CompactObjects();
    this->WriteObjects();
}
|
||||
|
||||
// [ISceneRenderingListener]
|
||||
void OnSceneRenderingAddActor(Actor* a) override
|
||||
{
|
||||
@@ -550,7 +659,7 @@ void GlobalSurfaceAtlasPass::Dispose()
|
||||
void GlobalSurfaceAtlasPass::OnCollectDrawCalls(RenderContextBatch& renderContextBatch)
|
||||
{
|
||||
// Check if Global Surface Atlas will be used this frame
|
||||
PROFILE_GPU_CPU_NAMED("Global Surface Atlas");
|
||||
PROFILE_CPU_NAMED("Global Surface Atlas");
|
||||
if (checkIfSkipPass())
|
||||
return;
|
||||
RenderContext& renderContext = renderContextBatch.GetMainContext();
|
||||
@@ -563,7 +672,7 @@ void GlobalSurfaceAtlasPass::OnCollectDrawCalls(RenderContextBatch& renderContex
|
||||
return;
|
||||
auto& surfaceAtlasData = *renderContext.Buffers->GetCustomBuffer<GlobalSurfaceAtlasCustomBuffer>(TEXT("GlobalSurfaceAtlas"));
|
||||
_surfaceAtlasData = &surfaceAtlasData;
|
||||
surfaceAtlasData.StartDrawActors(renderContext, renderContextBatch.EnableAsync);
|
||||
surfaceAtlasData.StartDrawing(renderContext, renderContextBatch.EnableAsync);
|
||||
}
|
||||
|
||||
bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* context, BindingData& result)
|
||||
@@ -590,10 +699,6 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
surfaceAtlasData.LastFrameUsed = currentFrame;
|
||||
PROFILE_GPU_CPU_NAMED("Global Surface Atlas");
|
||||
|
||||
// Start objects drawing (in case not yet started earlier this frame)
|
||||
_surfaceAtlasData = &surfaceAtlasData;
|
||||
surfaceAtlasData.StartDrawActors(renderContext);
|
||||
|
||||
// Setup options
|
||||
int32 resolution;
|
||||
float distance;
|
||||
@@ -620,6 +725,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
INIT_ATLAS_TEXTURE(AtlasDepth, PixelFormat::D16_UNorm);
|
||||
#undef INIT_ATLAS_TEXTURE
|
||||
surfaceAtlasData.Resolution = resolution;
|
||||
surfaceAtlasData.ResolutionInv = resolutionInv;
|
||||
surfaceAtlasData.AtlasPixelsTotal = resolution * resolution;
|
||||
if (!surfaceAtlasData.ChunksBuffer)
|
||||
{
|
||||
@@ -635,6 +741,11 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
if (!_vertexBuffer)
|
||||
_vertexBuffer = New<DynamicVertexBuffer>(0u, (uint32)sizeof(AtlasTileVertex), TEXT("GlobalSurfaceAtlas.VertexBuffer"));
|
||||
|
||||
// Ensure that async objects drawing ended
|
||||
_surfaceAtlasData = &surfaceAtlasData;
|
||||
surfaceAtlasData.StartDrawing(renderContext); // (ignored if not started earlier this frame)
|
||||
surfaceAtlasData.WaitForDrawing();
|
||||
|
||||
// Utility for writing into tiles vertex buffer
|
||||
const Float2 posToClipMul(2.0f * resolutionInv, -2.0f * resolutionInv);
|
||||
const Float2 posToClipAdd(-1.0f, 1.0f);
|
||||
@@ -665,110 +776,6 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
context->BindVB(ToSpan(&vb, 1)); \
|
||||
context->DrawInstanced(_vertexBuffer->Data.Count() / sizeof(AtlasTileVertex), 1);
|
||||
|
||||
// Ensure that async objects drawing ended
|
||||
surfaceAtlasData.WaitForDrawActors();
|
||||
surfaceAtlasData.PostDrawActors();
|
||||
|
||||
// Remove unused objects
|
||||
{
|
||||
PROFILE_GPU_CPU_NAMED("Compact Objects");
|
||||
for (auto it = surfaceAtlasData.Objects.Begin(); it.IsNotEnd(); ++it)
|
||||
{
|
||||
if (it->Value.LastFrameUsed != currentFrame)
|
||||
{
|
||||
for (auto& tile : it->Value.Tiles)
|
||||
{
|
||||
if (tile)
|
||||
surfaceAtlasData.Atlas.Free(tile, &surfaceAtlasData);
|
||||
}
|
||||
surfaceAtlasData.Objects.Remove(it);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Write objects to the data buffer
|
||||
{
|
||||
PROFILE_CPU_NAMED("Write Objects");
|
||||
surfaceAtlasData.DirtyObjectsBuffer.Clear();
|
||||
surfaceAtlasData.ObjectsBuffer.Clear();
|
||||
for (auto& e : surfaceAtlasData.Objects)
|
||||
{
|
||||
auto& object = e.Value;
|
||||
if (object.Dirty)
|
||||
{
|
||||
// Collect dirty objects
|
||||
object.LastFrameUpdated = surfaceAtlasData.CurrentFrame;
|
||||
object.LightingUpdateFrame = surfaceAtlasData.CurrentFrame;
|
||||
surfaceAtlasData.DirtyObjectsBuffer.Add(e.Key);
|
||||
}
|
||||
|
||||
Matrix3x3 worldToLocalRotation;
|
||||
Matrix3x3::RotationQuaternion(object.Bounds.Transformation.Orientation.Conjugated(), worldToLocalRotation);
|
||||
Float3 worldPosition = object.Bounds.Transformation.Translation;
|
||||
Float3 worldExtents = object.Bounds.Extents * object.Bounds.Transformation.Scale;
|
||||
|
||||
// Write to objects buffer (this must match unpacking logic in HLSL)
|
||||
uint32 objectAddress = surfaceAtlasData.ObjectsBuffer.Data.Count() / sizeof(Float4);
|
||||
auto* objectData = surfaceAtlasData.ObjectsBuffer.WriteReserve<Float4>(GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE);
|
||||
objectData[0] = Float4(object.Position, object.Radius);
|
||||
objectData[1] = Float4::Zero;
|
||||
objectData[2] = Float4(worldToLocalRotation.M11, worldToLocalRotation.M12, worldToLocalRotation.M13, worldPosition.X);
|
||||
objectData[3] = Float4(worldToLocalRotation.M21, worldToLocalRotation.M22, worldToLocalRotation.M23, worldPosition.Y);
|
||||
objectData[4] = Float4(worldToLocalRotation.M31, worldToLocalRotation.M32, worldToLocalRotation.M33, worldPosition.Z);
|
||||
objectData[5] = Float4(worldExtents, object.UseVisibility ? 1.0f : 0.0f);
|
||||
auto tileOffsets = reinterpret_cast<uint16*>(&objectData[1]); // xyz used for tile offsets packed into uint16
|
||||
auto objectDataSize = reinterpret_cast<uint32*>(&objectData[1].W); // w used for object size (count of Float4s for object+tiles)
|
||||
*objectDataSize = GLOBAL_SURFACE_ATLAS_OBJECT_DATA_STRIDE;
|
||||
for (int32 tileIndex = 0; tileIndex < 6; tileIndex++)
|
||||
{
|
||||
auto* tile = object.Tiles[tileIndex];
|
||||
if (!tile)
|
||||
continue;
|
||||
tile->ObjectAddressOffset = *objectDataSize;
|
||||
tile->Address = objectAddress + tile->ObjectAddressOffset;
|
||||
tileOffsets[tileIndex] = tile->ObjectAddressOffset;
|
||||
*objectDataSize += GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE;
|
||||
|
||||
// Setup view to render object from the side
|
||||
Float3 xAxis, yAxis, zAxis = Float3::Zero;
|
||||
zAxis.Raw[tileIndex / 2] = tileIndex & 1 ? 1.0f : -1.0f;
|
||||
yAxis = tileIndex == 2 || tileIndex == 3 ? Float3::Right : Float3::Up;
|
||||
Float3::Cross(yAxis, zAxis, xAxis);
|
||||
Float3 localSpaceOffset = -zAxis * object.Bounds.Extents;
|
||||
xAxis = object.Bounds.Transformation.LocalToWorldVector(xAxis);
|
||||
yAxis = object.Bounds.Transformation.LocalToWorldVector(yAxis);
|
||||
zAxis = object.Bounds.Transformation.LocalToWorldVector(zAxis);
|
||||
xAxis.NormalizeFast();
|
||||
yAxis.NormalizeFast();
|
||||
zAxis.NormalizeFast();
|
||||
tile->ViewPosition = object.Bounds.Transformation.LocalToWorld(localSpaceOffset);
|
||||
tile->ViewDirection = zAxis;
|
||||
|
||||
// Create view matrix
|
||||
tile->ViewMatrix.SetColumn1(Float4(xAxis, -Float3::Dot(xAxis, tile->ViewPosition)));
|
||||
tile->ViewMatrix.SetColumn2(Float4(yAxis, -Float3::Dot(yAxis, tile->ViewPosition)));
|
||||
tile->ViewMatrix.SetColumn3(Float4(zAxis, -Float3::Dot(zAxis, tile->ViewPosition)));
|
||||
tile->ViewMatrix.SetColumn4(Float4(0, 0, 0, 1));
|
||||
|
||||
// Calculate object bounds size in the view
|
||||
OrientedBoundingBox viewBounds(object.Bounds);
|
||||
viewBounds.Transform(tile->ViewMatrix);
|
||||
Float3 viewExtent = viewBounds.Transformation.LocalToWorldVector(viewBounds.Extents);
|
||||
tile->ViewBoundsSize = viewExtent.GetAbsolute() * 2.0f;
|
||||
|
||||
// Per-tile data
|
||||
const float tileWidth = (float)tile->Width - GLOBAL_SURFACE_ATLAS_TILE_PADDING;
|
||||
const float tileHeight = (float)tile->Height - GLOBAL_SURFACE_ATLAS_TILE_PADDING;
|
||||
auto* tileData = surfaceAtlasData.ObjectsBuffer.WriteReserve<Float4>(GLOBAL_SURFACE_ATLAS_TILE_DATA_STRIDE);
|
||||
tileData[0] = Float4(tile->X, tile->Y, tileWidth, tileHeight) * surfaceAtlasData.ResolutionInv;
|
||||
tileData[1] = Float4(tile->ViewMatrix.M11, tile->ViewMatrix.M12, tile->ViewMatrix.M13, tile->ViewMatrix.M41);
|
||||
tileData[2] = Float4(tile->ViewMatrix.M21, tile->ViewMatrix.M22, tile->ViewMatrix.M23, tile->ViewMatrix.M42);
|
||||
tileData[3] = Float4(tile->ViewMatrix.M31, tile->ViewMatrix.M32, tile->ViewMatrix.M33, tile->ViewMatrix.M43);
|
||||
tileData[4] = Float4(tile->ViewBoundsSize, 0.0f); // w unused
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Rasterize world geometry material properties into Global Surface Atlas
|
||||
if (surfaceAtlasData.DirtyObjectsBuffer.Count() != 0)
|
||||
{
|
||||
@@ -795,7 +802,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
context->SetRenderTarget(depthBuffer, ToSpan(targetBuffers, ARRAY_COUNT(targetBuffers)));
|
||||
{
|
||||
PROFILE_GPU_CPU_NAMED("Clear");
|
||||
if (noCache || GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES || !GPU_SPREAD_WORKLOAD)
|
||||
if (noCache || GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES)
|
||||
{
|
||||
// Full-atlas hardware clear
|
||||
context->ClearDepth(depthBuffer);
|
||||
@@ -1084,7 +1091,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
{
|
||||
GlobalSurfaceAtlasLight& lightData = surfaceAtlasData.Lights[light.ID];
|
||||
lightData.LastFrameUsed = currentFrame;
|
||||
uint32 redrawFramesCount = EnumHasAnyFlags(light.StaticFlags, StaticFlags::Lightmap) ? 120 : 4;
|
||||
uint32 redrawFramesCount = GLOBAL_SURFACE_ATLAS_DIRTY_FRAMES(light.StaticFlags);
|
||||
if (surfaceAtlasData.CurrentFrame - lightData.LastFrameUpdated < (redrawFramesCount + (light.ID.D & redrawFramesCount)))
|
||||
continue;
|
||||
lightData.LastFrameUpdated = currentFrame;
|
||||
@@ -1119,7 +1126,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
{
|
||||
GlobalSurfaceAtlasLight& lightData = surfaceAtlasData.Lights[light.ID];
|
||||
lightData.LastFrameUsed = currentFrame;
|
||||
uint32 redrawFramesCount = EnumHasAnyFlags(light.StaticFlags, StaticFlags::Lightmap) ? 120 : 4;
|
||||
uint32 redrawFramesCount = GLOBAL_SURFACE_ATLAS_DIRTY_FRAMES(light.StaticFlags);
|
||||
if (surfaceAtlasData.CurrentFrame - lightData.LastFrameUpdated < (redrawFramesCount + (light.ID.D & redrawFramesCount)))
|
||||
continue;
|
||||
lightData.LastFrameUpdated = currentFrame;
|
||||
@@ -1141,7 +1148,7 @@ bool GlobalSurfaceAtlasPass::Render(RenderContext& renderContext, GPUContext* co
|
||||
{
|
||||
GlobalSurfaceAtlasLight& lightData = surfaceAtlasData.Lights[light.ID];
|
||||
lightData.LastFrameUsed = currentFrame;
|
||||
uint32 redrawFramesCount = EnumHasAnyFlags(light.StaticFlags, StaticFlags::Lightmap) ? 120 : 4;
|
||||
uint32 redrawFramesCount = GLOBAL_SURFACE_ATLAS_DIRTY_FRAMES(light.StaticFlags);
|
||||
if (surfaceAtlasData.CurrentFrame - lightData.LastFrameUpdated < (redrawFramesCount + (light.ID.D & redrawFramesCount)))
|
||||
continue;
|
||||
lightData.LastFrameUpdated = currentFrame;
|
||||
@@ -1457,9 +1464,9 @@ void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, con
|
||||
const float distanceScale = Math::Lerp(1.0f, surfaceAtlasData.DistanceScaling, Math::InverseLerp(surfaceAtlasData.DistanceScalingStart, surfaceAtlasData.DistanceScalingEnd, (float)CollisionsHelper::DistanceSpherePoint(actorObjectBounds, surfaceAtlasData.ViewPosition)));
|
||||
const float tilesScale = surfaceAtlasData.TileTexelsPerWorldUnit * distanceScale * qualityScale;
|
||||
GlobalSurfaceAtlasObject* object = surfaceAtlasData.Objects.TryGet(actorObject);
|
||||
if (!object && surfaceAtlasData.AsyncNewObjects.Count() >= 512)
|
||||
if (!object && surfaceAtlasData.AsyncNewObjects.Count() >= GLOBAL_SURFACE_ATLAS_MAX_NEW_OBJECTS_PER_FRAME)
|
||||
return; // Reduce load on 1st frame and add more objects during next frames to balance performance
|
||||
bool anyTile = false, dirty = GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES || !GPU_SPREAD_WORKLOAD;
|
||||
bool anyTile = false, dirty = GLOBAL_SURFACE_ATLAS_DEBUG_FORCE_REDRAW_TILES;
|
||||
for (int32 tileIndex = 0; tileIndex < 6; tileIndex++)
|
||||
{
|
||||
if (((1 << tileIndex) & tilesMask) == 0)
|
||||
@@ -1515,7 +1522,7 @@ void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, con
|
||||
if (object)
|
||||
{
|
||||
// Redraw objects from time-to-time (dynamic objects can be animated, static objects can have textures streamed)
|
||||
uint32 redrawFramesCount = actor->HasStaticFlag(StaticFlags::Lightmap) ? 120 : 4;
|
||||
uint32 redrawFramesCount = GLOBAL_SURFACE_ATLAS_DIRTY_FRAMES(actor->GetStaticFlags());
|
||||
if (surfaceAtlasData.CurrentFrame - object->LastFrameUpdated >= (redrawFramesCount + (actor->GetID().D & redrawFramesCount)))
|
||||
dirty = true;
|
||||
|
||||
@@ -1525,7 +1532,7 @@ void GlobalSurfaceAtlasPass::RasterizeActor(Actor* actor, void* actorObject, con
|
||||
object->Bounds = bounds;
|
||||
object->Position = (Float3)actorObjectBounds.Center; // TODO: large worlds
|
||||
object->Radius = (float)actorObjectBounds.Radius;
|
||||
object->Dirty = dirty;
|
||||
object->Dirty |= dirty;
|
||||
object->UseVisibility = useVisibility;
|
||||
}
|
||||
else
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -39,20 +39,15 @@ private:
|
||||
GPUShaderProgramCS* _csGenerateMip = nullptr;
|
||||
GPUConstantBuffer* _cb0 = nullptr;
|
||||
GPUConstantBuffer* _cb1 = nullptr;
|
||||
|
||||
// Rasterization cache
|
||||
class DynamicStructuredBuffer* _objectsBuffer = nullptr;
|
||||
Array<GPUTextureView*> _objectsTextures;
|
||||
uint16 _objectsBufferCount;
|
||||
int32 _cascadeIndex;
|
||||
float _voxelSize, _chunkSize;
|
||||
BoundingBox _cascadeBounds;
|
||||
BoundingBox _cascadeCullingBounds;
|
||||
class GlobalSignDistanceFieldCustomBuffer* _sdfData;
|
||||
Vector3 _sdfDataOriginMin;
|
||||
Vector3 _sdfDataOriginMax;
|
||||
|
||||
public:
|
||||
/// <summary>
|
||||
/// Calls drawing scene objects in async early in the frame.
|
||||
/// </summary>
|
||||
/// <param name="renderContextBatch">The rendering context batch.</param>
|
||||
void OnCollectDrawCalls(RenderContextBatch& renderContextBatch);
|
||||
|
||||
/// <summary>
|
||||
/// Gets the Global SDF (only if enabled in Graphics Settings).
|
||||
/// </summary>
|
||||
@@ -78,10 +73,7 @@ public:
|
||||
/// <param name="output">The output buffer.</param>
|
||||
void RenderDebug(RenderContext& renderContext, GPUContext* context, GPUTexture* output);
|
||||
|
||||
void GetCullingData(BoundingBox& bounds) const
|
||||
{
|
||||
bounds = _cascadeCullingBounds;
|
||||
}
|
||||
void GetCullingData(BoundingBox& bounds) const;
|
||||
|
||||
// Rasterize Model SDF into the Global SDF. Call it from actor Draw() method during DrawPass::GlobalSDF.
|
||||
void RasterizeModelSDF(Actor* actor, const ModelBase::SDFData& sdf, const Transform& localToWorld, const BoundingBox& objectBounds);
|
||||
|
||||
@@ -30,13 +30,7 @@ namespace
|
||||
Array<DrawBatch> SortingBatches;
|
||||
Array<RenderList*> FreeRenderList;
|
||||
|
||||
struct MemPoolEntry
|
||||
{
|
||||
void* Ptr;
|
||||
uintptr Size;
|
||||
};
|
||||
|
||||
Array<MemPoolEntry> MemPool;
|
||||
Array<Pair<void*, uintptr>> MemPool;
|
||||
CriticalSection MemPoolLocker;
|
||||
}
|
||||
|
||||
@@ -147,18 +141,16 @@ void* RendererAllocation::Allocate(uintptr size)
|
||||
MemPoolLocker.Lock();
|
||||
for (int32 i = 0; i < MemPool.Count(); i++)
|
||||
{
|
||||
if (MemPool[i].Size == size)
|
||||
if (MemPool.Get()[i].Second == size)
|
||||
{
|
||||
result = MemPool[i].Ptr;
|
||||
result = MemPool.Get()[i].First;
|
||||
MemPool.RemoveAt(i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
MemPoolLocker.Unlock();
|
||||
if (!result)
|
||||
{
|
||||
result = Platform::Allocate(size, 16);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -201,7 +193,7 @@ void RenderList::CleanupCache()
|
||||
SortingIndices.Resize(0);
|
||||
FreeRenderList.ClearDelete();
|
||||
for (auto& e : MemPool)
|
||||
Platform::Free(e.Ptr);
|
||||
Platform::Free(e.First);
|
||||
MemPool.Clear();
|
||||
}
|
||||
|
||||
|
||||
@@ -409,6 +409,8 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont
|
||||
JobSystem::SetJobStartingOnDispatch(false);
|
||||
task->OnCollectDrawCalls(renderContextBatch, SceneRendering::DrawCategory::SceneDraw);
|
||||
task->OnCollectDrawCalls(renderContextBatch, SceneRendering::DrawCategory::SceneDrawAsync);
|
||||
if (setup.UseGlobalSDF)
|
||||
GlobalSignDistanceFieldPass::Instance()->OnCollectDrawCalls(renderContextBatch);
|
||||
if (setup.UseGlobalSurfaceAtlas)
|
||||
GlobalSurfaceAtlasPass::Instance()->OnCollectDrawCalls(renderContextBatch);
|
||||
|
||||
|
||||
@@ -2,86 +2,11 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Engine/Core/Memory/Memory.h"
|
||||
#include "Engine/Core/Types/BaseTypes.h"
|
||||
#include "Engine/Core/Memory/SimpleHeapAllocation.h"
|
||||
|
||||
class RendererAllocation
|
||||
class RendererAllocation : public SimpleHeapAllocation<RendererAllocation, 64>
|
||||
{
|
||||
public:
|
||||
static FLAXENGINE_API void* Allocate(uintptr size);
|
||||
static FLAXENGINE_API void Free(void* ptr, uintptr size);
|
||||
|
||||
enum { HasSwap = true };
|
||||
|
||||
template<typename T>
|
||||
class Data
|
||||
{
|
||||
T* _data = nullptr;
|
||||
uintptr _size;
|
||||
|
||||
public:
|
||||
FORCE_INLINE Data()
|
||||
{
|
||||
}
|
||||
|
||||
FORCE_INLINE ~Data()
|
||||
{
|
||||
if (_data)
|
||||
RendererAllocation::Free(_data, _size);
|
||||
}
|
||||
|
||||
FORCE_INLINE T* Get()
|
||||
{
|
||||
return _data;
|
||||
}
|
||||
|
||||
FORCE_INLINE const T* Get() const
|
||||
{
|
||||
return _data;
|
||||
}
|
||||
|
||||
FORCE_INLINE int32 CalculateCapacityGrow(int32 capacity, int32 minCapacity) const
|
||||
{
|
||||
capacity = capacity ? capacity * 2 : 64;
|
||||
if (capacity < minCapacity)
|
||||
capacity = minCapacity;
|
||||
return capacity;
|
||||
}
|
||||
|
||||
FORCE_INLINE void Allocate(uint64 capacity)
|
||||
{
|
||||
_size = capacity * sizeof(T);
|
||||
_data = (T*)RendererAllocation::Allocate(_size);
|
||||
}
|
||||
|
||||
FORCE_INLINE void Relocate(uint64 capacity, int32 oldCount, int32 newCount)
|
||||
{
|
||||
T* newData = capacity != 0 ? (T*)RendererAllocation::Allocate(capacity * sizeof(T)) : nullptr;
|
||||
if (oldCount)
|
||||
{
|
||||
if (newCount > 0)
|
||||
Memory::MoveItems(newData, _data, newCount);
|
||||
Memory::DestructItems(_data, oldCount);
|
||||
}
|
||||
if (_data)
|
||||
RendererAllocation::Free(_data, _size);
|
||||
_data = newData;
|
||||
_size = capacity * sizeof(T);
|
||||
}
|
||||
|
||||
FORCE_INLINE void Free()
|
||||
{
|
||||
if (_data)
|
||||
{
|
||||
RendererAllocation::Free(_data, _size);
|
||||
_data = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
FORCE_INLINE void Swap(Data& other)
|
||||
{
|
||||
::Swap(_data, other._data);
|
||||
::Swap(_size, other._size);
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
@@ -5,34 +5,29 @@
|
||||
#include "Engine/Platform/CPUInfo.h"
|
||||
#include "Engine/Platform/Thread.h"
|
||||
#include "Engine/Platform/ConditionVariable.h"
|
||||
#include "Engine/Core/Types/Span.h"
|
||||
#include "Engine/Core/Types/Pair.h"
|
||||
#include "Engine/Core/Memory/SimpleHeapAllocation.h"
|
||||
#include "Engine/Core/Collections/Dictionary.h"
|
||||
#include "Engine/Core/Collections/RingBuffer.h"
|
||||
#include "Engine/Engine/EngineService.h"
|
||||
#include "Engine/Profiler/ProfilerCPU.h"
|
||||
#if USE_CSHARP
|
||||
#include "Engine/Scripting/ManagedCLR/MCore.h"
|
||||
#endif
|
||||
|
||||
// Jobs storage perf info:
|
||||
// (500 jobs, i7 9th gen)
|
||||
// JOB_SYSTEM_USE_MUTEX=1, enqueue=130-280 cycles, dequeue=2-6 cycles
|
||||
// JOB_SYSTEM_USE_MUTEX=0, enqueue=300-700 cycles, dequeue=10-16 cycles
|
||||
// So using RingBuffer+Mutex+Signals is better than moodycamel::ConcurrentQueue
|
||||
|
||||
#define JOB_SYSTEM_ENABLED 1
|
||||
#define JOB_SYSTEM_USE_MUTEX 1
|
||||
#define JOB_SYSTEM_USE_STATS 0
|
||||
|
||||
#if JOB_SYSTEM_USE_STATS
|
||||
#include "Engine/Core/Log.h"
|
||||
#endif
|
||||
#if JOB_SYSTEM_USE_MUTEX
|
||||
#include "Engine/Core/Collections/RingBuffer.h"
|
||||
#else
|
||||
#include "ConcurrentQueue.h"
|
||||
#endif
|
||||
|
||||
#if JOB_SYSTEM_ENABLED
|
||||
|
||||
// Local allocator for job system memory that uses internal pooling and assumes that JobsLocker is taken (write access owned by the calling thread).
// Passes itself as the `This` parameter of SimpleHeapAllocation, so the base template routes container memory requests to the static Allocate/Free declared below.
// Neither method is declared with any locking of its own here; callers are expected to hold JobsLocker as stated above.
|
||||
class JobSystemAllocation : public SimpleHeapAllocation<JobSystemAllocation>
|
||||
{
|
||||
|
||||
public:
|
||||
// Returns a memory block for the requested size in bytes.
static void* Allocate(uintptr size);
|
||||
// Hands back a block previously obtained from Allocate; size is the size in bytes it was allocated with.
static void Free(void* ptr, uintptr size);
|
||||
};
|
||||
|
||||
class JobSystemService : public EngineService
|
||||
{
|
||||
public:
|
||||
@@ -48,13 +43,26 @@ public:
|
||||
|
||||
struct JobData
|
||||
{
|
||||
Function<void(int32)> Job;
|
||||
int32 Index;
|
||||
int64 JobKey;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct TIsPODType<JobData>
|
||||
{
|
||||
enum { Value = true };
|
||||
};
|
||||
|
||||
// Per-dispatch bookkeeping stored in JobContexts under the dispatch label.
struct JobContext
|
||||
{
|
||||
// Number of job executions of this dispatch that have not finished yet (initialized to the job count on dispatch, decremented by worker threads after each execution).
volatile int64 JobsLeft;
|
||||
// Number of dependencies that were still registered in JobContexts when this job was dispatched and have not completed since.
int32 DependenciesLeft;
|
||||
// The function to execute; it is invoked with the execution index.
Function<void(int32)> Job;
|
||||
// Labels of dispatches that listed this one as a dependency.
Array<int64, JobSystemAllocation> Dependants;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct TIsPODType<JobContext>
|
||||
{
|
||||
enum { Value = false };
|
||||
};
|
||||
@@ -79,40 +87,44 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
struct JobContext
|
||||
{
|
||||
volatile int64 JobsLeft;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct TIsPODType<JobContext>
|
||||
{
|
||||
enum { Value = true };
|
||||
};
|
||||
|
||||
namespace
|
||||
{
|
||||
JobSystemService JobSystemInstance;
|
||||
Array<Pair<void*, uintptr>> MemPool;
|
||||
Thread* Threads[PLATFORM_THREADS_LIMIT / 2] = {};
|
||||
int32 ThreadsCount = 0;
|
||||
bool JobStartingOnDispatch = true;
|
||||
volatile int64 ExitFlag = 0;
|
||||
volatile int64 JobLabel = 0;
|
||||
Dictionary<int64, JobContext> JobContexts;
|
||||
Dictionary<int64, JobContext, JobSystemAllocation> JobContexts;
|
||||
ConditionVariable JobsSignal;
|
||||
CriticalSection JobsMutex;
|
||||
ConditionVariable WaitSignal;
|
||||
CriticalSection WaitMutex;
|
||||
CriticalSection JobsLocker;
|
||||
#if JOB_SYSTEM_USE_MUTEX
|
||||
RingBuffer<JobData> Jobs;
|
||||
#else
|
||||
ConcurrentQueue<JobData> Jobs;
|
||||
#endif
|
||||
#if JOB_SYSTEM_USE_STATS
|
||||
int64 DequeueCount = 0;
|
||||
int64 DequeueSum = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Allocates a block of the given size (in bytes) for job system containers.
// Reuses a pooled block from MemPool when one with exactly the same size exists; otherwise requests fresh memory from the platform.
// Performs no locking itself while reading and modifying MemPool.
void* JobSystemAllocation::Allocate(uintptr size)
|
||||
{
|
||||
void* result = nullptr;
|
||||
// Linear scan over the pooled blocks; only an exact size match is accepted
for (int32 i = 0; i < MemPool.Count(); i++)
|
||||
{
|
||||
if (MemPool.Get()[i].Second == size)
|
||||
{
|
||||
result = MemPool.Get()[i].First;
|
||||
// Take the block out of the pool so it cannot be handed out twice
MemPool.RemoveAt(i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Nothing suitable in the pool
// NOTE(review): the second argument (16) is presumably the alignment in bytes - confirm against Platform::Allocate
if (!result)
|
||||
result = Platform::Allocate(size, 16);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Returns a block to the local pool instead of releasing it: the pointer and its size are appended to MemPool so a later Allocate call with the same size can reuse it.
// Performs no locking itself while modifying MemPool.
void JobSystemAllocation::Free(void* ptr, uintptr size)
|
||||
{
|
||||
MemPool.Add({ ptr, size });
|
||||
}
|
||||
|
||||
bool JobSystemService::Init()
|
||||
@@ -151,6 +163,12 @@ void JobSystemService::Dispose()
|
||||
Threads[i] = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
JobContexts.SetCapacity(0);
|
||||
Jobs.Release();
|
||||
for (auto& e : MemPool)
|
||||
Platform::Free(e.First);
|
||||
MemPool.Clear();
|
||||
}
|
||||
|
||||
int32 JobSystemThread::Run()
|
||||
@@ -158,34 +176,22 @@ int32 JobSystemThread::Run()
|
||||
Platform::SetThreadAffinityMask(1ull << Index);
|
||||
|
||||
JobData data;
|
||||
Function<void(int32)> job;
|
||||
bool attachCSharpThread = true;
|
||||
#if !JOB_SYSTEM_USE_MUTEX
|
||||
moodycamel::ConsumerToken consumerToken(Jobs);
|
||||
#endif
|
||||
while (Platform::AtomicRead(&ExitFlag) == 0)
|
||||
{
|
||||
// Try to get a job
|
||||
#if JOB_SYSTEM_USE_STATS
|
||||
const auto start = Platform::GetTimeCycles();
|
||||
#endif
|
||||
#if JOB_SYSTEM_USE_MUTEX
|
||||
JobsLocker.Lock();
|
||||
if (Jobs.Count() != 0)
|
||||
{
|
||||
data = Jobs.PeekFront();
|
||||
Jobs.PopFront();
|
||||
const JobContext& context = ((const Dictionary<int64, JobContext>&)JobContexts).At(data.JobKey);
|
||||
job = context.Job;
|
||||
}
|
||||
JobsLocker.Unlock();
|
||||
#else
|
||||
if (!Jobs.try_dequeue(consumerToken, data))
|
||||
data.Job.Unbind();
|
||||
#endif
|
||||
#if JOB_SYSTEM_USE_STATS
|
||||
Platform::InterlockedIncrement(&DequeueCount);
|
||||
Platform::InterlockedAdd(&DequeueSum, Platform::GetTimeCycles() - start);
|
||||
#endif
|
||||
|
||||
if (data.Job.IsBinded())
|
||||
if (job.IsBinded())
|
||||
{
|
||||
#if USE_CSHARP
|
||||
// Ensure to have C# thread attached to this thread (late init due to MCore being initialized after Job System)
|
||||
@@ -197,21 +203,37 @@ int32 JobSystemThread::Run()
|
||||
#endif
|
||||
|
||||
// Run job
|
||||
data.Job(data.Index);
|
||||
job(data.Index);
|
||||
|
||||
// Move forward with the job queue
|
||||
bool notifyWaiting = false;
|
||||
JobsLocker.Lock();
|
||||
JobContext& context = JobContexts.At(data.JobKey);
|
||||
if (Platform::InterlockedDecrement(&context.JobsLeft) <= 0)
|
||||
{
|
||||
ASSERT_LOW_LAYER(context.JobsLeft <= 0);
|
||||
// Update any dependant jobs
|
||||
for (int64 dependant : context.Dependants)
|
||||
{
|
||||
JobContext& dependantContext = JobContexts.At(dependant);
|
||||
if (--dependantContext.DependenciesLeft <= 0)
|
||||
{
|
||||
// Dispatch the dependant job once all of its dependencies have completed
|
||||
JobData dependantData;
|
||||
dependantData.JobKey = dependant;
|
||||
for (dependantData.Index = 0; dependantData.Index < dependantContext.JobsLeft; dependantData.Index++)
|
||||
Jobs.PushBack(dependantData);
|
||||
}
|
||||
}
|
||||
|
||||
// Remove completed context
|
||||
JobContexts.Remove(data.JobKey);
|
||||
notifyWaiting = true;
|
||||
}
|
||||
JobsLocker.Unlock();
|
||||
if (notifyWaiting)
|
||||
WaitSignal.NotifyAll();
|
||||
|
||||
WaitSignal.NotifyAll();
|
||||
|
||||
data.Job.Unbind();
|
||||
job.Unbind();
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -247,39 +269,25 @@ void JobSystem::Execute(const Function<void(int32)>& job, int32 jobCount)
|
||||
|
||||
int64 JobSystem::Dispatch(const Function<void(int32)>& job, int32 jobCount)
|
||||
{
|
||||
PROFILE_CPU();
|
||||
if (jobCount <= 0)
|
||||
return 0;
|
||||
PROFILE_CPU();
|
||||
#if JOB_SYSTEM_ENABLED
|
||||
#if JOB_SYSTEM_USE_STATS
|
||||
const auto start = Platform::GetTimeCycles();
|
||||
#endif
|
||||
const auto label = Platform::InterlockedAdd(&JobLabel, (int64)jobCount) + jobCount;
|
||||
|
||||
JobData data;
|
||||
data.Job = job;
|
||||
data.JobKey = label;
|
||||
|
||||
JobContext context;
|
||||
context.Job = job;
|
||||
context.JobsLeft = jobCount;
|
||||
context.DependenciesLeft = 0;
|
||||
|
||||
#if JOB_SYSTEM_USE_MUTEX
|
||||
JobsLocker.Lock();
|
||||
JobContexts.Add(label, context);
|
||||
JobContexts.Add(label, MoveTemp(context));
|
||||
for (data.Index = 0; data.Index < jobCount; data.Index++)
|
||||
Jobs.PushBack(data);
|
||||
JobsLocker.Unlock();
|
||||
#else
|
||||
JobsLocker.Lock();
|
||||
JobContexts.Add(label, context);
|
||||
JobsLocker.Unlock();
|
||||
for (data.Index = 0; data.Index < jobCount; data.Index++)
|
||||
Jobs.enqueue(data);
|
||||
#endif
|
||||
|
||||
#if JOB_SYSTEM_USE_STATS
|
||||
LOG(Info, "Job enqueue time: {0} cycles", (int64)(Platform::GetTimeCycles() - start));
|
||||
#endif
|
||||
|
||||
if (JobStartingOnDispatch)
|
||||
{
|
||||
@@ -297,6 +305,56 @@ int64 JobSystem::Dispatch(const Function<void(int32)>& job, int32 jobCount)
|
||||
#endif
|
||||
}
|
||||
|
||||
// Dispatches a job that starts executing only after the given dependencies have completed.
// job: the function to run; it receives the execution index (0..jobCount-1).
// dependencies: labels of earlier dispatches; labels not found in JobContexts are skipped and do not delay this job.
// jobCount: number of executions; values <= 0 make this call a no-op.
// Returns the label identifying this dispatch, or 0 when jobCount <= 0 or when the job system is compiled out (JOB_SYSTEM_ENABLED is 0).
int64 JobSystem::Dispatch(const Function<void(int32)>& job, Span<int64> dependencies, int32 jobCount)
|
||||
{
|
||||
if (jobCount <= 0)
|
||||
return 0;
|
||||
PROFILE_CPU();
|
||||
#if JOB_SYSTEM_ENABLED
|
||||
// Label is derived from the global JobLabel counter, which is advanced by jobCount via Platform::InterlockedAdd
const auto label = Platform::InterlockedAdd(&JobLabel, (int64)jobCount) + jobCount;
|
||||
|
||||
JobData data;
|
||||
data.JobKey = label;
|
||||
|
||||
JobContext context;
|
||||
context.Job = job;
|
||||
context.JobsLeft = jobCount;
|
||||
context.DependenciesLeft = 0;
|
||||
|
||||
// Registration of dependencies, the context and the queued jobs all happen under JobsLocker
JobsLocker.Lock();
|
||||
for (int64 dependency : dependencies)
|
||||
{
|
||||
// Only dependencies that still have a context registered are counted and linked
if (JobContext* dependencyContext = JobContexts.TryGet(dependency))
|
||||
{
|
||||
context.DependenciesLeft++;
|
||||
dependencyContext->Dependants.Add(label);
|
||||
}
|
||||
}
|
||||
// NOTE(review): context.DependenciesLeft is read below after context was passed through MoveTemp; presumably fine because it is a plain int32 that a move leaves intact - confirm
JobContexts.Add(label, MoveTemp(context));
|
||||
if (context.DependenciesLeft == 0)
|
||||
{
|
||||
// No dependencies left to complete so dispatch now
|
||||
for (data.Index = 0; data.Index < jobCount; data.Index++)
|
||||
Jobs.PushBack(data);
|
||||
}
|
||||
JobsLocker.Unlock();
|
||||
|
||||
// Signal the job threads only if jobs were actually queued above and starting on dispatch is enabled
if (context.DependenciesLeft == 0 && JobStartingOnDispatch)
|
||||
{
|
||||
if (jobCount == 1)
|
||||
JobsSignal.NotifyOne();
|
||||
else
|
||||
JobsSignal.NotifyAll();
|
||||
}
|
||||
|
||||
return label;
|
||||
#else
|
||||
// Job system disabled: run every execution inline on the calling thread; the dependencies argument is not used on this path
for (int32 i = 0; i < jobCount; i++)
|
||||
job(i);
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
void JobSystem::Wait()
|
||||
{
|
||||
#if JOB_SYSTEM_ENABLED
|
||||
@@ -340,11 +398,6 @@ void JobSystem::Wait(int64 label)
|
||||
// Wake up any thread to prevent stalling in highly multi-threaded environment
|
||||
JobsSignal.NotifyOne();
|
||||
}
|
||||
|
||||
#if JOB_SYSTEM_USE_STATS
|
||||
LOG(Info, "Job average dequeue time: {0} cycles", DequeueSum / DequeueCount);
|
||||
DequeueSum = DequeueCount = 0;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -352,16 +405,11 @@ void JobSystem::SetJobStartingOnDispatch(bool value)
|
||||
{
|
||||
#if JOB_SYSTEM_ENABLED
|
||||
JobStartingOnDispatch = value;
|
||||
|
||||
if (value)
|
||||
{
|
||||
#if JOB_SYSTEM_USE_MUTEX
|
||||
JobsLocker.Lock();
|
||||
const int32 count = Jobs.Count();
|
||||
JobsLocker.Unlock();
|
||||
#else
|
||||
const int32 count = Jobs.Count();
|
||||
#endif
|
||||
if (count == 1)
|
||||
JobsSignal.NotifyOne();
|
||||
else if (count != 0)
|
||||
|
||||
@@ -4,6 +4,9 @@
|
||||
|
||||
#include "Engine/Core/Delegate.h"
|
||||
|
||||
template<typename T>
|
||||
class Span;
|
||||
|
||||
/// <summary>
|
||||
/// Lightweight multi-threaded jobs execution scheduler. Uses a pool of threads and supports work-stealing concept.
|
||||
/// </summary>
|
||||
@@ -26,6 +29,15 @@ API_CLASS(Static) class FLAXENGINE_API JobSystem
|
||||
/// <returns>The label identifying this dispatch. Can be used to wait for the execution end.</returns>
|
||||
API_FUNCTION() static int64 Dispatch(const Function<void(int32)>& job, int32 jobCount = 1);
|
||||
|
||||
/// <summary>
|
||||
/// Dispatches the job for execution after all of the jobs it depends on have completed.
|
||||
/// </summary>
|
||||
/// <param name="job">The job. Argument is an index of the job execution.</param>
|
||||
/// <param name="dependencies">The list of labels of the jobs this job depends on. All of them need to complete before this job starts executing.</param>
|
||||
/// <param name="jobCount">The job executions count.</param>
|
||||
/// <returns>The label identifying this dispatch. Can be used to wait for the execution end.</returns>
|
||||
API_FUNCTION() static int64 Dispatch(const Function<void(int32)>& job, Span<int64> dependencies, int32 jobCount = 1);
|
||||
|
||||
/// <summary>
|
||||
/// Waits for all dispatched jobs to finish.
|
||||
/// </summary>
|
||||
|
||||
Reference in New Issue
Block a user