Additional improvements to scene rendering

This commit is contained in:
Wojtek Figat
2022-11-15 22:39:03 +01:00
parent a2670dc3b5
commit eb281a7574
11 changed files with 68 additions and 79 deletions

View File

@@ -22,14 +22,12 @@ float3 ViewDir;
float TimeParam;
float4 ViewInfo;
float4 ScreenSize;
float3 WorldInvScale;
float WorldDeterminantSign;
float2 Dummy0;
float LODDitherFactor;
float PerInstanceRandom;
float4 TemporalAAJitter;
float3 GeometrySize;
float Dummy1;
float WorldDeterminantSign;
@1META_CB_END
// Shader resources
@@ -48,6 +46,9 @@ struct GeometryData
nointerpolation float3 InstanceOrigin : TEXCOORD5;
nointerpolation float2 InstanceParams : TEXCOORD6; // x-PerInstanceRandom, y-LODDitherFactor
float3 PrevWorldPosition : TEXCOORD7;
nointerpolation float3 InstanceTransform1 : TEXCOORD8;
nointerpolation float3 InstanceTransform2 : TEXCOORD9;
nointerpolation float3 InstanceTransform3 : TEXCOORD10;
};
// Interpolants passed from the vertex shader
@@ -92,11 +93,9 @@ struct MaterialInput
float3 PreSkinnedNormal;
float3 InstanceOrigin;
float2 InstanceParams;
#if USE_INSTANCING
float3 InstanceTransform1;
float3 InstanceTransform2;
float3 InstanceTransform3;
#endif
#if USE_CUSTOM_VERTEX_INTERPOLATORS
float4 CustomVSToPS[CUSTOM_VERTEX_INTERPOLATORS_COUNT];
#endif
@@ -117,6 +116,9 @@ MaterialInput GetGeometryMaterialInput(GeometryData geometry)
output.TBN = CalcTangentBasis(geometry.WorldNormal, geometry.WorldTangent);
output.InstanceOrigin = geometry.InstanceOrigin;
output.InstanceParams = geometry.InstanceParams;
output.InstanceTransform1 = geometry.InstanceTransform1;
output.InstanceTransform2 = geometry.InstanceTransform2;
output.InstanceTransform3 = geometry.InstanceTransform3;
return output;
}
@@ -154,6 +156,9 @@ GeometryData InterpolateGeometry(GeometryData p0, float w0, GeometryData p1, flo
output.WorldTangent.xyz = normalize(output.WorldTangent.xyz);
output.InstanceOrigin = p0.InstanceOrigin;
output.InstanceParams = p0.InstanceParams;
output.InstanceTransform1 = p0.InstanceTransform1;
output.InstanceTransform2 = p0.InstanceTransform2;
output.InstanceTransform3 = p0.InstanceTransform3;
return output;
}
@@ -170,17 +175,19 @@ MaterialInput GetMaterialInput(PixelInput input)
return output;
}
// Gets the local to world transform matrix (supports instancing)
#if USE_INSTANCING
// Gets the local to world transform matrix
#define GetInstanceTransform(input) float4x4(float4(input.InstanceTransform1.xyz, 0.0f), float4(input.InstanceTransform2.xyz, 0.0f), float4(input.InstanceTransform3.xyz, 0.0f), float4(input.InstanceOrigin.xyz, 1.0f))
// Extracts the world matrix and the instance transform vectors
#if USE_INSTANCING
#define CalculateInstanceTransform(input) float4x4 world = GetInstanceTransform(input); output.Geometry.InstanceTransform1 = input.InstanceTransform1.xyz; output.Geometry.InstanceTransform2 = input.InstanceTransform2.xyz; output.Geometry.InstanceTransform3 = input.InstanceTransform3.xyz;
#else
#define GetInstanceTransform(input) WorldMatrix;
#define CalculateInstanceTransform(input) float4x4 world = WorldMatrix; output.Geometry.InstanceTransform1 = world[0].xyz; output.Geometry.InstanceTransform2 = world[1].xyz; output.Geometry.InstanceTransform3 = world[2].xyz;
#endif
// Removes the scale vector from the local to world transformation matrix (supports instancing)
float3x3 RemoveScaleFromLocalToWorld(float3x3 localToWorld)
{
#if USE_INSTANCING
// Extract per axis scales from localToWorld transform
float scaleX = length(localToWorld[0]);
float scaleY = length(localToWorld[1]);
@@ -189,9 +196,6 @@ float3x3 RemoveScaleFromLocalToWorld(float3x3 localToWorld)
scaleX > 0.00001f ? 1.0f / scaleX : 0.0f,
scaleY > 0.00001f ? 1.0f / scaleY : 0.0f,
scaleZ > 0.00001f ? 1.0f / scaleZ : 0.0f);
#else
float3 invScale = WorldInvScale;
#endif
localToWorld[0] *= invScale.x;
localToWorld[1] *= invScale.y;
localToWorld[2] *= invScale.z;
@@ -333,7 +337,7 @@ VertexOutput VS(ModelInput input)
VertexOutput output;
// Compute world space vertex position
float4x4 world = GetInstanceTransform(input);
CalculateInstanceTransform(input);
output.Geometry.WorldPosition = mul(float4(input.Position.xyz, 1), world).xyz;
output.Geometry.PrevWorldPosition = mul(float4(input.Position.xyz, 1), PrevWorldMatrix).xyz;
@@ -372,11 +376,6 @@ VertexOutput VS(ModelInput input)
materialInput.SvPosition = output.Position;
materialInput.PreSkinnedPosition = input.Position.xyz;
materialInput.PreSkinnedNormal = tangentToLocal[2].xyz;
#if USE_INSTANCING
materialInput.InstanceTransform1 = input.InstanceTransform1.xyz;
materialInput.InstanceTransform2 = input.InstanceTransform2.xyz;
materialInput.InstanceTransform3 = input.InstanceTransform3.xyz;
#endif
Material material = GetMaterialVS(materialInput);
#endif
@@ -411,7 +410,11 @@ META_VS_IN_ELEMENT(ATTRIBUTE,3, R32G32B32_FLOAT, 3, ALIGN, PER_INSTANCE, 1, US
META_VS_IN_ELEMENT(ATTRIBUTE,4, R16G16B16A16_FLOAT,3, ALIGN, PER_INSTANCE, 1, USE_INSTANCING)
float4 VS_Depth(ModelInput_PosOnly input) : SV_Position
{
#if USE_INSTANCING
float4x4 world = GetInstanceTransform(input);
#else
float4x4 world = WorldMatrix;
#endif
float3 worldPosition = mul(float4(input.Position.xyz, 1), world).xyz;
float4 position = mul(float4(worldPosition, 1), ViewProjectionMatrix);
return position;
@@ -515,7 +518,7 @@ VertexOutput VS_Skinned(ModelInput_Skinned input)
float3x3 tangentToLocal = SkinTangents(input, data);
// Compute world space vertex position
float4x4 world = GetInstanceTransform(input);
CalculateInstanceTransform(input);
output.Geometry.WorldPosition = mul(float4(position, 1), world).xyz;
#if PER_BONE_MOTION_BLUR
float3 prevPosition = SkinPrevPosition(input);

View File

@@ -30,14 +30,12 @@ PACK_STRUCT(struct DeferredMaterialShaderData {
float TimeParam;
Float4 ViewInfo;
Float4 ScreenSize;
Float3 WorldInvScale;
float WorldDeterminantSign;
Float2 Dummy0;
float LODDitherFactor;
float PerInstanceRandom;
Float4 TemporalAAJitter;
Float3 GeometrySize;
float Dummy1;
float WorldDeterminantSign;
});
DrawPass DeferredMaterialShader::GetDrawModes() const
@@ -96,13 +94,6 @@ void DeferredMaterialShader::Bind(BindParameters& params)
materialData->TimeParam = params.TimeParam;
materialData->ViewInfo = view.ViewInfo;
materialData->ScreenSize = view.ScreenSize;
const float scaleX = Float3(drawCall.World.M11, drawCall.World.M12, drawCall.World.M13).Length();
const float scaleY = Float3(drawCall.World.M21, drawCall.World.M22, drawCall.World.M23).Length();
const float scaleZ = Float3(drawCall.World.M31, drawCall.World.M32, drawCall.World.M33).Length();
materialData->WorldInvScale = Float3(
scaleX > 0.00001f ? 1.0f / scaleX : 0.0f,
scaleY > 0.00001f ? 1.0f / scaleY : 0.0f,
scaleZ > 0.00001f ? 1.0f / scaleZ : 0.0f);
materialData->WorldDeterminantSign = drawCall.WorldDeterminantSign;
materialData->LODDitherFactor = drawCall.Surface.LODDitherFactor;
materialData->PerInstanceRandom = drawCall.PerInstanceRandom;

View File

@@ -32,14 +32,12 @@ PACK_STRUCT(struct ForwardMaterialShaderData {
float TimeParam;
Float4 ViewInfo;
Float4 ScreenSize;
Float3 WorldInvScale;
float WorldDeterminantSign;
Float2 Dummy0;
float LODDitherFactor;
float PerInstanceRandom;
Float4 TemporalAAJitter;
Float3 GeometrySize;
float Dummy1;
float WorldDeterminantSign;
});
DrawPass ForwardMaterialShader::GetDrawModes() const
@@ -104,13 +102,6 @@ void ForwardMaterialShader::Bind(BindParameters& params)
materialData->TimeParam = params.TimeParam;
materialData->ViewInfo = view.ViewInfo;
materialData->ScreenSize = view.ScreenSize;
const float scaleX = Float3(drawCall.World.M11, drawCall.World.M12, drawCall.World.M13).Length();
const float scaleY = Float3(drawCall.World.M21, drawCall.World.M22, drawCall.World.M23).Length();
const float scaleZ = Float3(drawCall.World.M31, drawCall.World.M32, drawCall.World.M33).Length();
materialData->WorldInvScale = Float3(
scaleX > 0.00001f ? 1.0f / scaleX : 0.0f,
scaleY > 0.00001f ? 1.0f / scaleY : 0.0f,
scaleZ > 0.00001f ? 1.0f / scaleZ : 0.0f);
materialData->WorldDeterminantSign = drawCall.WorldDeterminantSign;
materialData->LODDitherFactor = drawCall.Surface.LODDitherFactor;
materialData->PerInstanceRandom = drawCall.PerInstanceRandom;

View File

@@ -117,7 +117,7 @@ public:
/// </summary>
struct InstancingHandler
{
void (*GetHash)(const DrawCall& drawCall, int32& batchKey);
void (*GetHash)(const DrawCall& drawCall, uint32& batchKey);
bool (*CanBatch)(const DrawCall& a, const DrawCall& b);
void (*WriteDrawCall)(struct InstanceData* instanceData, const DrawCall& drawCall);
};

View File

@@ -10,7 +10,7 @@
/// <summary>
/// Current materials shader version.
/// </summary>
#define MATERIAL_GRAPH_VERSION 156
#define MATERIAL_GRAPH_VERSION 157
class Material;
class GPUShader;

View File

@@ -16,11 +16,6 @@
#include "Engine/Level/Scene/Lightmap.h"
#include "Engine/Level/Actors/PostFxVolume.h"
// Amount of bits to use for draw calls batches hash key
#define USE_BATCH_KEY_MASK 0
#define BATCH_KEY_BITS 32
#define BATCH_KEY_MASK ((1 << BATCH_KEY_BITS) - 1)
static_assert(sizeof(DrawCall) <= 280, "Too big draw call data size.");
static_assert(sizeof(DrawCall::Surface) >= sizeof(DrawCall::Terrain), "Wrong draw call data size.");
static_assert(sizeof(DrawCall::Surface) >= sizeof(DrawCall::Particle), "Wrong draw call data size.");
@@ -524,9 +519,10 @@ namespace
}
}
void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseDistance, DrawCallsList& list, const DrawCall* drawCalls)
void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseDistance, DrawCallsList& list, const RenderListBuffer<DrawCall>& drawCalls)
{
PROFILE_CPU();
const auto* drawCallsData = drawCalls.Get();
const auto* listData = list.Indices.Get();
const int32 listSize = list.Indices.Count();
const Float3 planeNormal = renderContext.View.Direction;
@@ -544,10 +540,10 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD
const uint32 sortKeyXor = reverseDistance ? MAX_uint32 : 0;
for (int32 i = 0; i < listSize; i++)
{
const DrawCall& drawCall = drawCalls[listData[i]];
const DrawCall& drawCall = drawCallsData[listData[i]];
const float distance = Float3::Dot(planeNormal, drawCall.ObjectPosition) - planePoint;
const uint32 sortKey = RenderTools::ComputeDistanceSortKey(distance) ^ sortKeyXor;
int32 batchKey = GetHash(drawCall.Geometry.IndexBuffer);
uint32 batchKey = GetHash(drawCall.Geometry.IndexBuffer);
batchKey = (batchKey * 397) ^ GetHash(drawCall.Geometry.VertexBuffers[0]);
batchKey = (batchKey * 397) ^ GetHash(drawCall.Geometry.VertexBuffers[1]);
batchKey = (batchKey * 397) ^ GetHash(drawCall.Geometry.VertexBuffers[2]);
@@ -556,12 +552,7 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD
if (drawCall.Material->CanUseInstancing(handler))
handler.GetHash(drawCall, batchKey);
batchKey += (int32)(471 * drawCall.WorldDeterminantSign);
#if USE_BATCH_KEY_MASK
const uint32 batchHashKey = (uint32)batchKey & BATCH_KEY_MASK;
#else
const uint32 batchHashKey = (uint32)batchKey;
#endif
sortedKeys[i] = (uint64)batchHashKey << 32 | (uint64)sortKey;
sortedKeys[i] = (uint64)batchKey << 32 | (uint64)sortKey;
}
// Sort draw calls indices
@@ -574,14 +565,14 @@ void RenderList::SortDrawCalls(const RenderContext& renderContext, bool reverseD
list.Batches.Clear();
for (int32 i = 0; i < listSize;)
{
const DrawCall& drawCall = drawCalls[listData[i]];
const DrawCall& drawCall = drawCallsData[listData[i]];
int32 batchSize = 1;
int32 instanceCount = drawCall.InstanceCount;
// Check the following draw calls to merge them (using instancing)
for (int32 j = i + 1; j < listSize; j++)
{
const DrawCall& other = drawCalls[listData[j]];
const DrawCall& other = drawCallsData[listData[j]];
if (!CanBatchWith(drawCall, other))
break;
@@ -608,11 +599,12 @@ bool CanUseInstancing(DrawPass pass)
return pass == DrawPass::GBuffer || pass == DrawPass::Depth;
}
void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsList& list, const DrawCall* drawCalls, GPUTextureView* input)
void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsList& list, const RenderListBuffer<DrawCall>& drawCalls, GPUTextureView* input)
{
if (list.IsEmpty())
return;
PROFILE_GPU_CPU("Drawing");
const auto* drawCallsData = drawCalls.Get();
const auto* listData = list.Indices.Get();
const auto* batchesData = list.Batches.Get();
const auto context = GPUDevice::Instance->GetMainContext();
@@ -655,10 +647,10 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL
if (batch.BatchSize > 1)
{
IMaterial::InstancingHandler handler;
drawCalls[listData[batch.StartIndex]].Material->CanUseInstancing(handler);
drawCallsData[listData[batch.StartIndex]].Material->CanUseInstancing(handler);
for (int32 j = 0; j < batch.BatchSize; j++)
{
auto& drawCall = drawCalls[listData[batch.StartIndex + j]];
auto& drawCall = drawCallsData[listData[batch.StartIndex + j]];
handler.WriteDrawCall(instanceData, drawCall);
instanceData++;
}
@@ -691,7 +683,7 @@ DRAW:
for (int32 i = 0; i < list.Batches.Count(); i++)
{
auto& batch = batchesData[i];
const DrawCall& drawCall = drawCalls[listData[batch.StartIndex]];
const DrawCall& drawCall = drawCallsData[listData[batch.StartIndex]];
int32 vbCount = 0;
while (vbCount < ARRAY_COUNT(drawCall.Geometry.VertexBuffers) && drawCall.Geometry.VertexBuffers[vbCount])
@@ -866,14 +858,16 @@ DRAW:
}
}
void SurfaceDrawCallHandler::GetHash(const DrawCall& drawCall, int32& batchKey)
// Mixes the surface lightmap of the given draw call into the running batch hash key.
// batchKey is an in/out value: it is multiplied by 397 and then XOR-ed with the hash of drawCall.Surface.Lightmap.
void SurfaceDrawCallHandler::GetHash(const DrawCall& drawCall, uint32& batchKey)
{
batchKey = (batchKey * 397) ^ ::GetHash(drawCall.Surface.Lightmap);
}
bool SurfaceDrawCallHandler::CanBatch(const DrawCall& a, const DrawCall& b)
{
return a.Surface.Lightmap == b.Surface.Lightmap &&
// TODO: find the reason why batching static meshes with a lightmap causes sampling problems in the shader (flickering when the order of meshes in a batch changes due to async draw calls collection)
return a.Surface.Lightmap == nullptr && b.Surface.Lightmap == nullptr &&
//return a.Surface.Lightmap == b.Surface.Lightmap &&
a.Surface.Skinning == nullptr &&
b.Surface.Skinning == nullptr;
}

View File

@@ -504,7 +504,7 @@ public:
/// <param name="listType">The collected draw calls list type.</param>
API_FUNCTION() FORCE_INLINE void SortDrawCalls(API_PARAM(Ref) const RenderContext& renderContext, bool reverseDistance, DrawCallsListType listType)
{
SortDrawCalls(renderContext, reverseDistance, DrawCallsLists[(int32)listType], DrawCalls.Get());
SortDrawCalls(renderContext, reverseDistance, DrawCallsLists[(int32)listType], DrawCalls);
}
/// <summary>
@@ -514,7 +514,7 @@ public:
/// <param name="reverseDistance">If set to <c>true</c> reverse draw call distance to the view. Results in back to front sorting.</param>
/// <param name="list">The collected draw calls indices list.</param>
/// <param name="drawCalls">The collected draw calls list.</param>
void SortDrawCalls(const RenderContext& renderContext, bool reverseDistance, DrawCallsList& list, const DrawCall* drawCalls);
void SortDrawCalls(const RenderContext& renderContext, bool reverseDistance, DrawCallsList& list, const RenderListBuffer<DrawCall>& drawCalls);
/// <summary>
/// Executes the collected draw calls.
@@ -524,7 +524,7 @@ public:
/// <param name="input">The input scene color. It's optional and used in forward/postFx rendering.</param>
API_FUNCTION() FORCE_INLINE void ExecuteDrawCalls(API_PARAM(Ref) const RenderContext& renderContext, DrawCallsListType listType, GPUTextureView* input = nullptr)
{
ExecuteDrawCalls(renderContext, DrawCallsLists[(int32)listType], DrawCalls.Get(), input);
ExecuteDrawCalls(renderContext, DrawCallsLists[(int32)listType], DrawCalls, input);
}
/// <summary>
@@ -535,7 +535,7 @@ public:
/// <param name="input">The input scene color. It's optional and used in forward/postFx rendering.</param>
FORCE_INLINE void ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsList& list, GPUTextureView* input = nullptr)
{
ExecuteDrawCalls(renderContext, list, DrawCalls.Get(), input);
ExecuteDrawCalls(renderContext, list, DrawCalls, input);
}
/// <summary>
@@ -545,14 +545,14 @@ public:
/// <param name="list">The collected draw calls indices list.</param>
/// <param name="drawCalls">The collected draw calls list.</param>
/// <param name="input">The input scene color. It's optional and used in forward/postFx rendering.</param>
void ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsList& list, const DrawCall* drawCalls, GPUTextureView* input);
void ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsList& list, const RenderListBuffer<DrawCall>& drawCalls, GPUTextureView* input);
};
/// <summary>
/// Represents data per instance element used for instanced rendering.
/// </summary>
struct FLAXENGINE_API InstanceData
{
PACK_STRUCT(struct FLAXENGINE_API InstanceData
{
Float3 InstanceOrigin;
float PerInstanceRandom;
Float3 InstanceTransform1;
@@ -560,11 +560,11 @@ struct FLAXENGINE_API InstanceData
Float3 InstanceTransform2;
Float3 InstanceTransform3;
Half4 InstanceLightmapArea;
};
});
struct SurfaceDrawCallHandler
{
static void GetHash(const DrawCall& drawCall, int32& batchKey);
static void GetHash(const DrawCall& drawCall, uint32& batchKey);
static bool CanBatch(const DrawCall& a, const DrawCall& b);
static void WriteDrawCall(InstanceData* instanceData, const DrawCall& drawCall);
};

View File

@@ -373,7 +373,7 @@ void RenderInner(SceneRenderTask* task, RenderContext& renderContext, RenderCont
{
auto& shadowContext = renderContextBatch.Contexts[i];
shadowContext.List->SortDrawCalls(shadowContext, false, DrawCallsListType::Depth);
shadowContext.List->SortDrawCalls(shadowContext, false, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls.Get());
shadowContext.List->SortDrawCalls(shadowContext, false, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls);
}
}

View File

@@ -631,7 +631,7 @@ void ShadowsPass::RenderShadow(RenderContextBatch& renderContextBatch, RendererP
context->ClearDepth(rt);
auto& shadowContext = renderContextBatch.Contexts[shadowData.ContextIndex + faceIndex];
shadowContext.List->ExecuteDrawCalls(shadowContext, DrawCallsListType::Depth);
shadowContext.List->ExecuteDrawCalls(shadowContext, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls.Get(), nullptr);
shadowContext.List->ExecuteDrawCalls(shadowContext, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, nullptr);
}
// Restore GPU context
@@ -709,7 +709,7 @@ void ShadowsPass::RenderShadow(RenderContextBatch& renderContextBatch, RendererS
context->ClearDepth(rt);
auto& shadowContext = renderContextBatch.Contexts[shadowData.ContextIndex + faceIndex];
shadowContext.List->ExecuteDrawCalls(shadowContext, DrawCallsListType::Depth);
shadowContext.List->ExecuteDrawCalls(shadowContext, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls.Get(), nullptr);
shadowContext.List->ExecuteDrawCalls(shadowContext, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, nullptr);
}
// Restore GPU context
@@ -779,7 +779,7 @@ void ShadowsPass::RenderShadow(RenderContextBatch& renderContextBatch, RendererD
context->ClearDepth(rt);
auto& shadowContext = renderContextBatch.Contexts[shadowData.ContextIndex + cascadeIndex];
shadowContext.List->ExecuteDrawCalls(shadowContext, DrawCallsListType::Depth);
shadowContext.List->ExecuteDrawCalls(shadowContext, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls.Get(), nullptr);
shadowContext.List->ExecuteDrawCalls(shadowContext, shadowContext.List->ShadowDepthDrawCallsList, renderContext.List->DrawCalls, nullptr);
}
// Restore GPU context

View File

@@ -35,11 +35,9 @@ template<typename T>
class ConcurrentQueue : public moodycamel::ConcurrentQueue<T, ConcurrentQueueSettings>
{
public:
typedef moodycamel::ConcurrentQueue<T, ConcurrentQueueSettings> Base;
public:
/// <summary>
/// Gets an estimate of the total number of elements currently in the queue.
/// </summary>
@@ -52,7 +50,16 @@ public:
/// Adds item to the collection.
/// </summary>
/// <param name="item">The item to add.</param>
FORCE_INLINE void Add(T& item)
FORCE_INLINE void Add(const T& item)
{
// item is a const lvalue reference here, so it is handed to enqueue as-is (no move is possible from this overload).
enqueue(item);
}
/// <summary>
/// Adds item to the collection by moving it.
/// </summary>
/// <param name="item">The item to add. It is passed on as an rvalue, so the caller should treat it as moved-from afterwards.</param>
FORCE_INLINE void Add(T&& item)
{
// A named rvalue reference is an lvalue inside the function body, so passing plain `item` would bind to the
// const-reference (copying) enqueue overload. Cast it back to T&& so the item is forwarded as an rvalue.
enqueue(static_cast<T&&>(item));
}

View File

@@ -339,6 +339,9 @@ void JobSystem::Wait(int64 label)
WaitMutex.Lock();
WaitSignal.Wait(WaitMutex, 1);
WaitMutex.Unlock();
// Wake up any thread to prevent stalling in highly multi-threaded environment
JobsSignal.NotifyOne();
}
#if JOB_SYSTEM_USE_STATS