Merge remote-tracking branch 'origin/master'

This commit is contained in:
Wojtek Figat
2021-08-31 09:36:44 +02:00
14 changed files with 160 additions and 106 deletions

View File

@@ -89,10 +89,19 @@ namespace AnimationUtils
result = (T)(a + t * (b - a));
}
/// <summary>
/// Vector2 specialization: blends the X and Y components independently with Math::Lerp.
/// </summary>
template<>
FORCE_INLINE void Interpolate<Vector2>(const Vector2& a, const Vector2& b, float t, Vector2& result)
{
    // Evaluate both blends first, then publish them to the output
    const auto blendedX = Math::Lerp(a.X, b.X, t);
    const auto blendedY = Math::Lerp(a.Y, b.Y, t);
    result.X = blendedX;
    result.Y = blendedY;
}
/// <summary>
/// Vector3 specialization: blends the X, Y and Z components independently with Math::Lerp.
/// </summary>
/// <param name="a">The value the blend starts from.</param>
/// <param name="b">The value the blend goes towards.</param>
/// <param name="t">The blend factor handed to Math::Lerp for every component.</param>
/// <param name="result">Receives the blended vector; all three components are written.</param>
template<>
FORCE_INLINE void Interpolate<Vector3>(const Vector3& a, const Vector3& b, float t, Vector3& result)
{
    result.X = Math::Lerp(a.X, b.X, t);
    result.Y = Math::Lerp(a.Y, b.Y, t);
    // The Z blend must land in result.Z; storing it in result.Y would overwrite the Y blend and leave Z unset
    result.Z = Math::Lerp(a.Z, b.Z, t);
}
template<>
@@ -191,7 +200,7 @@ namespace AnimationUtils
const float uu = u * u;
const float uuu = uu * u;
const float ttt = tt * t;
result = uuu * p0 + 3 * uu * t * p1 + 3 * u * tt * p2 + ttt * p3;
result = uuu * p0 + (3 * uu * t) * p1 + (3 * u * tt) * p2 + ttt * p3;
}
template<>
@@ -202,7 +211,7 @@ namespace AnimationUtils
const float uu = u * u;
const float uuu = uu * u;
const float ttt = tt * t;
result = uuu * p0 + 3 * uu * t * p1 + 3 * u * tt * p2 + ttt * p3;
result = uuu * p0 + (3 * uu * t) * p1 + (3 * u * tt) * p2 + ttt * p3;
}
template<>

View File

@@ -56,6 +56,10 @@ void AnimationsSystem::Job(int32 index)
auto animatedModel = UpdateList[index];
auto skinnedModel = animatedModel->SkinnedModel.Get();
auto graph = animatedModel->AnimationGraph.Get();
#if COMPILE_WITH_PROFILER && TRACY_ENABLE
// Name the profiler zone after the graph asset path.
// The graph pointer is only null-checked by the condition that follows, so the dereference has to be guarded here too.
if (graph)
{
    const StringView graphName(graph->GetPath());
    ZoneName(*graphName, graphName.Length());
}
#endif
if (graph && graph->IsLoaded() && graph->Graph.CanUseWithSkeleton(skinnedModel)
#if USE_EDITOR
&& graph->Graph.Parameters.Count() == animatedModel->GraphInstance.Parameters.Count() // It may happen in editor so just add safe check to prevent any crashes

View File

@@ -149,6 +149,7 @@ Variant AnimGraphExecutor::SampleAnimation(AnimGraphNode* node, bool loop, float
// Skip if animation is not ready to use
if (anim == nullptr || !anim->IsLoaded())
return Value::Null;
PROFILE_CPU_ASSET(anim);
// Calculate actual time position within the animation node (defined by length and loop mode)
const float pos = GetAnimPos(newTimePos, startTimePos, loop, length);
@@ -607,8 +608,6 @@ void AnimGraphExecutor::ProcessGroupAnimation(Box* boxBase, Node* nodeBase, Valu
// Animation
case 0:
{
ANIM_GRAPH_PROFILE_EVENT("Sample");
const float length = anim ? anim->GetLength() : 0.0f;
// Calculate new time position

View File

@@ -480,11 +480,7 @@ bool Asset::onLoad(LoadAssetTask* task)
// Load asset
LoadResult result;
{
#if TRACY_ENABLE
ZoneScoped;
const StringView name(GetPath());
ZoneName(*name, name.Length());
#endif
PROFILE_CPU_ASSET(this);
result = loadAsset();
}
const bool isLoaded = result == LoadResult::Ok;

View File

@@ -18,9 +18,14 @@ typedef __m128 SimdVector4;
namespace SIMD
{
// Builds a vector from a single scalar via _mm_set1_ps (the non-SSE fallback fills X, Y, Z and W with the same value).
FORCE_INLINE SimdVector4 Load(float xyzw)
{
    const auto lanes = _mm_set1_ps(xyzw);
    return lanes;
}
FORCE_INLINE SimdVector4 Load(float x, float y, float z, float w)
{
return _mm_set_ps(x, y, z, w);
return _mm_set_ps(w, z, y, x);
}
FORCE_INLINE SimdVector4 Load(const void* src)
@@ -91,34 +96,39 @@ namespace SIMD
#else
struct SimdFloat4
struct SimdVector4
{
float X, Y, Z, W;
};
namespace SIMD
{
FORCE_INLINE SimdFloat4 Load(float x, float y, float z, float w)
FORCE_INLINE SimdVector4 Load(float xyzw)
{
return { xyzw, xyzw, xyzw, xyzw };
}
FORCE_INLINE SimdVector4 Load(float x, float y, float z, float w)
{
return { x, y, z, w };
}
FORCE_INLINE SimdFloat4 Load(const void* src)
FORCE_INLINE SimdVector4 Load(const void* src)
{
return *(const SimdFloat4*)src;
return *(const SimdVector4*)src;
}
FORCE_INLINE SimdFloat4 Splat(float value)
FORCE_INLINE SimdVector4 Splat(float value)
{
return { value, value, value, value };
}
FORCE_INLINE void Store(void* dst, SimdFloat4 src)
FORCE_INLINE void Store(void* dst, SimdVector4 src)
{
(*(SimdFloat4*)dst) = src;
(*(SimdVector4*)dst) = src;
}
FORCE_INLINE int MoveMask(SimdFloat4 a)
FORCE_INLINE int MoveMask(SimdVector4 a)
{
return (a.W < 0 ? (1 << 3) : 0) |
(a.Z < 0 ? (1 << 2) : 0) |
@@ -126,7 +136,7 @@ namespace SIMD
(a.X < 0 ? 1 : 0);
}
FORCE_INLINE SimdFloat4 Add(SimdFloat4 a, SimdFloat4 b)
FORCE_INLINE SimdVector4 Add(SimdVector4 a, SimdVector4 b)
{
return
{
@@ -137,7 +147,7 @@ namespace SIMD
};
}
FORCE_INLINE SimdFloat4 Sub(SimdFloat4 a, SimdFloat4 b)
FORCE_INLINE SimdVector4 Sub(SimdVector4 a, SimdVector4 b)
{
return
{
@@ -148,7 +158,7 @@ namespace SIMD
};
}
FORCE_INLINE SimdFloat4 Mul(SimdFloat4 a, SimdFloat4 b)
FORCE_INLINE SimdVector4 Mul(SimdVector4 a, SimdVector4 b)
{
return
{
@@ -159,7 +169,7 @@ namespace SIMD
};
}
FORCE_INLINE SimdFloat4 Div(SimdFloat4 a, SimdFloat4 b)
FORCE_INLINE SimdVector4 Div(SimdVector4 a, SimdVector4 b)
{
return
{
@@ -170,7 +180,7 @@ namespace SIMD
};
}
FORCE_INLINE SimdFloat4 Rcp(SimdFloat4 a)
FORCE_INLINE SimdVector4 Rcp(SimdVector4 a)
{
return
{
@@ -181,7 +191,7 @@ namespace SIMD
};
}
FORCE_INLINE SimdFloat4 Sqrt(SimdFloat4 a)
FORCE_INLINE SimdVector4 Sqrt(SimdVector4 a)
{
return
{
@@ -192,7 +202,7 @@ namespace SIMD
};
}
FORCE_INLINE SimdFloat4 Rsqrt(SimdFloat4 a)
FORCE_INLINE SimdVector4 Rsqrt(SimdVector4 a)
{
return
{
@@ -203,7 +213,7 @@ namespace SIMD
};
}
FORCE_INLINE SimdFloat4 Min(SimdFloat4 a, SimdFloat4 b)
FORCE_INLINE SimdVector4 Min(SimdVector4 a, SimdVector4 b)
{
return
{
@@ -214,7 +224,7 @@ namespace SIMD
};
}
FORCE_INLINE SimdFloat4 Max(SimdFloat4 a, SimdFloat4 b)
FORCE_INLINE SimdVector4 Max(SimdVector4 a, SimdVector4 b)
{
return
{

View File

@@ -14,6 +14,15 @@
#define RAND3 Vector3(RAND, RAND, RAND)
#define RAND4 Vector4(RAND, RAND, RAND, RAND)
// Enable to insert CPU profiler events for particles modules
#define PARTICLE_EMITTER_MODULES_PROFILE 0
#if PARTICLE_EMITTER_MODULES_PROFILE
#include "Engine/Profiler/ProfilerCPU.h"
#define PARTICLE_EMITTER_MODULE(name) PROFILE_CPU_NAMED(name)
#else
#define PARTICLE_EMITTER_MODULE(name)
#endif
namespace
{
FORCE_INLINE Vector4 Mod289(Vector4 x)
@@ -181,6 +190,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode*
case 201:
case 303:
{
PARTICLE_EMITTER_MODULE("Orient Sprite");
auto spriteFacingMode = node->Values[2].AsInt;
{
auto& attribute = context.Data->Buffer->Layout->Attributes[node->Attributes[0]];
@@ -223,6 +233,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode*
case 213:
case 309:
{
PARTICLE_EMITTER_MODULE("Orient Model");
auto modelFacingMode = node->Values[2].AsInt;
{
auto& attribute = context.Data->Buffer->Layout->Attributes[node->Attributes[0]];
@@ -238,6 +249,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode*
// Update Age
case 300:
{
PARTICLE_EMITTER_MODULE("Update Age");
auto& attribute = context.Data->Buffer->Layout->Attributes[node->Attributes[0]];
byte* agePtr = start + attribute.Offset;
for (int32 particleIndex = particlesStart; particleIndex < particlesEnd; particleIndex++)
@@ -251,6 +263,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode*
case 301:
case 304:
{
PARTICLE_EMITTER_MODULE("Gravity/Force");
auto& attribute = context.Data->Buffer->Layout->Attributes[node->Attributes[0]];
byte* velocityPtr = start + attribute.Offset;
auto box = node->GetBox(0);
@@ -278,6 +291,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode*
// Conform to Sphere
case 305:
{
PARTICLE_EMITTER_MODULE("Conform to Sphere");
auto& position = context.Data->Buffer->Layout->Attributes[node->Attributes[0]];
auto& velocity = context.Data->Buffer->Layout->Attributes[node->Attributes[1]];
auto& mass = context.Data->Buffer->Layout->Attributes[node->Attributes[2]];
@@ -340,6 +354,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode*
// Kill (sphere)
case 306:
{
PARTICLE_EMITTER_MODULE("Kill");
auto& position = context.Data->Buffer->Layout->Attributes[node->Attributes[0]];
byte* positionPtr = start + position.Offset;
@@ -388,6 +403,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode*
// Kill (box)
case 307:
{
PARTICLE_EMITTER_MODULE("Kill");
auto& position = context.Data->Buffer->Layout->Attributes[node->Attributes[0]];
byte* positionPtr = start + position.Offset;
@@ -441,6 +457,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode*
// Kill (custom)
case 308:
{
PARTICLE_EMITTER_MODULE("Kill (custom)");
auto killBox = node->GetBox(0);
#define INPUTS_FETCH() \
@@ -478,6 +495,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode*
// Linear Drag
case 310:
{
PARTICLE_EMITTER_MODULE("Linear Drag");
auto box = node->GetBox(0);
const bool useSpriteSize = node->Values[3].AsBool;
@@ -523,6 +541,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode*
// Turbulence
case 311:
{
PARTICLE_EMITTER_MODULE("Turbulence");
auto& position = context.Data->Buffer->Layout->Attributes[node->Attributes[0]];
auto& velocity = context.Data->Buffer->Layout->Attributes[node->Attributes[1]];
auto& mass = context.Data->Buffer->Layout->Attributes[node->Attributes[2]];
@@ -583,6 +602,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode*
case 200:
case 302:
{
PARTICLE_EMITTER_MODULE("Set Attribute");
auto& attribute = context.Data->Buffer->Layout->Attributes[node->Attributes[0]];
byte* dataPtr = start + attribute.Offset;
int32 dataSize = attribute.GetSize();
@@ -590,10 +610,11 @@ void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode*
ValueType type(GetVariantType(attribute.ValueType));
if (node->UsePerParticleDataResolve())
{
Value value;
for (int32 particleIndex = particlesStart; particleIndex < particlesEnd; particleIndex++)
{
context.ParticleIndex = particleIndex;
const Value value = GetValue(box, 4).Cast(type);
value = GetValue(box, 4).Cast(type);
Platform::MemoryCopy(dataPtr, &value.AsPointer, dataSize);
dataPtr += stride;
}
@@ -639,6 +660,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode*
case 362:
case 363:
{
PARTICLE_EMITTER_MODULE("Set");
auto& attribute = context.Data->Buffer->Layout->Attributes[node->Attributes[0]];
byte* dataPtr = start + attribute.Offset;
int32 dataSize = attribute.GetSize();
@@ -646,10 +668,11 @@ void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode*
ValueType type(GetVariantType(attribute.ValueType));
if (node->UsePerParticleDataResolve())
{
Value value;
for (int32 particleIndex = particlesStart; particleIndex < particlesEnd; particleIndex++)
{
context.ParticleIndex = particleIndex;
const Value value = GetValue(box, 2).Cast(type);
value = GetValue(box, 2).Cast(type);
Platform::MemoryCopy(dataPtr, &value.AsPointer, dataSize);
dataPtr += stride;
}
@@ -668,6 +691,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode*
// Position (sphere surface)
case 202:
{
PARTICLE_EMITTER_MODULE("Position");
auto& positionAttr = context.Data->Buffer->Layout->Attributes[node->Attributes[0]];
byte* positionPtr = start + positionAttr.Offset;
@@ -713,6 +737,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode*
// Position (plane)
case 203:
{
PARTICLE_EMITTER_MODULE("Position");
auto& positionAttr = context.Data->Buffer->Layout->Attributes[node->Attributes[0]];
byte* positionPtr = start + positionAttr.Offset;
@@ -751,6 +776,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode*
// Position (circle)
case 204:
{
PARTICLE_EMITTER_MODULE("Position");
auto& positionAttr = context.Data->Buffer->Layout->Attributes[node->Attributes[0]];
byte* positionPtr = start + positionAttr.Offset;
@@ -794,6 +820,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode*
// Position (disc)
case 205:
{
PARTICLE_EMITTER_MODULE("Position");
auto& positionAttr = context.Data->Buffer->Layout->Attributes[node->Attributes[0]];
byte* positionPtr = start + positionAttr.Offset;
@@ -837,6 +864,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode*
// Position (box surface)
case 206:
{
PARTICLE_EMITTER_MODULE("Position");
auto& positionAttr = context.Data->Buffer->Layout->Attributes[node->Attributes[0]];
byte* positionPtr = start + positionAttr.Offset;
@@ -887,6 +915,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode*
// Position (box volume)
case 207:
{
PARTICLE_EMITTER_MODULE("Position");
auto& positionAttr = context.Data->Buffer->Layout->Attributes[node->Attributes[0]];
byte* positionPtr = start + positionAttr.Offset;
@@ -925,6 +954,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode*
// Position (cylinder)
case 208:
{
PARTICLE_EMITTER_MODULE("Position");
auto& positionAttr = context.Data->Buffer->Layout->Attributes[node->Attributes[0]];
byte* positionPtr = start + positionAttr.Offset;
@@ -970,6 +1000,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode*
// Position (line)
case 209:
{
PARTICLE_EMITTER_MODULE("Position");
auto& positionAttr = context.Data->Buffer->Layout->Attributes[node->Attributes[0]];
byte* positionPtr = start + positionAttr.Offset;
@@ -1008,6 +1039,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode*
// Position (torus)
case 210:
{
PARTICLE_EMITTER_MODULE("Position");
auto& positionAttr = context.Data->Buffer->Layout->Attributes[node->Attributes[0]];
byte* positionPtr = start + positionAttr.Offset;
@@ -1072,6 +1104,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode*
// Position (sphere volume)
case 211:
{
PARTICLE_EMITTER_MODULE("Position");
auto& positionAttr = context.Data->Buffer->Layout->Attributes[node->Attributes[0]];
byte* positionPtr = start + positionAttr.Offset;
@@ -1123,6 +1156,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode*
// Position (spiral)
case 214:
{
PARTICLE_EMITTER_MODULE("Position");
auto& positionAttr = context.Data->Buffer->Layout->Attributes[node->Attributes[0]];
auto& velocityAttr = context.Data->Buffer->Layout->Attributes[node->Attributes[1]];
@@ -1173,6 +1207,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessModule(ParticleEmitterGraphCPUNode*
// Helper macros for collision modules to share the code
#define COLLISION_BEGIN() \
PARTICLE_EMITTER_MODULE("Collision"); \
auto& positionAttr = context.Data->Buffer->Layout->Attributes[node->Attributes[0]]; \
auto& velocityAttr = context.Data->Buffer->Layout->Attributes[node->Attributes[1]]; \
auto& ageAttr = context.Data->Buffer->Layout->Attributes[node->Attributes[2]]; \

View File

@@ -419,7 +419,7 @@ void ParticleEmitterGraphCPUExecutor::ProcessGroupFunction(Box* box, Node* node,
Node* functionCallNode = nullptr;
ASSERT(context.GraphStack.Count() >= 2);
Graph* graph;
for (int32 i = context.CallStack.Count() - 1; i >= 0; i--)
for (int32 i = context.CallStackSize - 1; i >= 0; i--)
{
if (context.CallStack[i]->Type == GRAPH_NODE_MAKE_TYPE(14, 300) && context.Functions.TryGet(context.CallStack[i], graph) && context.GraphStack[context.GraphStack.Count() - 1] == graph)
{

View File

@@ -6,6 +6,7 @@
#include "Engine/Renderer/RenderList.h"
#include "Engine/Particles/ParticleEffect.h"
#include "Engine/Engine/Time.h"
#include "Engine/Profiler/ProfilerCPU.h"
ThreadLocal<ParticleEmitterGraphCPUContext> ParticleEmitterGraphCPUExecutor::Context;
@@ -130,7 +131,7 @@ void ParticleEmitterGraphCPUExecutor::Init(ParticleEmitter* emitter, ParticleEff
context.DeltaTime = dt;
context.ParticleIndex = 0;
context.ViewTask = effect->GetRenderTask();
context.CallStack.Clear();
context.CallStackSize = 0;
context.Functions.Clear();
}
@@ -433,6 +434,7 @@ void ParticleEmitterGraphCPUExecutor::Update(ParticleEmitter* emitter, ParticleE
// Update particles
if (cpu.Count > 0)
{
PROFILE_CPU_NAMED("Update");
for (int32 i = 0; i < _graph.UpdateModules.Count(); i++)
{
ProcessModule(_graph.UpdateModules[i], 0, cpu.Count);
@@ -442,6 +444,7 @@ void ParticleEmitterGraphCPUExecutor::Update(ParticleEmitter* emitter, ParticleE
// Dead particles removal
if (_graph._attrAge != -1 && _graph._attrLifetime != -1)
{
PROFILE_CPU_NAMED("Age kill");
byte* agePtr = cpu.Buffer.Get() + data.Buffer->Layout->Attributes[_graph._attrAge].Offset;
byte* lifetimePtr = cpu.Buffer.Get() + data.Buffer->Layout->Attributes[_graph._attrLifetime].Offset;
for (int32 particleIndex = 0; particleIndex < cpu.Count; particleIndex++)
@@ -477,6 +480,7 @@ void ParticleEmitterGraphCPUExecutor::Update(ParticleEmitter* emitter, ParticleE
// Euler integration
if (_graph._attrPosition != -1 && _graph._attrVelocity != -1)
{
PROFILE_CPU_NAMED("Euler Integration");
byte* positionPtr = cpu.Buffer.Get() + data.Buffer->Layout->Attributes[_graph._attrPosition].Offset;
byte* velocityPtr = cpu.Buffer.Get() + data.Buffer->Layout->Attributes[_graph._attrVelocity].Offset;
for (int32 particleIndex = 0; particleIndex < cpu.Count; particleIndex++)
@@ -490,6 +494,7 @@ void ParticleEmitterGraphCPUExecutor::Update(ParticleEmitter* emitter, ParticleE
// Angular Euler Integration
if (_graph._attrRotation != -1 && _graph._attrAngularVelocity != -1)
{
PROFILE_CPU_NAMED("Angular Euler Integration");
byte* rotationPtr = cpu.Buffer.Get() + data.Buffer->Layout->Attributes[_graph._attrRotation].Offset;
byte* angularVelocityPtr = cpu.Buffer.Get() + data.Buffer->Layout->Attributes[_graph._attrAngularVelocity].Offset;
for (int32 particleIndex = 0; particleIndex < cpu.Count; particleIndex++)
@@ -504,6 +509,7 @@ void ParticleEmitterGraphCPUExecutor::Update(ParticleEmitter* emitter, ParticleE
int32 spawnCount = 0;
if (canSpawn)
{
PROFILE_CPU_NAMED("Spawn");
for (int32 i = 0; i < _graph.SpawnModules.Count(); i++)
{
spawnCount += ProcessSpawnModule(i);
@@ -514,6 +520,8 @@ void ParticleEmitterGraphCPUExecutor::Update(ParticleEmitter* emitter, ParticleE
spawnCount = countAfter - countBefore;
if (spawnCount != 0)
{
PROFILE_CPU_NAMED("Init");
// Spawn particles
data.Buffer->CPU.Count = countAfter;
@@ -533,6 +541,7 @@ void ParticleEmitterGraphCPUExecutor::Update(ParticleEmitter* emitter, ParticleE
if (_graph.RibbonRenderingModules.HasItems())
{
// Sort ribbon particles
PROFILE_CPU_NAMED("Ribbon");
if (cpu.RibbonOrder.IsEmpty())
{
cpu.RibbonOrder.Resize(_graph.RibbonRenderingModules.Count() * data.Buffer->Capacity);
@@ -559,6 +568,8 @@ void ParticleEmitterGraphCPUExecutor::Update(ParticleEmitter* emitter, ParticleE
int32 ParticleEmitterGraphCPUExecutor::UpdateSpawn(ParticleEmitter* emitter, ParticleEffect* effect, ParticleEmitterInstance& data, float dt)
{
PROFILE_CPU_NAMED("Spawn");
// Prepare data
auto& context = Context.Get();
Init(emitter, effect, data, dt);
@@ -577,7 +588,7 @@ VisjectExecutor::Value ParticleEmitterGraphCPUExecutor::eatBox(Node* caller, Box
{
// Check if graph is looped or is too deep
auto& context = Context.Get();
if (context.CallStack.Count() >= PARTICLE_EMITTER_MAX_CALL_STACK)
if (context.CallStackSize >= PARTICLE_EMITTER_MAX_CALL_STACK)
{
OnError(caller, box, TEXT("Graph is looped or too deep!"));
return Value::Zero;
@@ -591,7 +602,7 @@ VisjectExecutor::Value ParticleEmitterGraphCPUExecutor::eatBox(Node* caller, Box
#endif
// Add to the calling stack
context.CallStack.Add(caller);
context.CallStack[context.CallStackSize++] = caller;
// Call per group custom processing event
Value value;
@@ -600,7 +611,7 @@ VisjectExecutor::Value ParticleEmitterGraphCPUExecutor::eatBox(Node* caller, Box
(this->*func)(box, parentNode, value);
// Remove from the calling stack
context.CallStack.RemoveLast();
context.CallStackSize--;
return value;
}

View File

@@ -120,9 +120,10 @@ struct ParticleEmitterGraphCPUContext
ParticleEmitter* Emitter;
ParticleEffect* Effect;
class SceneRenderTask* ViewTask;
Array<VisjectExecutor::Node*, FixedAllocation<PARTICLE_EMITTER_MAX_CALL_STACK>> CallStack;
Array<VisjectExecutor::Graph*, FixedAllocation<32>> GraphStack;
Dictionary<VisjectExecutor::Node*, VisjectExecutor::Graph*> Functions;
int32 CallStackSize = 0;
VisjectExecutor::Node* CallStack[PARTICLE_EMITTER_MAX_CALL_STACK];
};
/// <summary>

View File

@@ -1211,6 +1211,10 @@ void ParticlesSystem::Job(int32 index)
}
if (anyEmitterNotReady)
return;
#if COMPILE_WITH_PROFILER && TRACY_ENABLE
const StringView particleSystemName(particleSystem->GetPath());
ZoneName(*particleSystemName, particleSystemName.Length());
#endif
// Prepare instance data
instance.Sync(particleSystem);
@@ -1287,6 +1291,7 @@ void ParticlesSystem::Job(int32 index)
auto& data = instance.Emitters[track.AsEmitter.Index];
ASSERT(emitter && emitter->IsLoaded());
ASSERT(emitter->Capacity != 0 && emitter->Graph.Layout.Size != 0);
PROFILE_CPU_ASSET(emitter);
// Calculate new time position
const float startTime = (float)track.AsEmitter.StartFrame / fps;

View File

@@ -20,6 +20,8 @@
#include <intrin.h>
#pragma comment(lib, "Iphlpapi.lib")
static_assert(sizeof(int32) == sizeof(long), "Invalid long size for Interlocked and Atomic operations in Win32Platform.");
namespace
{
Guid DeviceId;
@@ -239,59 +241,6 @@ void Win32Platform::MemoryBarrier()
#endif
}
int64 Win32Platform::InterlockedExchange(int64 volatile* dst, int64 exchange)
{
return InterlockedExchange64(dst, exchange);
}
int32 Win32Platform::InterlockedCompareExchange(int32 volatile* dst, int32 exchange, int32 comperand)
{
static_assert(sizeof(int32) == sizeof(LONG), "Invalid LONG size.");
return _InterlockedCompareExchange((LONG volatile*)dst, exchange, comperand);
}
int64 Win32Platform::InterlockedCompareExchange(int64 volatile* dst, int64 exchange, int64 comperand)
{
return InterlockedCompareExchange64(dst, exchange, comperand);
}
int64 Win32Platform::InterlockedIncrement(int64 volatile* dst)
{
return InterlockedIncrement64(dst);
}
int64 Win32Platform::InterlockedDecrement(int64 volatile* dst)
{
return InterlockedDecrement64(dst);
}
int64 Win32Platform::InterlockedAdd(int64 volatile* dst, int64 value)
{
return InterlockedExchangeAdd64(dst, value);
}
int32 Win32Platform::AtomicRead(int32 volatile* dst)
{
static_assert(sizeof(int32) == sizeof(LONG), "Invalid LONG size.");
return _InterlockedCompareExchange((LONG volatile*)dst, 0, 0);
}
int64 Win32Platform::AtomicRead(int64 volatile* dst)
{
return InterlockedCompareExchange64(dst, 0, 0);
}
void Win32Platform::AtomicStore(int32 volatile* dst, int32 value)
{
static_assert(sizeof(int32) == sizeof(LONG), "Invalid LONG size.");
_InterlockedExchange((LONG volatile*)dst, value);
}
void Win32Platform::AtomicStore(int64 volatile* dst, int64 value)
{
InterlockedExchange64(dst, value);
}
void Win32Platform::Prefetch(void const* ptr)
{
_mm_prefetch((char const*)ptr, _MM_HINT_T0);
@@ -387,11 +336,6 @@ uint64 Win32Platform::GetCurrentProcessId()
return ::GetCurrentProcessId();
}
uint64 Win32Platform::GetCurrentThreadID()
{
return ::GetCurrentThreadId();
}
void Win32Platform::SetThreadPriority(ThreadPriority priority)
{
int32 winPriority;

View File

@@ -5,6 +5,12 @@
#if PLATFORM_WIN32
#include "Engine/Platform/Base/PlatformBase.h"
#if _MSC_VER <= 1900
#include <intrin.h>
#else
#include <intrin0.h>
#endif
extern "C" __declspec(dllimport) unsigned long __stdcall GetCurrentThreadId(void);
/// <summary>
/// The Win32 platform implementation and application management utilities.
@@ -17,16 +23,46 @@ public:
static bool Init();
static void Exit();
static void MemoryBarrier();
static int64 InterlockedExchange(int64 volatile* dst, int64 exchange);
static int32 InterlockedCompareExchange(int32 volatile* dst, int32 exchange, int32 comperand);
static int64 InterlockedCompareExchange(int64 volatile* dst, int64 exchange, int64 comperand);
static int64 InterlockedIncrement(int64 volatile* dst);
static int64 InterlockedDecrement(int64 volatile* dst);
static int64 InterlockedAdd(int64 volatile* dst, int64 value);
static int32 AtomicRead(int32 volatile* dst);
static int64 AtomicRead(int64 volatile* dst);
static void AtomicStore(int32 volatile* dst, int32 value);
static void AtomicStore(int64 volatile* dst, int64 value);
/// <summary>
/// 64-bit interlocked exchange, forwarded to the _InterlockedExchange64 compiler intrinsic.
/// </summary>
/// <param name="dst">The memory location to exchange with.</param>
/// <param name="exchange">The value handed to the intrinsic as the new content.</param>
/// <returns>The value reported by the intrinsic.</returns>
static int64 InterlockedExchange(int64 volatile* dst, int64 exchange)
{
    const auto reported = _InterlockedExchange64(dst, exchange);
    return reported;
}
/// <summary>
/// 32-bit interlocked compare-exchange, forwarded to the _InterlockedCompareExchange compiler intrinsic.
/// </summary>
/// <param name="dst">The memory location to operate on (reinterpreted as long for the intrinsic).</param>
/// <param name="exchange">The exchange value passed to the intrinsic.</param>
/// <param name="comperand">The comparand value passed to the intrinsic.</param>
/// <returns>The value reported by the intrinsic.</returns>
static int32 InterlockedCompareExchange(int32 volatile* dst, int32 exchange, int32 comperand)
{
    // The intrinsic is declared for long, so view the 32-bit destination through that type
    auto target = reinterpret_cast<long volatile*>(dst);
    return _InterlockedCompareExchange(target, exchange, comperand);
}
/// <summary>
/// 64-bit interlocked compare-exchange, forwarded to the _InterlockedCompareExchange64 compiler intrinsic.
/// </summary>
/// <param name="dst">The memory location to operate on.</param>
/// <param name="exchange">The exchange value passed to the intrinsic.</param>
/// <param name="comperand">The comparand value passed to the intrinsic.</param>
/// <returns>The value reported by the intrinsic.</returns>
static int64 InterlockedCompareExchange(int64 volatile* dst, int64 exchange, int64 comperand)
{
    const auto reported = _InterlockedCompareExchange64(dst, exchange, comperand);
    return reported;
}
/// <summary>
/// 64-bit interlocked increment built on the _InterlockedExchangeAdd64 compiler intrinsic.
/// </summary>
/// <param name="dst">The memory location to increment.</param>
/// <returns>The intrinsic's result plus one.</returns>
static int64 InterlockedIncrement(int64 volatile* dst)
{
    // Exchange-add by +1, then apply the same +1 to the value the intrinsic handed back
    const auto reported = _InterlockedExchangeAdd64(dst, 1);
    return reported + 1;
}
/// <summary>
/// 64-bit interlocked decrement built on the _InterlockedExchangeAdd64 compiler intrinsic.
/// </summary>
/// <param name="dst">The memory location to decrement.</param>
/// <returns>The intrinsic's result minus one.</returns>
static int64 InterlockedDecrement(int64 volatile* dst)
{
    // Exchange-add by -1, then apply the same -1 to the value the intrinsic handed back
    const auto reported = _InterlockedExchangeAdd64(dst, -1);
    return reported - 1;
}
/// <summary>
/// 64-bit interlocked addition, forwarded to the _InterlockedExchangeAdd64 compiler intrinsic.
/// </summary>
/// <param name="dst">The memory location to add to.</param>
/// <param name="value">The amount passed to the intrinsic.</param>
/// <returns>The value reported by the intrinsic, returned as-is.</returns>
static int64 InterlockedAdd(int64 volatile* dst, int64 value)
{
    // NOTE(review): InterlockedIncrement/InterlockedDecrement adjust the intrinsic's result by the operand,
    // this function does not - confirm callers expect the unadjusted value.
    const auto reported = _InterlockedExchangeAdd64(dst, value);
    return reported;
}
/// <summary>
/// Reads a 32-bit value through the _InterlockedCompareExchange compiler intrinsic.
/// </summary>
/// <param name="dst">The memory location to read (reinterpreted as long for the intrinsic).</param>
/// <returns>The value reported by the intrinsic, converted to int32.</returns>
static int32 AtomicRead(int32 volatile* dst)
{
    // Compare-exchange with 0 as both the exchange value and the comparand
    auto target = reinterpret_cast<long volatile*>(dst);
    const auto reported = _InterlockedCompareExchange(target, 0, 0);
    return static_cast<int32>(reported);
}
/// <summary>
/// Reads a 64-bit value through the _InterlockedCompareExchange64 compiler intrinsic.
/// </summary>
/// <param name="dst">The memory location to read.</param>
/// <returns>The value reported by the intrinsic.</returns>
static int64 AtomicRead(int64 volatile* dst)
{
    // Compare-exchange with 0 as both the exchange value and the comparand
    const auto reported = _InterlockedCompareExchange64(dst, 0, 0);
    return reported;
}
/// <summary>
/// Stores a 32-bit value through the _InterlockedExchange compiler intrinsic.
/// </summary>
/// <param name="dst">The memory location to write (reinterpreted as long for the intrinsic).</param>
/// <param name="value">The value to store.</param>
static void AtomicStore(int32 volatile* dst, int32 value)
{
    auto target = reinterpret_cast<long volatile*>(dst);
    // The value the intrinsic returns is not needed for a plain store
    (void)_InterlockedExchange(target, value);
}
/// <summary>
/// Stores a 64-bit value through the _InterlockedExchange64 compiler intrinsic.
/// </summary>
/// <param name="dst">The memory location to write.</param>
/// <param name="value">The value to store.</param>
static void AtomicStore(int64 volatile* dst, int64 value)
{
    // The value the intrinsic returns is not needed for a plain store
    (void)_InterlockedExchange64(dst, value);
}
static void Prefetch(void const* ptr);
static void* Allocate(uint64 size, uint64 alignment);
static void Free(void* ptr);
@@ -38,7 +74,10 @@ public:
static MemoryStats GetMemoryStats();
static ProcessMemoryStats GetProcessMemoryStats();
static uint64 GetCurrentProcessId();
static uint64 GetCurrentThreadID();
/// <summary>
/// Gets the identifier of the calling thread by calling GetCurrentThreadId.
/// </summary>
/// <returns>The identifier returned by GetCurrentThreadId, widened to uint64.</returns>
static uint64 GetCurrentThreadID()
{
    const auto threadId = GetCurrentThreadId();
    return threadId;
}
static void SetThreadPriority(ThreadPriority priority);
static void SetThreadAffinityMask(uint64 affinityMask);
static void Sleep(int32 milliseconds);

View File

@@ -402,8 +402,10 @@ struct TIsPODType<ProfilerCPU::Event>
#ifdef TRACY_ENABLE
#define PROFILE_CPU_SRC_LOC(srcLoc) tracy::ScopedZone ___tracy_scoped_zone( (tracy::SourceLocationData*)&(srcLoc) ); ScopeProfileBlockCPU ProfileBlockCPU((srcLoc).name)
#define PROFILE_CPU_ASSET(asset) ZoneScoped; const StringView __tracy_asset_name((asset)->GetPath()); ZoneName(*__tracy_asset_name, __tracy_asset_name.Length())
#else
#define PROFILE_CPU_SRC_LOC(srcLoc) ScopeProfileBlockCPU ProfileBlockCPU((srcLoc).name)
#define PROFILE_CPU_ASSET(asset)
#endif
#else

View File

@@ -81,7 +81,6 @@ protected:
FORCE_INLINE int32 GetIndex()
{
ASSERT(Count() < MaxThreads);
int64 key = (int64)Platform::GetCurrentThreadID();
auto index = Hash(key);
while (true)