Merge remote-tracking branch 'origin/master'

2025-12-09 09:48:05 +01:00
parent 32bd72fecd 56066a3212
commit 188b635ea0
14 changed files with 130 additions and 14 deletions
--- a/Source/Engine/Core/Collections/Array.h
+++ b/Source/Engine/Core/Collections/Array.h
@@ -658,7 +658,10 @@ public:
        --_count;
        T* data = _allocation.Get();
        if (index < _count)
-            Memory::MoveAssignItems(data + index, data + (index + 1), _count - index);
+        {
+            for (int32 i = index; i < _count; i++)
+                data[i] = MoveTemp(data[i + 1]);
+        }
        Memory::DestructItems(data + _count, 1);
    }

--- a/Source/Engine/Graphics/Materials/DeferredMaterialShader.cpp
+++ b/Source/Engine/Graphics/Materials/DeferredMaterialShader.cpp
@@ -201,6 +201,7 @@ bool DeferredMaterialShader::Load()
    psDesc.DepthWriteEnable = true;
    psDesc.DepthEnable = true;
    psDesc.DepthFunc = ComparisonFunc::Less;
+    psDesc.BlendMode.RenderTargetWriteMask = BlendingMode::ColorWrite::None;
    psDesc.HS = nullptr;
    psDesc.DS = nullptr;
    GPUShaderProgramVS* instancedDepthPassVS;
--- a/Source/Engine/Graphics/Materials/ForwardMaterialShader.cpp
+++ b/Source/Engine/Graphics/Materials/ForwardMaterialShader.cpp
@@ -195,5 +195,10 @@ bool ForwardMaterialShader::Load()
    psDesc.VS = _shader->GetVS("VS_Skinned");
    _cache.DepthSkinned.Init(psDesc);

+#if PLATFORM_PS5
+    // Workaround for shader binding issues on forward shading materials on PS5 (disables all draw passes for this material)
+    _drawModes = DrawPass::None;
+#endif
+
    return false;
 }
--- a/Source/Engine/Graphics/Materials/ParticleMaterialShader.cpp
+++ b/Source/Engine/Graphics/Materials/ParticleMaterialShader.cpp
@@ -264,5 +264,10 @@ bool ParticleMaterialShader::Load()
    // Lazy initialization
    _cacheVolumetricFog.Desc.PS = nullptr;

+#if PLATFORM_PS5
+    // Workaround for shader binding issues on forward shading materials on PS5 (disables all draw passes for this particle material)
+    _drawModes = DrawPass::None;
+#endif
+
    return false;
 }
--- a/Source/Engine/Graphics/RenderBuffers.cpp
+++ b/Source/Engine/Graphics/RenderBuffers.cpp
@@ -113,7 +113,8 @@ GPUTexture* RenderBuffers::RequestHalfResDepth(GPUContext* context)

 PixelFormat RenderBuffers::GetOutputFormat() const
 {
-    return _useAlpha ? PixelFormat::R16G16B16A16_Float : PixelFormat::R11G11B10_Float;
+    // TODO: fix incorrect alpha leaking into reflections on PS5 with R11G11B10_Float (until then, PS5 always uses R16G16B16A16_Float)
+    return _useAlpha || PLATFORM_PS5 ? PixelFormat::R16G16B16A16_Float : PixelFormat::R11G11B10_Float;
 }

 bool RenderBuffers::GetUseAlpha() const
--- a/Source/Engine/Graphics/Shaders/GPUVertexLayout.cpp
+++ b/Source/Engine/Graphics/Shaders/GPUVertexLayout.cpp
@@ -216,20 +216,21 @@ GPUVertexLayout* GPUVertexLayout::Get(const Span<GPUVertexLayout*>& layouts)
    return result;
 }

-GPUVertexLayout* GPUVertexLayout::Merge(GPUVertexLayout* base, GPUVertexLayout* reference, bool removeUnused, bool addMissing, int32 missingSlotOverride)
+GPUVertexLayout* GPUVertexLayout::Merge(GPUVertexLayout* base, GPUVertexLayout* reference, bool removeUnused, bool addMissing, int32 missingSlotOverride, bool referenceOrder)
 {
    GPUVertexLayout* result = base ? base : reference;
    if (base && reference && base != reference)
    {
        bool elementsModified = false;
        Elements newElements = base->GetElements();
+        const Elements& refElements = reference->GetElements();
        if (removeUnused)
        {
            for (int32 i = newElements.Count() - 1; i >= 0; i--)
            {
                bool missing = true;
                const VertexElement& e = newElements.Get()[i];
-                for (const VertexElement& ee : reference->GetElements())
+                for (const VertexElement& ee : refElements)
                {
                    if (ee.Type == e.Type)
                    {
@@ -247,7 +248,7 @@ GPUVertexLayout* GPUVertexLayout::Merge(GPUVertexLayout* base, GPUVertexLayout*
        }
        if (addMissing)
        {
-            for (const VertexElement& e : reference->GetElements())
+            for (const VertexElement& e : refElements)
            {
                bool missing = true;
                for (const VertexElement& ee : base->GetElements())
@@ -282,6 +283,32 @@ GPUVertexLayout* GPUVertexLayout::Merge(GPUVertexLayout* base, GPUVertexLayout*
                }
            }
        }
+        if (referenceOrder)
+        {
+            for (int32 i = 0, j = 0; i < newElements.Count() && j < refElements.Count(); j++)
+            {
+                if (newElements[i].Type == refElements[j].Type)
+                {
+                    // Elements match so move forward
+                    i++;
+                    continue;
+                }
+
+                // Find reference element in a new list
+                for (int32 k = i + 1; k < newElements.Count(); k++)
+                {
+                    if (newElements[k].Type == refElements[j].Type)
+                    {
+                        // Move matching element to the reference position
+                        VertexElement e = newElements[k];
+                        newElements.RemoveAt(k);
+                        newElements.Insert(i, e);
+                        i++;
+                        break;
+                    }
+                }
+            }
+        }
        if (elementsModified)
            result = Get(newElements, true);
    }
--- a/Source/Engine/Graphics/Shaders/GPUVertexLayout.h
+++ b/Source/Engine/Graphics/Shaders/GPUVertexLayout.h
@@ -84,8 +84,9 @@ public:
    /// <param name="removeUnused">True to remove elements from base layout that don't exist in a reference layout.</param>
    /// <param name="addMissing">True to add missing elements to base layout that exist in a reference layout.</param>
    /// <param name="missingSlotOverride">Allows to override the input slot for missing elements. Use value -1 to inherit slot from the reference layout.</param>
+    /// <param name="referenceOrder">True to reorder the result elements to match the element order of the reference layout. For example, if the input vertex buffer layout orders its elements differently than the vertex shader, the result can be made to match the shader order.</param>
    /// <returns>Vertex layout object. Doesn't need to be cleared as it's cached for an application lifetime.</returns>
-    static GPUVertexLayout* Merge(GPUVertexLayout* base, GPUVertexLayout* reference, bool removeUnused = false, bool addMissing = true, int32 missingSlotOverride = -1);
+    static GPUVertexLayout* Merge(GPUVertexLayout* base, GPUVertexLayout* reference, bool removeUnused = false, bool addMissing = true, int32 missingSlotOverride = -1, bool referenceOrder = false);

 public:
    // [GPUResource]
--- a/Source/Engine/Platform/Base/PlatformBase.cpp
+++ b/Source/Engine/Platform/Base/PlatformBase.cpp
@@ -51,6 +51,7 @@ Array<User*, FixedAllocation<8>> PlatformBase::Users;
 Delegate<User*> PlatformBase::UserAdded;
 Delegate<User*> PlatformBase::UserRemoved;
 void* OutOfMemoryBuffer = nullptr;
+volatile int64 FatalReporting = 0;

 const Char* ToString(NetworkConnectionType value)
 {
@@ -306,11 +307,20 @@ int32 PlatformBase::GetCacheLineSize()

 void PlatformBase::Fatal(const StringView& msg, void* context, FatalErrorType error)
 {
+    // Allow only one thread at a time to report the error (other threads wait for it to finish so the log is valid before the crash)
+RETRY:
+    if (Platform::InterlockedCompareExchange(&FatalReporting, 1, 0) != 0)
+    {
+        Platform::Sleep(1);
+        goto RETRY;
+    }
+
    // Check if is already during fatal state
    if (Engine::FatalError != FatalErrorType::None)
    {
        // Just send one more error to the log and back
        LOG(Error, "Error after fatal error: {0}", msg);
+        Platform::AtomicStore(&FatalReporting, 0);
        return;
    }

@@ -429,6 +439,8 @@ void PlatformBase::Fatal(const StringView& msg, void* context, FatalErrorType er
    }
 #endif

+    Platform::AtomicStore(&FatalReporting, 0);
+
    // Show error message
    if (Engine::ReportCrash.IsBinded())
        Engine::ReportCrash(msg, context);
--- a/Source/Engine/Renderer/PostProcessingPass.cpp
+++ b/Source/Engine/Renderer/PostProcessingPass.cpp
@@ -375,6 +375,7 @@ void PostProcessingPass::Render(RenderContext& renderContext, GPUTexture* input,
        RENDER_TARGET_POOL_SET_NAME(bloomBuffer1, "PostProcessing.Bloom");
        RENDER_TARGET_POOL_SET_NAME(bloomBuffer2, "PostProcessing.Bloom");

+        // TODO: skip this clear, or clear each whole texture at once (2 calls in total instead of one call per mip)
        for (int32 mip = 0; mip < bloomMipCount; mip++)
        {
            context->Clear(bloomBuffer1->View(0, mip), Color::Transparent);
--- a/Source/Engine/Renderer/RenderList.cpp
+++ b/Source/Engine/Renderer/RenderList.cpp
@@ -917,6 +917,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL
    perDraw.DrawPadding = Float3::Zero;
    GPUConstantBuffer* perDrawCB = IMaterial::BindParameters::PerDrawConstants;
    context->BindCB(2, perDrawCB); // TODO: use rootSignature/pushConstants on D3D12/Vulkan
+    context->UpdateCB(perDrawCB, &perDraw);
    constexpr int32 vbMax = ARRAY_COUNT(DrawCall::Geometry.VertexBuffers);
    if (useInstancing)
    {
@@ -1057,7 +1058,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL
        materialBinds += list.PreBatchedDrawCalls.Count();
        if (list.Batches.IsEmpty() && list.Indices.Count() != 0)
        {
-            // Draw calls list has bot been batched so execute draw calls separately
+            // Draw calls list has not been batched so execute draw calls separately
            for (int32 j = 0; j < list.Indices.Count(); j++)
            {
                perDraw.DrawObjectIndex = listData[j];
--- a/Source/Engine/Renderer/RenderList.h
+++ b/Source/Engine/Renderer/RenderList.h
@@ -273,7 +273,7 @@ struct DrawCallsList
    /// <summary>
    /// True if draw calls batches list can be rendered using hardware instancing, otherwise false.
    /// </summary>
-    bool CanUseInstancing;
+    bool CanUseInstancing = true;

    void Clear();
    bool IsEmpty() const;
--- a/Source/Engine/Scripting/Runtime/DotNet.cpp
+++ b/Source/Engine/Scripting/Runtime/DotNet.cpp
@@ -2137,6 +2137,53 @@ static void* OnMonoDlFallbackClose(void* handle, void* user_data)

 #endif

+#ifdef USE_MONO_AOT_MODULE
+
+#include "Engine/Threading/ThreadPoolTask.h"
+#include "Engine/Engine/EngineService.h"
+
+class MonoAotPreloadTask : public ThreadPoolTask
+{
+public:
+    bool Run() override;
+};
+
+// Preloads the in-build AOT dynamic module asynchronously (on a thread pool task started during service init)
+class MonoAotPreloadService : public EngineService
+{
+public:
+    volatile int64 Ready = 0;
+    void* Library = nullptr;
+
+    MonoAotPreloadService()
+        : EngineService(TEXT("AOT Preload"), -800)
+    {
+    }
+
+    bool Init() override
+    {
+        New<MonoAotPreloadTask>()->Start();
+        return false;
+    }
+};
+
+MonoAotPreloadService MonoAotPreloadServiceInstance;
+
+bool MonoAotPreloadTask::Run()
+{
+    // Load AOT module
+    Stopwatch aotModuleLoadStopwatch;
+    //LOG(Info, "Loading Mono AOT module...");
+    MonoAotPreloadServiceInstance.Library = Platform::LoadLibrary(TEXT(USE_MONO_AOT_MODULE));
+    aotModuleLoadStopwatch.Stop();
+    LOG(Info, "Mono AOT module loaded in {0}ms", aotModuleLoadStopwatch.GetMilliseconds());
+
+    Platform::AtomicStore(&MonoAotPreloadServiceInstance.Ready, 1);
+    return false;
+}
+
+#endif
+
 bool InitHostfxr()
 {
 #if DOTNET_HOST_MONO_DEBUG
@@ -2167,10 +2214,12 @@ bool InitHostfxr()
 #endif

 #ifdef USE_MONO_AOT_MODULE
-    // Load AOT module
-    Stopwatch aotModuleLoadStopwatch;
-    LOG(Info, "Loading Mono AOT module...");
-    void* libAotModule = Platform::LoadLibrary(TEXT(USE_MONO_AOT_MODULE));
+    // Wait for AOT module preloading
+    while (Platform::AtomicRead(&MonoAotPreloadServiceInstance.Ready) == 0)
+        Platform::Yield();
+
+    // Initialize AOT module
+    void* libAotModule = MonoAotPreloadServiceInstance.Library;
    if (libAotModule == nullptr)
    {
        LOG(Error, "Failed to laod Mono AOT module (" TEXT(USE_MONO_AOT_MODULE) ")");
@@ -2193,8 +2242,6 @@ bool InitHostfxr()
        mono_aot_register_module((void**)modules[i]);
    }
    Allocator::Free(modules);
-    aotModuleLoadStopwatch.Stop();
-    LOG(Info, "Mono AOT module loaded in {0}ms", aotModuleLoadStopwatch.GetMilliseconds());
 #endif

    // Setup debugger
--- a/Source/Engine/ShadersCompilation/ShaderCompiler.cpp
+++ b/Source/Engine/ShadersCompilation/ShaderCompiler.cpp
@@ -278,6 +278,17 @@ bool ShaderCompiler::WriteShaderFunctionPermutation(ShaderCompilationContext* co
    return false;
 }

+bool ShaderCompiler::WriteShaderFunctionPermutation(ShaderCompilationContext* context, ShaderFunctionMeta& meta, int32 permutationIndex, const ShaderBindings& bindings, const void* header, int32 headerSize, const void* cache1, int32 cache1Size, const void* cache2, int32 cache2Size)
+{
+    auto output = context->Output;
+    output->Write((uint32)(cache1Size + cache2Size + headerSize));
+    output->WriteBytes(header, headerSize);
+    output->WriteBytes(cache1, cache1Size);
+    output->WriteBytes(cache2, cache2Size);
+    output->Write(bindings);
+    return false;
+}
+
 bool ShaderCompiler::WriteShaderFunctionPermutation(ShaderCompilationContext* context, ShaderFunctionMeta& meta, int32 permutationIndex, const ShaderBindings& bindings, const void* cache, int32 cacheSize)
 {
    auto output = context->Output;
--- a/Source/Engine/ShadersCompilation/ShaderCompiler.h
+++ b/Source/Engine/ShadersCompilation/ShaderCompiler.h
@@ -108,6 +108,7 @@ protected:

    static bool WriteShaderFunctionBegin(ShaderCompilationContext* context, ShaderFunctionMeta& meta);
    static bool WriteShaderFunctionPermutation(ShaderCompilationContext* context, ShaderFunctionMeta& meta, int32 permutationIndex, const ShaderBindings& bindings, const void* header, int32 headerSize, const void* cache, int32 cacheSize);
+    static bool WriteShaderFunctionPermutation(ShaderCompilationContext* context, ShaderFunctionMeta& meta, int32 permutationIndex, const ShaderBindings& bindings, const void* header, int32 headerSize, const void* cache1, int32 cache1Size, const void* cache2, int32 cache2Size);
    static bool WriteShaderFunctionPermutation(ShaderCompilationContext* context, ShaderFunctionMeta& meta, int32 permutationIndex, const ShaderBindings& bindings, const void* cache, int32 cacheSize);
    static bool WriteShaderFunctionEnd(ShaderCompilationContext* context, ShaderFunctionMeta& meta);
    static bool WriteCustomDataVS(ShaderCompilationContext* context, ShaderFunctionMeta& meta, int32 permutationIndex, const Array<ShaderMacro>& macros, void* additionalData);