Merge remote-tracking branch 'origin/master'

This commit is contained in:
Wojtek Figat
2025-12-09 09:48:05 +01:00
14 changed files with 130 additions and 14 deletions

View File

@@ -658,7 +658,10 @@ public:
--_count;
T* data = _allocation.Get();
if (index < _count)
Memory::MoveAssignItems(data + index, data + (index + 1), _count - index);
{
for (int32 i = index; i < _count; i++)
data[i] = MoveTemp(data[i + 1]);
}
Memory::DestructItems(data + _count, 1);
}

View File

@@ -201,6 +201,7 @@ bool DeferredMaterialShader::Load()
psDesc.DepthWriteEnable = true;
psDesc.DepthEnable = true;
psDesc.DepthFunc = ComparisonFunc::Less;
psDesc.BlendMode.RenderTargetWriteMask = BlendingMode::ColorWrite::None;
psDesc.HS = nullptr;
psDesc.DS = nullptr;
GPUShaderProgramVS* instancedDepthPassVS;

View File

@@ -195,5 +195,10 @@ bool ForwardMaterialShader::Load()
psDesc.VS = _shader->GetVS("VS_Skinned");
_cache.DepthSkinned.Init(psDesc);
#if PLATFORM_PS5
// Fix shader binding issues on forward shading materials on PS5
_drawModes = DrawPass::None;
#endif
return false;
}

View File

@@ -264,5 +264,10 @@ bool ParticleMaterialShader::Load()
// Lazy initialization
_cacheVolumetricFog.Desc.PS = nullptr;
#if PLATFORM_PS5
// Fix shader binding issues on forward shading materials on PS5
_drawModes = DrawPass::None;
#endif
return false;
}

View File

@@ -113,7 +113,8 @@ GPUTexture* RenderBuffers::RequestHalfResDepth(GPUContext* context)
PixelFormat RenderBuffers::GetOutputFormat() const
{
return _useAlpha ? PixelFormat::R16G16B16A16_Float : PixelFormat::R11G11B10_Float;
// TODO: fix incorrect alpha leaking into reflections on PS5 with R11G11B10_Float
return _useAlpha || PLATFORM_PS5 ? PixelFormat::R16G16B16A16_Float : PixelFormat::R11G11B10_Float;
}
bool RenderBuffers::GetUseAlpha() const

View File

@@ -216,20 +216,21 @@ GPUVertexLayout* GPUVertexLayout::Get(const Span<GPUVertexLayout*>& layouts)
return result;
}
GPUVertexLayout* GPUVertexLayout::Merge(GPUVertexLayout* base, GPUVertexLayout* reference, bool removeUnused, bool addMissing, int32 missingSlotOverride)
GPUVertexLayout* GPUVertexLayout::Merge(GPUVertexLayout* base, GPUVertexLayout* reference, bool removeUnused, bool addMissing, int32 missingSlotOverride, bool referenceOrder)
{
GPUVertexLayout* result = base ? base : reference;
if (base && reference && base != reference)
{
bool elementsModified = false;
Elements newElements = base->GetElements();
const Elements& refElements = reference->GetElements();
if (removeUnused)
{
for (int32 i = newElements.Count() - 1; i >= 0; i--)
{
bool missing = true;
const VertexElement& e = newElements.Get()[i];
for (const VertexElement& ee : reference->GetElements())
for (const VertexElement& ee : refElements)
{
if (ee.Type == e.Type)
{
@@ -247,7 +248,7 @@ GPUVertexLayout* GPUVertexLayout::Merge(GPUVertexLayout* base, GPUVertexLayout*
}
if (addMissing)
{
for (const VertexElement& e : reference->GetElements())
for (const VertexElement& e : refElements)
{
bool missing = true;
for (const VertexElement& ee : base->GetElements())
@@ -282,6 +283,32 @@ GPUVertexLayout* GPUVertexLayout::Merge(GPUVertexLayout* base, GPUVertexLayout*
}
}
}
if (referenceOrder)
{
for (int32 i = 0, j = 0; i < newElements.Count() && j < refElements.Count(); j++)
{
if (newElements[i].Type == refElements[j].Type)
{
// Elements match so move forward
i++;
continue;
}
// Find reference element in a new list
for (int32 k = i + 1; k < newElements.Count(); k++)
{
if (newElements[k].Type == refElements[j].Type)
{
// Move matching element to the reference position
VertexElement e = newElements[k];
newElements.RemoveAt(k);
newElements.Insert(i, e);
i++;
break;
}
}
}
}
if (elementsModified)
result = Get(newElements, true);
}

View File

@@ -84,8 +84,9 @@ public:
/// <param name="removeUnused">True to remove elements from base layout that don't exist in a reference layout.</param>
/// <param name="addMissing">True to add missing elements to base layout that exist in a reference layout.</param>
/// <param name="missingSlotOverride">Allows to override the input slot for missing elements. Use value -1 to inherit slot from the reference layout.</param>
/// <param name="referenceOrder">True to reorder result elements to match the reference layout. For example, if input vertex buffer layout is different than vertex shader then it can match those.</param>
/// <returns>Vertex layout object. Doesn't need to be cleared as it's cached for an application lifetime.</returns>
static GPUVertexLayout* Merge(GPUVertexLayout* base, GPUVertexLayout* reference, bool removeUnused = false, bool addMissing = true, int32 missingSlotOverride = -1);
static GPUVertexLayout* Merge(GPUVertexLayout* base, GPUVertexLayout* reference, bool removeUnused = false, bool addMissing = true, int32 missingSlotOverride = -1, bool referenceOrder = false);
public:
// [GPUResource]

View File

@@ -51,6 +51,7 @@ Array<User*, FixedAllocation<8>> PlatformBase::Users;
Delegate<User*> PlatformBase::UserAdded;
Delegate<User*> PlatformBase::UserRemoved;
void* OutOfMemoryBuffer = nullptr;
volatile int64 FatalReporting = 0;
const Char* ToString(NetworkConnectionType value)
{
@@ -306,11 +307,20 @@ int32 PlatformBase::GetCacheLineSize()
void PlatformBase::Fatal(const StringView& msg, void* context, FatalErrorType error)
{
// Let only one thread report the error at a time (other threads wait for it to finish so the log is valid before the crash)
RETRY:
if (Platform::InterlockedCompareExchange(&FatalReporting, 1, 0) != 0)
{
Platform::Sleep(1);
goto RETRY;
}
// Check if the engine is already in a fatal error state
if (Engine::FatalError != FatalErrorType::None)
{
// Just send one more error to the log and return
LOG(Error, "Error after fatal error: {0}", msg);
Platform::AtomicStore(&FatalReporting, 0);
return;
}
@@ -429,6 +439,8 @@ void PlatformBase::Fatal(const StringView& msg, void* context, FatalErrorType er
}
#endif
Platform::AtomicStore(&FatalReporting, 0);
// Show error message
if (Engine::ReportCrash.IsBinded())
Engine::ReportCrash(msg, context);

View File

@@ -375,6 +375,7 @@ void PostProcessingPass::Render(RenderContext& renderContext, GPUTexture* input,
RENDER_TARGET_POOL_SET_NAME(bloomBuffer1, "PostProcessing.Bloom");
RENDER_TARGET_POOL_SET_NAME(bloomBuffer2, "PostProcessing.Bloom");
// TODO: skip this clear? or do it at once for the whole textures (2 calls instead of per-mip)
for (int32 mip = 0; mip < bloomMipCount; mip++)
{
context->Clear(bloomBuffer1->View(0, mip), Color::Transparent);

View File

@@ -917,6 +917,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL
perDraw.DrawPadding = Float3::Zero;
GPUConstantBuffer* perDrawCB = IMaterial::BindParameters::PerDrawConstants;
context->BindCB(2, perDrawCB); // TODO: use rootSignature/pushConstants on D3D12/Vulkan
context->UpdateCB(perDrawCB, &perDraw);
constexpr int32 vbMax = ARRAY_COUNT(DrawCall::Geometry.VertexBuffers);
if (useInstancing)
{
@@ -1057,7 +1058,7 @@ void RenderList::ExecuteDrawCalls(const RenderContext& renderContext, DrawCallsL
materialBinds += list.PreBatchedDrawCalls.Count();
if (list.Batches.IsEmpty() && list.Indices.Count() != 0)
{
// Draw calls list has bot been batched so execute draw calls separately
// Draw calls list has not been batched so execute draw calls separately
for (int32 j = 0; j < list.Indices.Count(); j++)
{
perDraw.DrawObjectIndex = listData[j];

View File

@@ -273,7 +273,7 @@ struct DrawCallsList
/// <summary>
/// True if draw calls batches list can be rendered using hardware instancing, otherwise false.
/// </summary>
bool CanUseInstancing;
bool CanUseInstancing = true;
void Clear();
bool IsEmpty() const;

View File

@@ -2137,6 +2137,53 @@ static void* OnMonoDlFallbackClose(void* handle, void* user_data)
#endif
#ifdef USE_MONO_AOT_MODULE
#include "Engine/Threading/ThreadPoolTask.h"
#include "Engine/Engine/EngineService.h"
// Thread pool task that performs the Mono AOT module preload.
// Run() is defined out-of-line, after MonoAotPreloadServiceInstance is declared, because it writes to that instance.
class MonoAotPreloadTask : public ThreadPoolTask
{
public:
bool Run() override;
};
// Engine service that preloads the AOT dynamic module asynchronously: Init() starts a MonoAotPreloadTask,
// and InitHostfxr() later spins on Ready before reading Library.
class MonoAotPreloadService : public EngineService
{
public:
// Set to 1 (via Platform::AtomicStore) by MonoAotPreloadTask::Run once the load attempt has finished, whether or not it succeeded.
volatile int64 Ready = 0;
// Handle returned by Platform::LoadLibrary for the AOT module; remains nullptr until the task assigns it (InitHostfxr reports an error if it is still nullptr after Ready is set).
void* Library = nullptr;
MonoAotPreloadService()
// NOTE(review): -800 is presumably the service ordering value (early initialization) - confirm against EngineService.
: EngineService(TEXT("AOT Preload"), -800)
{
}
bool Init() override
{
// Create and start the preload task. NOTE(review): Start() presumably schedules it on the thread pool without blocking - confirm.
New<MonoAotPreloadTask>()->Start();
// NOTE(review): false presumably means "no error" per the engine's return convention - confirm.
return false;
}
};
// Single global instance shared between MonoAotPreloadTask::Run and InitHostfxr().
MonoAotPreloadService MonoAotPreloadServiceInstance;
bool MonoAotPreloadTask::Run()
{
// Load AOT module
Stopwatch aotModuleLoadStopwatch;
//LOG(Info, "Loading Mono AOT module...");
MonoAotPreloadServiceInstance.Library = Platform::LoadLibrary(TEXT(USE_MONO_AOT_MODULE));
aotModuleLoadStopwatch.Stop();
LOG(Info, "Mono AOT module loaded in {0}ms", aotModuleLoadStopwatch.GetMilliseconds());
Platform::AtomicStore(&MonoAotPreloadServiceInstance.Ready, 1);
return false;
}
#endif
bool InitHostfxr()
{
#if DOTNET_HOST_MONO_DEBUG
@@ -2167,10 +2214,12 @@ bool InitHostfxr()
#endif
#ifdef USE_MONO_AOT_MODULE
// Load AOT module
Stopwatch aotModuleLoadStopwatch;
LOG(Info, "Loading Mono AOT module...");
void* libAotModule = Platform::LoadLibrary(TEXT(USE_MONO_AOT_MODULE));
// Wait for AOT module preloading
while (Platform::AtomicRead(&MonoAotPreloadServiceInstance.Ready) == 0)
Platform::Yield();
// Initialize AOT module
void* libAotModule = MonoAotPreloadServiceInstance.Library;
if (libAotModule == nullptr)
{
LOG(Error, "Failed to laod Mono AOT module (" TEXT(USE_MONO_AOT_MODULE) ")");
@@ -2193,8 +2242,6 @@ bool InitHostfxr()
mono_aot_register_module((void**)modules[i]);
}
Allocator::Free(modules);
aotModuleLoadStopwatch.Stop();
LOG(Info, "Mono AOT module loaded in {0}ms", aotModuleLoadStopwatch.GetMilliseconds());
#endif
// Setup debugger

View File

@@ -278,6 +278,17 @@ bool ShaderCompiler::WriteShaderFunctionPermutation(ShaderCompilationContext* co
return false;
}
bool ShaderCompiler::WriteShaderFunctionPermutation(ShaderCompilationContext* context, ShaderFunctionMeta& meta, int32 permutationIndex, const ShaderBindings& bindings, const void* header, int32 headerSize, const void* cache1, int32 cache1Size, const void* cache2, int32 cache2Size)
{
    // The size prefix counts the header and both cache blobs; the bindings written afterwards are not included in it
    const int32 payloadSize = cache1Size + cache2Size + headerSize;
    auto stream = context->Output;
    stream->Write(static_cast<uint32>(payloadSize));

    // Payload layout: header first, then the two cache blobs back-to-back
    stream->WriteBytes(header, headerSize);
    stream->WriteBytes(cache1, cache1Size);
    stream->WriteBytes(cache2, cache2Size);

    // Bindings follow the payload
    stream->Write(bindings);
    return false;
}
bool ShaderCompiler::WriteShaderFunctionPermutation(ShaderCompilationContext* context, ShaderFunctionMeta& meta, int32 permutationIndex, const ShaderBindings& bindings, const void* cache, int32 cacheSize)
{
auto output = context->Output;

View File

@@ -108,6 +108,7 @@ protected:
static bool WriteShaderFunctionBegin(ShaderCompilationContext* context, ShaderFunctionMeta& meta);
static bool WriteShaderFunctionPermutation(ShaderCompilationContext* context, ShaderFunctionMeta& meta, int32 permutationIndex, const ShaderBindings& bindings, const void* header, int32 headerSize, const void* cache, int32 cacheSize);
static bool WriteShaderFunctionPermutation(ShaderCompilationContext* context, ShaderFunctionMeta& meta, int32 permutationIndex, const ShaderBindings& bindings, const void* header, int32 headerSize, const void* cache1, int32 cache1Size, const void* cache2, int32 cache2Size);
static bool WriteShaderFunctionPermutation(ShaderCompilationContext* context, ShaderFunctionMeta& meta, int32 permutationIndex, const ShaderBindings& bindings, const void* cache, int32 cacheSize);
static bool WriteShaderFunctionEnd(ShaderCompilationContext* context, ShaderFunctionMeta& meta);
static bool WriteCustomDataVS(ShaderCompilationContext* context, ShaderFunctionMeta& meta, int32 permutationIndex, const Array<ShaderMacro>& macros, void* additionalData);